001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers.core;
015
016import java.io.BufferedReader;
017import java.io.IOException;
018import java.io.InputStream;
019import java.io.InputStreamReader;
020import java.nio.charset.StandardCharsets;
021import java.util.Iterator;
022import java.util.NoSuchElementException;
023import java.util.regex.Pattern;
024
025import org.apache.commons.lang3.StringUtils;
026
027/**
028 * A very simple Dictionary backed by a tab delimited file.
029 */
030public abstract class FileBasedDictionaryParser<T> extends DictionaryBackedParser<T> {
031
032  public FileBasedDictionaryParser(boolean caseSensitive) {
033    super(caseSensitive);
034  }
035
036  protected void init(InputStream input) {
037    init(input, null);
038  }
039
040  /**
041   * Init the parser to read the InputStream and ignore lines starting with the commentMarker.
042   *
043   * @param input
044   * @param commentMarker marker identifying a commented line (e.g. #) or null to read all lines
045   */
046  protected void init(InputStream input, String commentMarker) {
047    init(new Source(input, commentMarker));
048  }
049
050  /**
051   * Returns the value read from the dictionary as an instance of <T>
052   *
053   * @param value
054   * @return
055   */
056  protected abstract T fromDictFile(String value);
057
058  /**
059   * An iterator over a well formed tab file.
060   * Should the file be poorly formed expect runtime exceptions.
061   */
062  class Source implements Iterator<KeyValue<String, T>> {
063
064    private final BufferedReader r;
065    private final Pattern tab = Pattern.compile("\t");
066    private final String commentMarker;
067    private String line = null;
068
069    Source(InputStream file) {
070      this(file, null);
071    }
072
073    Source(InputStream file, String commentMarker) {
074      r = new BufferedReader(new InputStreamReader(file, StandardCharsets.UTF_8));
075      this.commentMarker = commentMarker;
076    }
077
078    @Override
079    public boolean hasNext() {
080      if (line != null) {
081        return true;
082      }
083
084      try {
085        // we discard empty or commented lines
086        do {
087          line = r.readLine();
088        } while (line != null && !isValidLine(line));
089      } catch (IOException ignored) {
090        close();
091        return false;
092      }
093
094      if (line == null) {
095        close();
096        return false;
097      } else {
098        return true;
099      }
100    }
101
102    /**
103     * Check if a line is valid or not.
104     * A valid line is not a comment (if configured) and should be in the form "key<tab>value".
105     *
106     * @param line
107     * @return
108     */
109    private boolean isValidLine(String line) {
110      if (line == null) {
111        return false;
112      }
113
114      if (commentMarker != null) {
115        if (line.startsWith(commentMarker)) {
116          return false;
117        }
118      }
119      return (tab.split(line).length == 2);
120    }
121
122    @Override
123    public KeyValue<String, T> next() {
124      if (!hasNext()) {
125        throw new NoSuchElementException();
126      }
127
128      String[] atoms = tab.split(line);
129      line = null;
130      return new KeyValue<>(StringUtils.trimToNull(atoms[0]), fromDictFile(StringUtils.trimToNull(atoms[1])));
131    }
132
133    @Override
134    public void remove() {
135    }
136
137    public void close() {
138      if (r != null) {
139        try {
140          r.close();
141        } catch (IOException ignored) {
142        }
143      }
144    }
145  }
146}