001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers.core;
015
016import java.util.HashMap;
017import java.util.Iterator;
018import java.util.Map;
019
020import org.apache.commons.lang3.StringUtils;
021import org.slf4j.Logger;
022import org.slf4j.LoggerFactory;
023
024/**
025 * A simple parser that will initialise with source data, and will
026 * use that as a lookup to replace values. This was written with
027 * basis of record lookup and country names in mind.
028 * Future improvements to this implementation might call a
029 * dictionary web service for example, to achieve the same,
030 * but allow the abstraction of the dictionary management to a
031 * better project (separation of concerns)
032 */
033public class DictionaryBackedParser<V> implements Parsable<V> {
034  protected final Logger log = LoggerFactory.getLogger(getClass());
035  private final Map<String, V> dictionary = new HashMap<String, V>();
036  private final boolean caseSensitive;
037
038  /**
039   * @param caseSensitive If the dictionary should be case sensitive (only applicable to String keys)
040   */
041  public DictionaryBackedParser(boolean caseSensitive) {
042    this.caseSensitive = caseSensitive;
043  }
044
045  /**
046   * @param source To build the dictionary from
047   */
048  @SuppressWarnings("unchecked")
049  public void init(Iterator<KeyValue<String, V>> source) {
050    while (source.hasNext()) {
051      KeyValue<String, V> kvp = source.next();
052      add(kvp.getKey(), kvp.getValue());
053    }
054  }
055
056  final protected void add(String key, V value) {
057    if (!StringUtils.isBlank(key)) {
058      String normedKey = normalize(key);
059      if (StringUtils.isNotEmpty(normedKey)) {
060        V existingValue = dictionary.get(normedKey);
061        if (existingValue == null) {
062          dictionary.put(normedKey, value);
063        } else if (!existingValue.equals(value)) {
064          log.warn("Ignoring mapping {}→{} as {} is already mapped to {}", key, value, key, existingValue);
065        }
066      }
067    }
068  }
069
070  /**
071   * Normalisation of a value used both by adding to the internal dictionary and parsing values.
072   * The default does trim and uppercase the value for Strings, but leaves other types unaltered.
073   * Override this method to provide specific normalisations for parsers.
074   *
075   * @param value the value to be normalised.
076   *
077   * @return the normalised value
078   */
079  protected String normalize(String value) {
080    if (value != null) {
081      if (!caseSensitive) {
082        return StringUtils.trimToNull(value.toUpperCase());
083      } else {
084        return StringUtils.trimToNull(value);
085      }
086    }
087    return value;
088  }
089
090  /**
091   * Tries to parse the input data according to its backing dictionary.
092   * If no entry in the dictionary can be found the result will be {@link ParseResult.STATUS FAIL} otherwise the
093   * result will be a {@link org.gbif.common.parsers.core.ParseResult.CONFIDENCE DEFINITE} {@link ParseResult.STATUS
094   * SUCCESS}.
095   *
096   * @param input To lookup in the dictionary
097   *
098   * @return the replacement from the dictionary
099   */
100  @Override
101  public ParseResult<V> parse(String input) {
102    String normed = normalize(input);
103    V value = dictionary.get(normed);
104    if (value == null) {
105      return ParseResult.fail();
106    } else {
107      return ParseResult.success(ParseResult.CONFIDENCE.DEFINITE, value);
108    }
109  }
110}