001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.common.parsers.core; 015 016import java.util.HashMap; 017import java.util.Iterator; 018import java.util.Map; 019 020import org.apache.commons.lang3.StringUtils; 021import org.slf4j.Logger; 022import org.slf4j.LoggerFactory; 023 024/** 025 * A simple parser that will initialise with source data, and will 026 * use that as a lookup to replace values. This was written with 027 * basis of record lookup and country names in mind. 028 * Future improvements to this implementation might call a 029 * dictionary web service for example, to achieve the same, 030 * but allow the abstraction of the dictionary management to a 031 * better project (separation of concerns) 032 */ 033public class DictionaryBackedParser<V> implements Parsable<V> { 034 protected final Logger log = LoggerFactory.getLogger(getClass()); 035 private final Map<String, V> dictionary = new HashMap<String, V>(); 036 private final boolean caseSensitive; 037 038 /** 039 * @param caseSensitive If the dictionary should be case sensitive (only applicable to String keys) 040 */ 041 public DictionaryBackedParser(boolean caseSensitive) { 042 this.caseSensitive = caseSensitive; 043 } 044 045 /** 046 * @param source To build the dictionary from 047 */ 048 @SuppressWarnings("unchecked") 049 public void init(Iterator<KeyValue<String, V>> source) { 050 while (source.hasNext()) { 051 KeyValue<String, V> kvp = source.next(); 052 add(kvp.getKey(), kvp.getValue()); 053 } 054 } 055 056 final protected void add(String key, V value) { 057 if (!StringUtils.isBlank(key)) { 058 String normedKey = normalize(key); 059 if (StringUtils.isNotEmpty(normedKey)) { 060 V existingValue = dictionary.get(normedKey); 061 if (existingValue == null) { 062 dictionary.put(normedKey, value); 063 } else if (!existingValue.equals(value)) { 064 log.warn("Ignoring mapping {}→{} as {} is already mapped to {}", key, value, key, existingValue); 065 } 066 } 067 } 068 } 069 070 /** 071 * Normalisation of a value used both by adding to the internal dictionary and parsing values. 072 * The default does trim and uppercase the value for Strings, but leaves other types unaltered. 073 * Override this method to provide specific normalisations for parsers. 074 * 075 * @param value the value to be normalised. 076 * 077 * @return the normalised value 078 */ 079 protected String normalize(String value) { 080 if (value != null) { 081 if (!caseSensitive) { 082 return StringUtils.trimToNull(value.toUpperCase()); 083 } else { 084 return StringUtils.trimToNull(value); 085 } 086 } 087 return value; 088 } 089 090 /** 091 * Tries to parse the input data according to its backing dictionary. 092 * If no entry in the dictionary can be found the result will be {@link ParseResult.STATUS FAIL} otherwise the 093 * result will be a {@link org.gbif.common.parsers.core.ParseResult.CONFIDENCE DEFINITE} {@link ParseResult.STATUS 094 * SUCCESS}. 095 * 096 * @param input To lookup in the dictionary 097 * 098 * @return the replacement from the dictionary 099 */ 100 @Override 101 public ParseResult<V> parse(String input) { 102 String normed = normalize(input); 103 V value = dictionary.get(normed); 104 if (value == null) { 105 return ParseResult.fail(); 106 } else { 107 return ParseResult.success(ParseResult.CONFIDENCE.DEFINITE, value); 108 } 109 } 110}