001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.common.parsers; 015 016import org.gbif.api.vocabulary.NomenclaturalStatus; 017import org.gbif.common.parsers.core.EnumParser; 018import org.gbif.common.parsers.core.ParseResult; 019 020import java.io.InputStream; 021import java.util.Collections; 022import java.util.HashMap; 023import java.util.Map; 024import java.util.regex.Pattern; 025 026import org.apache.commons.lang3.StringUtils; 027 028/** 029 * Singleton implementation of the dictionary that uses the file /dictionaries/parse/nomStatus.txt. 030 */ 031public class NomStatusParser extends EnumParser<NomenclaturalStatus> { 032 private Pattern CLEAN_PREFIX = Pattern.compile("\\s*\\.?\\s*"); 033 private final Map<String, NomenclaturalStatus> PREFIXES; 034 private static NomStatusParser singletonObject = null; 035 036 { 037 Map<String, NomenclaturalStatus> prefixes = new HashMap<>(); 038 prefixes.put("nom illeg", NomenclaturalStatus.ILLEGITIMATE); 039 prefixes.put("nom inval", NomenclaturalStatus.INVALID); 040 prefixes.put("comb nov", NomenclaturalStatus.NEW_COMBINATION); 041 prefixes.put("nom nov", NomenclaturalStatus.REPLACEMENT); 042 prefixes.put("nom nud", NomenclaturalStatus.NUDUM); 043 prefixes.put("nom rej", NomenclaturalStatus.REJECTED); 044 prefixes.put("unavailable", NomenclaturalStatus.INVALID); 045 PREFIXES = Collections.unmodifiableMap(prefixes); 046 } 047 048 private NomStatusParser(InputStream... file) { 049 super(NomenclaturalStatus.class, false, file); 050 // also make sure we have all enum knowledge mapped 051 for (NomenclaturalStatus ns : NomenclaturalStatus.values()) { 052 add(ns.getLatinLabel(), ns); 053 add(ns.getAbbreviatedLabel(), ns); 054 } 055 } 056 057 @Override 058 public ParseResult<NomenclaturalStatus> parse(String input) { 059 ParseResult<NomenclaturalStatus> result = super.parse(input); 060 if (!result.isSuccessful() && StringUtils.isNotEmpty(input)) { 061 String normed = CLEAN_PREFIX.matcher(input).replaceFirst(" ").trim().toLowerCase(); 062 if (StringUtils.isNotEmpty(normed)) { 063 // try generic parsing of status prefixes only 064 for (Map.Entry<String, NomenclaturalStatus> entry : PREFIXES.entrySet()) { 065 if (normed.startsWith(entry.getKey())) { 066 return ParseResult.success(ParseResult.CONFIDENCE.PROBABLE, entry.getValue()); 067 } 068 } 069 } 070 } 071 return result; 072 } 073 074 public static NomStatusParser getInstance() 075 throws ClassCastException, AbstractMethodError, ArithmeticException, ArrayIndexOutOfBoundsException { 076 synchronized (NomStatusParser.class) { 077 if (singletonObject == null) { 078 singletonObject = new NomStatusParser(NomStatusParser.class.getResourceAsStream("/dictionaries/parse/nomStatus.tsv")); 079 } 080 } 081 return singletonObject; 082 } 083 084 085}