001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers;
015
import org.gbif.api.vocabulary.NomenclaturalStatus;
import org.gbif.common.parsers.core.EnumParser;
import org.gbif.common.parsers.core.ParseResult;

import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
027
028/**
029 * Singleton implementation of the dictionary that uses the file /dictionaries/parse/nomStatus.txt.
030 */
031public class NomStatusParser extends EnumParser<NomenclaturalStatus> {
032    private Pattern CLEAN_PREFIX = Pattern.compile("\\s*\\.?\\s*");
033    private final Map<String, NomenclaturalStatus> PREFIXES;
034  private static NomStatusParser singletonObject = null;
035
036  {
037    Map<String, NomenclaturalStatus> prefixes = new HashMap<>();
038    prefixes.put("nom illeg", NomenclaturalStatus.ILLEGITIMATE);
039    prefixes.put("nom inval", NomenclaturalStatus.INVALID);
040    prefixes.put("comb nov", NomenclaturalStatus.NEW_COMBINATION);
041    prefixes.put("nom nov", NomenclaturalStatus.REPLACEMENT);
042    prefixes.put("nom nud", NomenclaturalStatus.NUDUM);
043    prefixes.put("nom rej", NomenclaturalStatus.REJECTED);
044    prefixes.put("unavailable", NomenclaturalStatus.INVALID);
045    PREFIXES = Collections.unmodifiableMap(prefixes);
046  }
047
048  private NomStatusParser(InputStream... file) {
049    super(NomenclaturalStatus.class, false, file);
050    // also make sure we have all enum knowledge mapped
051    for (NomenclaturalStatus ns : NomenclaturalStatus.values()) {
052      add(ns.getLatinLabel(), ns);
053      add(ns.getAbbreviatedLabel(), ns);
054    }
055  }
056
057    @Override
058    public ParseResult<NomenclaturalStatus> parse(String input) {
059        ParseResult<NomenclaturalStatus> result = super.parse(input);
060        if (!result.isSuccessful() && StringUtils.isNotEmpty(input)) {
061            String normed = CLEAN_PREFIX.matcher(input).replaceFirst(" ").trim().toLowerCase();
062            if (StringUtils.isNotEmpty(normed)) {
063                // try generic parsing of status prefixes only
064                for (Map.Entry<String, NomenclaturalStatus> entry : PREFIXES.entrySet()) {
065                    if (normed.startsWith(entry.getKey())) {
066                        return ParseResult.success(ParseResult.CONFIDENCE.PROBABLE, entry.getValue());
067                    }
068                }
069            }
070        }
071        return result;
072    }
073
074    public static NomStatusParser getInstance()
075    throws ClassCastException, AbstractMethodError, ArithmeticException, ArrayIndexOutOfBoundsException {
076    synchronized (NomStatusParser.class) {
077      if (singletonObject == null) {
078        singletonObject = new NomStatusParser(NomStatusParser.class.getResourceAsStream("/dictionaries/parse/nomStatus.tsv"));
079      }
080    }
081    return singletonObject;
082  }
083
084
085}