001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers;
015
import org.gbif.api.vocabulary.TypeStatus;
import org.gbif.common.parsers.core.EnumParser;
import org.gbif.common.parsers.core.ParseResult;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
024
025/**
026 * Singleton implementation of the dictionary that uses the file /dictionaries/parse/typeStatus.txt.
027 */
028public class TypeStatusParser extends EnumParser<TypeStatus> {
029
030  private static TypeStatusParser singletonObject = null;
031  private static final Pattern NAME_SEPARATOR = Pattern.compile("^(.+) (OF|FOR) ");
032
033  private TypeStatusParser() {
034    super(TypeStatus.class, false);
035    init(TypeStatusParser.class.getResourceAsStream("/dictionaries/parse/typeStatus.tsv"));
036  }
037
038  @Override
039  protected String normalize(String value) {
040    if (StringUtils.isEmpty(value)) {
041      return null;
042    }
043    // uppercase
044    value = value.toUpperCase().trim();
045
046    // keep only words before the typifiedName if existing, e.g. Holotype for "Holotype of Dianthus fruticosus ssp. amorginus Runemark"
047    Matcher m = NAME_SEPARATOR.matcher(value);
048    if (m.find()) {
049      value = m.group(1);
050    }
051    // remove whitespace and non letters
052    ParseResult<String> ascii = asciiParser.parse(value);
053
054    // remove all non-letters
055    return ascii.getPayload().chars()
056        .filter(p -> Character.isLetter((char) p))
057        .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
058        .toString();
059  }
060
061
062  public static TypeStatusParser getInstance()
063    throws ClassCastException, AbstractMethodError, ArithmeticException, ArrayIndexOutOfBoundsException {
064    synchronized (TypeStatusParser.class) {
065      if (singletonObject == null) {
066        singletonObject = new TypeStatusParser();
067      }
068    }
069    return singletonObject;
070  }
071}