/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.common.parsers;

import org.gbif.api.vocabulary.TypeStatus;
import org.gbif.common.parsers.core.EnumParser;
import org.gbif.common.parsers.core.ParseResult;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

/**
 * Singleton parser for {@link TypeStatus} values that is initialised from the classpath
 * resource /dictionaries/parse/typeStatus.tsv.
 *
 * <p>Obtain the shared instance through {@link #getInstance()}; the constructor is private.
 */
public class TypeStatusParser extends EnumParser<TypeStatus> {

  /** Lazily created shared instance; only assigned while holding the class monitor. */
  private static TypeStatusParser singletonObject = null;

  /**
   * Matches the leading part of an (already upper-cased) value that is followed by
   * " OF " or " FOR "; group 1 is the text in front of that separator.
   *
   * <p>NOTE(review): the quantifier is greedy, so when a value contains several separators
   * group 1 extends up to the last one - confirm this is the intended cut-off.
   */
  private static final Pattern NAME_SEPARATOR = Pattern.compile("^(.+) (OF|FOR) ");

  /**
   * Creates the parser and feeds the bundled dictionary resource to {@code init}.
   */
  private TypeStatusParser() {
    super(TypeStatus.class, false);
    init(TypeStatusParser.class.getResourceAsStream("/dictionaries/parse/typeStatus.tsv"));
  }

  /**
   * Reduces a raw type status string to an upper-case, letters-only lookup key.
   *
   * <p>Steps: upper-case and trim the value, keep only the part in front of an " OF " / " FOR "
   * separator when one is present (e.g. "Holotype of Dianthus fruticosus ssp. amorginus Runemark"
   * is cut down to "HOLOTYPE"), run the result through {@code asciiParser}, and finally drop
   * every character that is not a letter.
   *
   * @param value the raw value to normalize, may be null
   * @return the normalized key; {@code null} if {@code value} is null or empty, or if the
   *         ASCII parser produces no payload
   */
  @Override
  protected String normalize(String value) {
    if (StringUtils.isEmpty(value)) {
      return null;
    }

    // Locale.ROOT keeps the upper-casing independent of the JVM default locale
    // (a Turkish default locale would otherwise map "i" to a dotted capital I).
    String upper = value.toUpperCase(Locale.ROOT).trim();

    // keep only the words before the typified name, if there is one
    Matcher separator = NAME_SEPARATOR.matcher(upper);
    if (separator.find()) {
      upper = separator.group(1);
    }

    // convert to ASCII; guard against a result without payload instead of failing with an NPE
    ParseResult<String> ascii = asciiParser.parse(upper);
    String folded = ascii == null ? null : ascii.getPayload();
    if (folded == null) {
      return null;
    }

    // remove whitespace and all other non-letters
    StringBuilder letters = new StringBuilder(folded.length());
    for (int i = 0; i < folded.length(); i++) {
      char c = folded.charAt(i);
      if (Character.isLetter(c)) {
        letters.append(c);
      }
    }
    return letters.toString();
  }

  /**
   * Returns the shared parser, creating it on the first call.
   *
   * <p>Creation and lookup happen while synchronized on {@code TypeStatusParser.class}, so
   * concurrent callers receive the same instance.
   *
   * @return the singleton {@code TypeStatusParser}
   */
  public static TypeStatusParser getInstance() {
    synchronized (TypeStatusParser.class) {
      if (singletonObject == null) {
        singletonObject = new TypeStatusParser();
      }
      return singletonObject;
    }
  }
}