001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.nameparser; 015 016import com.google.common.collect.ImmutableMap; 017import org.apache.commons.lang3.StringUtils; 018import org.gbif.api.exception.UnparsableException; 019import org.gbif.api.model.checklistbank.ParsedName; 020import org.gbif.api.service.checklistbank.NameParser; 021import org.gbif.api.vocabulary.NameType; 022import org.gbif.api.vocabulary.Rank; 023import org.gbif.nameparser.api.NamePart; 024import org.gbif.nameparser.api.NomCode; 025import org.gbif.nameparser.api.UnparsableNameException; 026import org.gbif.nameparser.api.Warnings; 027import org.gbif.nameparser.util.NameFormatter; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import javax.annotation.Nullable; 032import java.util.Map; 033 034import static org.gbif.nameparser.api.ParsedName.State; 035 036/** 037 * A name parser wrapped to return classic ParsedName objects from the GBIF API v1. 038 */ 039public class NameParserGbifV1 implements NameParser { 040 041 private static Logger LOG = LoggerFactory.getLogger(NameParserGbifV1.class); 042 private static final Map<org.gbif.nameparser.api.NameType, NameType> NAME_TYPE_MAP = ImmutableMap.<org.gbif.nameparser.api.NameType, NameType>builder() 043 .put(org.gbif.nameparser.api.NameType.SCIENTIFIC, NameType.SCIENTIFIC) 044 .put(org.gbif.nameparser.api.NameType.VIRUS, NameType.VIRUS) 045 .put(org.gbif.nameparser.api.NameType.HYBRID_FORMULA, NameType.HYBRID) 046 .put(org.gbif.nameparser.api.NameType.INFORMAL, NameType.INFORMAL) 047 .put(org.gbif.nameparser.api.NameType.OTU, NameType.OTU) 048 .put(org.gbif.nameparser.api.NameType.PLACEHOLDER, NameType.PLACEHOLDER) 049 .put(org.gbif.nameparser.api.NameType.NO_NAME, NameType.NO_NAME) 050 .build(); 051 052 private final NameParserGBIF parser; 053 054 /** 055 * Using the default GBIF RegEx Name Parser. 056 */ 057 public NameParserGbifV1() { 058 this.parser = new NameParserGBIF(); 059 } 060 061 /** 062 * Using the default GBIF RegEx Name Parser with a given timeout for parsing a single name. 063 * @param timeout in milliseconds before returning an Unparsable name 064 */ 065 public NameParserGbifV1(long timeout) { 066 this.parser = new NameParserGBIF(timeout); 067 } 068 069 public NameParserGbifV1(NameParserGBIF parser) { 070 this.parser = parser; 071 } 072 073 public long getTimeout() { 074 return parser.getTimeout(); 075 } 076 077 public void setTimeout(long timeout) { 078 parser.setTimeout(timeout); 079 } 080 081 @Override 082 public ParsedName parse(String s, @Nullable Rank rank) throws UnparsableException { 083 try { 084 return convert(s, rank, parser.parse(s, fromGbif(rank))); 085 086 } catch (InterruptedException e) { 087 // got interrupted but we cant handle it or rethrow it. next best option is to reset the threads flag 088 Thread.currentThread().interrupt(); 089 throw new IllegalStateException("Thread got interrupted"); 090 091 } catch (UnparsableNameException e) { 092 throw new UnparsableException(NAME_TYPE_MAP.getOrDefault(e.getType(), NameType.DOUBTFUL), e.getName()); 093 } 094 } 095 096 @Override 097 public ParsedName parse(String scientificName) throws UnparsableException { 098 return parse(scientificName, null); 099 } 100 101 @Override 102 public ParsedName parseQuietly(String scientificName, @Nullable Rank rank) { 103 ParsedName p; 104 try { 105 p = parse(scientificName, rank); 106 107 } catch (UnparsableException e) { 108 p = new ParsedName(); 109 p.setScientificName(scientificName); 110 p.setRank(rank); 111 p.setType(e.type); 112 p.setParsed(false); 113 p.setParsedPartially(false); 114 } 115 116 return p; 117 } 118 119 @Override 120 public ParsedName parseQuietly(String scientificName) { 121 return parseQuietly(scientificName, null); 122 } 123 124 @Override 125 // parses the name without authorship and returns the ParsedName.canonicalName() string 126 public String parseToCanonical(String scientificName, @Nullable Rank rank) { 127 if (StringUtils.isBlank(scientificName)) { 128 return null; 129 } 130 try { 131 ParsedName pn = parse(scientificName, rank); 132 if (pn != null) { 133 return pn.canonicalName(); 134 } 135 } catch (UnparsableException e) { 136 logUnparsable(e); 137 } 138 return null; 139 } 140 141 private static void logUnparsable(UnparsableException e) { 142 if (e.type.isParsable()) { 143 LOG.debug("Unparsable {} {} >>> {}", e.type, e.name, e.getMessage()); 144 } else { 145 LOG.warn("Unparsable {} {} >>> {}", e.type, e.name, e.getMessage()); 146 } 147 } 148 149 @Override 150 public String parseToCanonical(String scientificName) { 151 return parseToCanonical(scientificName, null); 152 } 153 154 public String parseToCanonicalOrScientificName(String scientificName) { 155 return parseToCanonicalOrScientificName(scientificName, null); 156 } 157 158 /** 159 * Tries to parses the name without authorship and returns the ParsedName.canonicalName() string 160 * For unparsable types and other UnparsableExceptions the original scientific name is returned. 161 * @param rank the rank of the name if it is known externally. Helps identifying infrageneric names vs bracket authors 162 */ 163 public String parseToCanonicalOrScientificName(String scientificName, @Nullable Rank rank) { 164 if (StringUtils.isBlank(scientificName)) { 165 return null; 166 } 167 try { 168 ParsedName pn = parse(scientificName, rank); 169 if (pn != null) { 170 return pn.canonicalName(); 171 } 172 } catch (UnparsableException e) { 173 logUnparsable(e); 174 } 175 return StringUtils.normalizeSpace(scientificName.trim()); 176 } 177 178 179 180 private ParsedName convert(String scientificName, Rank rank, org.gbif.nameparser.api.ParsedName pn) throws UnparsableException { 181 // throw unparsable for all unparsable types but placeholder and for all names that have a not parsed state 182 if ((!pn.getType().isParsable() && pn.getType() != org.gbif.nameparser.api.NameType.PLACEHOLDER) 183 || pn.getState() == org.gbif.nameparser.api.ParsedName.State.NONE) { 184 throw new UnparsableException(gbifNameType(pn), scientificName); 185 } 186 187 ParsedName gbif = new ParsedName(); 188 189 gbif.setType(gbifNameType(pn)); 190 gbif.setScientificName(scientificName); 191 192 gbif.setGenusOrAbove(pn.getGenus() != null ? pn.getGenus(): pn.getUninomial()); 193 gbif.setInfraGeneric(pn.getInfragenericEpithet()); 194 gbif.setSpecificEpithet(pn.getSpecificEpithet()); 195 gbif.setInfraSpecificEpithet(pn.getInfraspecificEpithet()); 196 gbif.setCultivarEpithet(pn.getCultivarEpithet()); 197 gbif.setNotho(toGbif(pn.getNotho())); 198 gbif.setRank(toGbif(pn.getRank())); 199 // in the old API we used null instead of unranked 200 if (gbif.getRank() == Rank.UNRANKED && Rank.UNRANKED != rank) { 201 gbif.setRank(null); 202 } 203 gbif.setStrain(pn.getPhrase()); 204 gbif.setSensu(pn.getTaxonomicNote()); 205 206 gbif.setAuthorship(NameFormatter.authorString(pn.getCombinationAuthorship(), false)); 207 gbif.setYear(pn.getCombinationAuthorship().getYear()); 208 gbif.setBracketAuthorship(NameFormatter.authorString(pn.getBasionymAuthorship(), false)); 209 gbif.setBracketYear(pn.getBasionymAuthorship().getYear()); 210 211 gbif.setNomStatus(pn.getNomenclaturalNote()); 212 if (pn.getEpithetQualifier() != null && !pn.getEpithetQualifier().isEmpty()) { 213 StringBuilder sb = new StringBuilder(); 214 for (Map.Entry<NamePart, String> pq : pn.getEpithetQualifier().entrySet()) { 215 if (sb.length() < 1) { 216 sb.append(" "); 217 } 218 sb.append(pq.getValue()) 219 .append(" ") 220 .append(pn.getEpithet(pq.getKey())); 221 } 222 gbif.setRemarks(sb.toString()); 223 } 224 225 // we throw UnparsableException above already for State.NONE 226 gbif.setParsed(true); 227 gbif.setParsedPartially(pn.getState() == State.PARTIAL); 228 229 return gbif; 230 } 231 232 233 public static NameType gbifNameType(org.gbif.nameparser.api.ParsedName pn) { 234 NameType t; 235 // detect name types that only exist in the GBIF API v1 236 if (pn.isDoubtful() && pn.getWarnings().contains(Warnings.BLACKLISTED_EPITHET)) { 237 t = NameType.BLACKLISTED; 238 } else if (pn.isCandidatus()) { 239 t = NameType.CANDIDATUS; 240 } else if (pn.getCode() == NomCode.CULTIVARS || pn.getCultivarEpithet() != null) { 241 t = NameType.CULTIVAR; 242 } else { 243 // convert all others 244 t = NAME_TYPE_MAP.get(pn.getType()); 245 } 246 // use doubtful in too good cases 247 if (pn.isDoubtful() && (t == NameType.SCIENTIFIC || t == NameType.CULTIVAR)) { 248 return NameType.DOUBTFUL; 249 } 250 return t; 251 } 252 253 public static org.gbif.api.vocabulary.NamePart toGbif(NamePart notho) { 254 return convertEnum(org.gbif.api.vocabulary.NamePart.class, notho); 255 } 256 257 public static Rank toGbif(org.gbif.nameparser.api.Rank rank) { 258 if (rank == null) { 259 return null; 260 } 261 switch (rank) { 262 case SUPERDIVISION: return Rank.SUPERPHYLUM; 263 case DIVISION: return Rank.PHYLUM; 264 case SUBDIVISION: return Rank.SUBPHYLUM; 265 case INFRADIVISION: return Rank.INFRAPHYLUM; 266 267 case SUPERSECTION: 268 case SUPERSERIES: 269 return Rank.INFRAGENERIC_NAME; 270 271 case MEGAFAMILY: 272 case GRANDFAMILY: 273 case EPIFAMILY: 274 275 case GIGAORDER: 276 case MIRORDER: 277 case NANORDER: 278 case HYPOORDER: 279 case MINORDER: 280 281 case MEGACOHORT: 282 283 case GIGACLASS: 284 case MEGACLASS: 285 case SUBTERCLASS: 286 287 case PARVPHYLUM: 288 case MICROPHYLUM: 289 case NANOPHYLUM: 290 291 case REALM: 292 case SUBREALM: 293 return Rank.SUPRAGENERIC_NAME; 294 295 default: return convertEnum(Rank.class, rank); 296 } 297 } 298 299 300 public static org.gbif.nameparser.api.Rank fromGbif(Rank rank) { 301 if (rank == null) { 302 return null; 303 } 304 if (Rank.RACE == rank) { 305 return org.gbif.nameparser.api.Rank.PROLES; 306 } 307 return convertEnum(org.gbif.nameparser.api.Rank.class, rank); 308 } 309 310 /** 311 * Converts an enumeration value into a constant with the exact same name from a different enumeration class. 312 * In case the enumeration constant name does not exist an error is thrown. 313 * 314 * @param targetClass class of the target enumeration 315 * @param value 316 * @throws IllegalArgumentException in case the enumeration name does not exist in the target class 317 */ 318 private static <G extends Enum<G>> G convertEnum(Class<G> targetClass, Enum<?> value) { 319 try { 320 return value == null ? null : Enum.valueOf(targetClass, value.name()); 321 } catch (IllegalArgumentException e) { 322 LOG.warn("Unable to convert {} into {}", value, targetClass); 323 return null; 324 } 325 } 326}