001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.nameparser;
015
016import com.google.common.collect.ImmutableMap;
017import org.apache.commons.lang3.StringUtils;
018import org.gbif.api.exception.UnparsableException;
019import org.gbif.api.model.checklistbank.ParsedName;
020import org.gbif.api.service.checklistbank.NameParser;
021import org.gbif.api.vocabulary.NameType;
022import org.gbif.api.vocabulary.Rank;
023import org.gbif.nameparser.api.NamePart;
024import org.gbif.nameparser.api.NomCode;
025import org.gbif.nameparser.api.UnparsableNameException;
026import org.gbif.nameparser.api.Warnings;
027import org.gbif.nameparser.util.NameFormatter;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import javax.annotation.Nullable;
032import java.util.Map;
033
034import static org.gbif.nameparser.api.ParsedName.State;
035
036/**
037 * A name parser wrapped to return classic ParsedName objects from the GBIF API v1.
038 */
039public class NameParserGbifV1 implements NameParser {
040
041  private static Logger LOG = LoggerFactory.getLogger(NameParserGbifV1.class);
042  private static final Map<org.gbif.nameparser.api.NameType, NameType> NAME_TYPE_MAP = ImmutableMap.<org.gbif.nameparser.api.NameType, NameType>builder()
043      .put(org.gbif.nameparser.api.NameType.SCIENTIFIC, NameType.SCIENTIFIC)
044      .put(org.gbif.nameparser.api.NameType.VIRUS, NameType.VIRUS)
045      .put(org.gbif.nameparser.api.NameType.HYBRID_FORMULA, NameType.HYBRID)
046      .put(org.gbif.nameparser.api.NameType.INFORMAL, NameType.INFORMAL)
047      .put(org.gbif.nameparser.api.NameType.OTU, NameType.OTU)
048      .put(org.gbif.nameparser.api.NameType.PLACEHOLDER, NameType.PLACEHOLDER)
049      .put(org.gbif.nameparser.api.NameType.NO_NAME, NameType.NO_NAME)
050      .build();
051
052  private final NameParserGBIF parser;
053
054  /**
055   * Using the default GBIF RegEx Name Parser.
056   */
057  public NameParserGbifV1() {
058    this.parser = new NameParserGBIF();
059  }
060
061  /**
062   * Using the default GBIF RegEx Name Parser with a given timeout for parsing a single name.
063   * @param timeout in milliseconds before returning an Unparsable name
064   */
065  public NameParserGbifV1(long timeout) {
066    this.parser = new NameParserGBIF(timeout);
067  }
068
069  public NameParserGbifV1(NameParserGBIF parser) {
070    this.parser = parser;
071  }
072
073  public long getTimeout() {
074    return parser.getTimeout();
075  }
076
077  public void setTimeout(long timeout) {
078    parser.setTimeout(timeout);
079  }
080
081  @Override
082  public ParsedName parse(String s, @Nullable Rank rank) throws UnparsableException {
083    try {
084      return convert(s, rank, parser.parse(s, fromGbif(rank)));
085
086    } catch (InterruptedException e) {
087      // got interrupted but we cant handle it or rethrow it. next best option is to reset the threads flag
088      Thread.currentThread().interrupt();
089      throw new IllegalStateException("Thread got interrupted");
090
091    } catch (UnparsableNameException e) {
092      throw new UnparsableException(NAME_TYPE_MAP.getOrDefault(e.getType(), NameType.DOUBTFUL), e.getName());
093    }
094  }
095
096  @Override
097  public ParsedName parse(String scientificName) throws UnparsableException {
098    return parse(scientificName, null);
099  }
100
101  @Override
102  public ParsedName parseQuietly(String scientificName, @Nullable Rank rank) {
103    ParsedName p;
104    try {
105      p = parse(scientificName, rank);
106
107    } catch (UnparsableException e) {
108      p = new ParsedName();
109      p.setScientificName(scientificName);
110      p.setRank(rank);
111      p.setType(e.type);
112      p.setParsed(false);
113      p.setParsedPartially(false);
114    }
115
116    return p;
117  }
118
119  @Override
120  public ParsedName parseQuietly(String scientificName) {
121    return parseQuietly(scientificName, null);
122  }
123
124  @Override
125  // parses the name without authorship and returns the ParsedName.canonicalName() string
126  public String parseToCanonical(String scientificName, @Nullable Rank rank) {
127    if (StringUtils.isBlank(scientificName)) {
128      return null;
129    }
130    try {
131      ParsedName pn = parse(scientificName, rank);
132      if (pn != null) {
133        return pn.canonicalName();
134      }
135    } catch (UnparsableException e) {
136      logUnparsable(e);
137    }
138    return null;
139  }
140
141  private static void logUnparsable(UnparsableException e) {
142    if (e.type.isParsable()) {
143      LOG.debug("Unparsable {} {} >>> {}", e.type, e.name, e.getMessage());
144    } else {
145      LOG.warn("Unparsable {} {} >>> {}", e.type, e.name, e.getMessage());
146    }
147  }
148
149  @Override
150  public String parseToCanonical(String scientificName) {
151    return parseToCanonical(scientificName, null);
152  }
153
154  public String parseToCanonicalOrScientificName(String scientificName) {
155    return parseToCanonicalOrScientificName(scientificName, null);
156  }
157
158  /**
159   * Tries to parses the name without authorship and returns the ParsedName.canonicalName() string
160   * For unparsable types and other UnparsableExceptions the original scientific name is returned.
161   * @param rank the rank of the name if it is known externally. Helps identifying infrageneric names vs bracket authors
162   */
163  public String parseToCanonicalOrScientificName(String scientificName, @Nullable Rank rank) {
164    if (StringUtils.isBlank(scientificName)) {
165      return null;
166    }
167    try {
168      ParsedName pn = parse(scientificName, rank);
169      if (pn != null) {
170        return pn.canonicalName();
171      }
172    } catch (UnparsableException e) {
173      logUnparsable(e);
174    }
175    return StringUtils.normalizeSpace(scientificName.trim());
176  }
177
178
179
180  private ParsedName convert(String scientificName, Rank rank, org.gbif.nameparser.api.ParsedName pn) throws UnparsableException {
181    // throw unparsable for all unparsable types but placeholder and for all names that have a not parsed state
182    if ((!pn.getType().isParsable() && pn.getType() != org.gbif.nameparser.api.NameType.PLACEHOLDER)
183        || pn.getState() == org.gbif.nameparser.api.ParsedName.State.NONE) {
184      throw new UnparsableException(gbifNameType(pn), scientificName);
185    }
186
187    ParsedName gbif = new ParsedName();
188
189    gbif.setType(gbifNameType(pn));
190    gbif.setScientificName(scientificName);
191
192    gbif.setGenusOrAbove(pn.getGenus() != null ? pn.getGenus(): pn.getUninomial());
193    gbif.setInfraGeneric(pn.getInfragenericEpithet());
194    gbif.setSpecificEpithet(pn.getSpecificEpithet());
195    gbif.setInfraSpecificEpithet(pn.getInfraspecificEpithet());
196    gbif.setCultivarEpithet(pn.getCultivarEpithet());
197    gbif.setNotho(toGbif(pn.getNotho()));
198    gbif.setRank(toGbif(pn.getRank()));
199    // in the old API we used null instead of unranked
200    if (gbif.getRank() == Rank.UNRANKED && Rank.UNRANKED != rank) {
201      gbif.setRank(null);
202    }
203    gbif.setStrain(pn.getPhrase());
204    gbif.setSensu(pn.getTaxonomicNote());
205
206    gbif.setAuthorship(NameFormatter.authorString(pn.getCombinationAuthorship(), false));
207    gbif.setYear(pn.getCombinationAuthorship().getYear());
208    gbif.setBracketAuthorship(NameFormatter.authorString(pn.getBasionymAuthorship(), false));
209    gbif.setBracketYear(pn.getBasionymAuthorship().getYear());
210
211    gbif.setNomStatus(pn.getNomenclaturalNote());
212    if (pn.getEpithetQualifier() != null && !pn.getEpithetQualifier().isEmpty()) {
213      StringBuilder sb = new StringBuilder();
214      for (Map.Entry<NamePart, String> pq : pn.getEpithetQualifier().entrySet()) {
215        if (sb.length() < 1) {
216          sb.append(" ");
217        }
218        sb.append(pq.getValue())
219          .append(" ")
220          .append(pn.getEpithet(pq.getKey()));
221      }
222      gbif.setRemarks(sb.toString());
223    }
224
225    // we throw UnparsableException above already for State.NONE
226    gbif.setParsed(true);
227    gbif.setParsedPartially(pn.getState() == State.PARTIAL);
228
229    return gbif;
230  }
231
232
233  public static NameType gbifNameType(org.gbif.nameparser.api.ParsedName pn) {
234    NameType t;
235    // detect name types that only exist in the GBIF API v1
236    if (pn.isDoubtful() && pn.getWarnings().contains(Warnings.BLACKLISTED_EPITHET)) {
237      t = NameType.BLACKLISTED;
238    } else if (pn.isCandidatus()) {
239      t = NameType.CANDIDATUS;
240    } else if (pn.getCode() == NomCode.CULTIVARS || pn.getCultivarEpithet() != null) {
241      t = NameType.CULTIVAR;
242    } else {
243      // convert all others
244      t = NAME_TYPE_MAP.get(pn.getType());
245    }
246    // use doubtful in too good cases
247    if (pn.isDoubtful() && (t == NameType.SCIENTIFIC || t == NameType.CULTIVAR)) {
248      return NameType.DOUBTFUL;
249    }
250    return t;
251  }
252
253  public static org.gbif.api.vocabulary.NamePart toGbif(NamePart notho) {
254    return convertEnum(org.gbif.api.vocabulary.NamePart.class, notho);
255  }
256
257  public static Rank toGbif(org.gbif.nameparser.api.Rank rank) {
258    if (rank == null) {
259      return null;
260    }
261    switch (rank) {
262      case SUPERDIVISION: return Rank.SUPERPHYLUM;
263      case DIVISION: return Rank.PHYLUM;
264      case SUBDIVISION: return Rank.SUBPHYLUM;
265      case INFRADIVISION: return Rank.INFRAPHYLUM;
266
267      case SUPERSECTION:
268      case SUPERSERIES:
269        return Rank.INFRAGENERIC_NAME;
270      
271      case MEGAFAMILY:
272      case GRANDFAMILY:
273      case EPIFAMILY:
274
275      case GIGAORDER:
276      case MIRORDER:
277      case NANORDER:
278      case HYPOORDER:
279      case MINORDER:
280
281      case MEGACOHORT:
282
283      case GIGACLASS:
284      case MEGACLASS:
285      case SUBTERCLASS:
286
287      case PARVPHYLUM:
288      case MICROPHYLUM:
289      case NANOPHYLUM:
290
291      case REALM:
292      case SUBREALM:
293        return Rank.SUPRAGENERIC_NAME;
294
295      default: return convertEnum(Rank.class, rank);
296    }
297  }
298
299
300  public static org.gbif.nameparser.api.Rank fromGbif(Rank rank) {
301    if (rank == null) {
302      return null;
303    }
304    if (Rank.RACE == rank) {
305      return org.gbif.nameparser.api.Rank.PROLES;
306    }
307    return convertEnum(org.gbif.nameparser.api.Rank.class, rank);
308  }
309
310  /**
311   * Converts an enumeration value into a constant with the exact same name from a different enumeration class.
312   * In case the enumeration constant name does not exist an error is thrown.
313   *
314   * @param targetClass class of the target enumeration
315   * @param value
316   * @throws IllegalArgumentException in case the enumeration name does not exist in the target class
317   */
318  private static <G extends Enum<G>> G convertEnum(Class<G> targetClass, Enum<?> value) {
319    try {
320      return value == null ? null : Enum.valueOf(targetClass, value.name());
321    } catch (IllegalArgumentException e) {
322      LOG.warn("Unable to convert {} into {}", value, targetClass);
323      return null;
324    }
325  }
326}