001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.vocabulary;
017
018import java.util.Arrays;
019import java.util.Collections;
020import java.util.HashMap;
021import java.util.HashSet;
022import java.util.Map;
023import java.util.Set;
024import java.util.regex.Pattern;
025
026import javax.annotation.Nullable;
027
028import org.apache.commons.lang3.StringUtils;
029
030/**
031 * Vocabulary for the nomenclatural status of a name.
032 *
033 * @see <a href="http://dev.e-taxonomy.eu/trac/wiki/NomenclaturalStatus">EDIT CDM</a>
034 * @see <a href="http://wiki.tdwg.org/twiki/bin/view/UBIF/LinneanCoreNomenclaturalStatus">TDWG LinneanCoreNomenclaturalStatus</a>
035 * @see <a href="http://www.biologybrowser.org/nomglos">Nomenclatural Glossary for Zoology</a>
036 * @see <a href="http://www.northernontarioflora.ca/definitions.cfm">Northern Ontario plant database</a>
037 * @see <a href="http://rs.gbif.org/vocabulary/gbif/nomenclatural_status.xml">rs.gbif.org vocabulary</a>
038 * @see <a href="http://darwin.eeb.uconn.edu/systsem/table.html">Nomenclatural equivalences</a>
039 *
040 */
041public enum NomenclaturalStatus {
042
043 LEGITIMATE(null, null,    "acceptable", "potentially valid"),
044
045  /**
046 * A name that is correctly proposed according to the a Code of Nomenclature.
047 * The different codes have various terminology for the same concept:
048 * <ul>
049 *   <li>Zoology: available name</li>
050 *   <li>Botany: validly published name</li>
051 *   <li>BioCode: established name</li>
052 *   <li>Bacteria: validly published name</li>
053 * </ul>
054 * An available name is not necessarily the correct name.
055 * @see <a href="https://en.wikipedia.org/wiki/Validly_published_name_(botany)">wikipedia</a>
056 * @see <a href="https://en.wikipedia.org/wiki/Available_name">wikipedia</a>
057 */
058 VALIDLY_PUBLISHED(null, null, "available","valid", "established"),
059
060  /**
061   * The name is a new combination, i.e. a name change involving the epithet of the basionym.
062   * ICBN: Name of the original author being kept within parantheses.
063   *
064   * A new name is introduced consisting of a new generic name for an earlier named species
065   * combined with the existing epitheton of said species.
066   * For exxample when Calymmatobacterium granulomatis was renamed Klebsiella granulomatis,
067   * it was referred to as Klebsiella granulomatis comb. nov. to denote it is a new combination.
068   */
069 NEW_COMBINATION("combinatio nova", "comb. nov."),
070
071  /**
072   * A scientific name that is created specifically to replace a name which is a junior synonym or homonym.
073   * New name designated when a name cannot be used for nomenclaturalpurposes and no type or original material exists.
074   * A name established expressly to replace an already established name. A nominal taxon denoted by a new
075   * replacement name (nomen novum) has the same name-bearing type as the nominal taxon denoted by the replaced name.
076   * ICZN: new name which is expressly proposed as a replacement name for a preoccupied name, automatically takes
077   * the same type and type locality. (= a replacement name or substitute name for a preoccupied name).
078   * Commonly applied to names proposed to replace junior homonyms. A name proposed as a substitute for a previously
079   * published name (ICBN Art. 7.3 and 33.4).
080   */
081 REPLACEMENT("nomen novum", "nom. nov.",      "replacement name", "substitute name"),
082
083  /**
084   * A scientific name that enjoys special nomenclatural protection, i.e. a name conserved in respective code.
085   * Names classed as available and valid by action of the ICZN or ICBN exercising its Plenary Powers .
086   * Includes rulings to conserve junior/later synonyms in place of rejected forgotten names (nomen oblitum).
087   * Such names are entered on the Official Lists.
088   */
089 CONSERVED("nomen conservandum", "nom. cons.",     "orth. cons."),
090
091  /**
092   * Protected names are conserved names applied to a name which has been given precedence over it unused senior synonym
093   * or senior homonym relegated to the status of nomen oblitum (see Article 23.9.2).
094   */
095 PROTECTED("nomen protectum", "nom. prot."),
096
097  /**
098   * Corrected names or 'improved' names, available names which are mandatory and allowable emendations
099   * of imperfect names (qv) or of taxonomic names higher than family
100   * (which are not subject to name form and ending regulations).
101   * Do not depend on transfer in taxon rank or assignment. (= an emended name).
102   */
103 CORRECTED("nomen correctum", "nom. corr.",    "improved"),
104
105  /**
106   * The original combination of a newly described any name regardless of the rank.
107   */
108 ORIGINAL_COMBINATION(null, null),
109
110  /**
111   * The original combination of a newly described species.
112   * Specific type of ORIGINAL_COMBINATION.
113   */
114 NEW_SPECIES("species novum", "sp. nov."),
115
116  /**
117   * The original combination of a newly described genus.
118   * Specific type of ORIGINAL_COMBINATION.
119   */
120 NEW_GENUS("genus novum", "gen. nov."),
121
122  /**
123   * An alternative name given in the original publication before 1953 based on the same type.
124   */
125 ALTERNATIVE("nomen alternativum", "nom. altern."),
126
127  /**
128   * A name, which was published in an obscure publication, was never widely used.
129   * In botanical literature, the name remained in obscurity.
130   * This has no influence on the formal evaluation of valid publication under ICBN.
131   * It may be valuable information nevertheless.
132   */
133 OBSCURE("nomen obscurum", "nom. obsc."),
134
135  /**
136   * A proposed conserved name. See CONSERVED.
137   */
138 CONSERVED_PROPOSED("nomen conservandum propositum", "orth. cons. prop."),
139
140  /**
141   * Provisional name, a name proposed in anticipation of the future acceptance of the taxon concerned,
142   * or of a particular circumscription, position, or rank of the taxon (ICBN Art. 34.1).
143   */
144 PROVISIONAL("nomen provisorium", "nom. prov."),
145
146  /**
147   * Formerly, a new taxon with a scant diagnosis/description
148   * (e.g., perennial; robust plant; large leaf; aromatic plant; fragrant flower; Red flowers; large fruits; etc.).
149   * Such short descriptions/diagnoses were termed as nom. subnud.
150   *
151   * Occasionally, a short diagnosis may be a key character providing an ID of a taxon.
152   * Formally, any arbitrary short description is valid under ICBN ("small fungus, spores not seen").
153   * Thus the qualifiers "nom. ambig", "nom. confus.", "nom. obsc." and "nom. subnud.",
154   * are applicable either to a currently "botany: valid and legitimate" or "zoology: available" name,
155   * or are reasons given for "nom. rej."/"nom. utique rej.".
156   * The are not actual status codes, rather highlight potential problems.
157   */
158 SUBNUDUM("nomen subnudum", "nom. subnud."),
159
160  /**
161   * proposed rejected name. Temporary status until the next botanical congress decides about the proposal.
162   */
163 REJECTED_PROPOSED("nomen rejiciendum propositum", "nom. rej. prop."),
164
165  /**
166   * proposed rejected name on the basis of appendix V of ICBN
167  */
168 REJECTED_OUTRIGHT_PROPOSED("nomen utique rejiciendum propositum", "nom. utique rej. prop."),
169
170  /**
171   * A name of uncertain sense, of doubtful validity.
172   * E.g. the name Encephalartos tridentatus (Willdenow) Lehmann (Pugillus 6, 1834) is a nomen dubium
173   * which may refer to several species of Encephalartos or Macrozamia.
174   * ICZN: doubtful or dubious names, names which are not certainly applicable to any known taxon or
175   * for which the evidence is insufficient to permit recognition of the taxon to which they belong.
176   * May possess availability conducive to uncertainty and instability.
177   * Also 'names under enquiry': NOMEN INQUIRENDUM (NOMINA INQUIRENDA).
178   *
179   * In botany a name whose application is uncertain;
180   * the confusion being derived from an incomplete or confusing description.
181   * Example: Platanus hispanica auct., non Mill. ex Münchh., nom. dub.
182   * The application of the name Platanus hispanica is uncertain, so the name has been rejected
183   * in favour of Platanus ×acerifolia (Aiton) Willd., pro. sp.
184   */
185 DOUBTFUL("nomen dubium", "nom. dub.",     "dubious"),
186
187  /**
188   * Ambiguous name, one which has been used so long by different authors in different senses that it has become
189   * a persistent cause of error and confusion. It is used in senses other than originally intended,
190   * and thus the source of much confusion. A nom. ambig. is a rejected name.
191   * Example: Trifolium agrarium L., nom. ambig.
192   * The name Trifolium agrarium was misapplied to three taxa, so the name has been rejected in favour of the names
193   * Trifolium aureum Pollich, T. dubium Sibth., and T. campestre Schreb., each referring to different taxa.
194   */
195 AMBIGUOUS("nomen ambigua", "nom. ambig."),
196
197  /**
198   * A rejected name that is based on a type consisting of two or more entirely discordant elements,
199   * so that it is difficult to select a satisfactory lectotype.
200   */
201 CONFUSED("nomen confusum", "nom. confus."),
202
203  /**
204   * a name that has not been used in the scientific community for more than fifty years after its original proposal.
205   * forgotten names, senior synonyms which have remained unused in the literature for many years.
206   * Have been treated differently by different editions of the Code, and remain unavailable names.
207   */
208 FORGOTTEN("nomen oblitum", "nom. obl."),
209
210  /**
211   * A name which violated the Code in operation at that time.
212   */
213 ABORTED("nomen abortivum", "nom. abort."),
214
215  /**
216   * In botanical nomenclature, an orthographical variant (abbreviated orth. var.) is a variant spelling
217   * of the same name. For example, Hieronima and Hyeronima are orthographical variants of Hieronyma.
218   * One of the spellings must be treated as the correct one. In this case, the spelling Hieronyma has been conserved
219   * and is to be used as the correct spelling.
220   *
221   * An inadvertent use of one of the other spellings has no consequences:
222   * the name is to be treated as if it were correctly spelled.
223   * Any subsequent use is to be corrected. Orthographical variants are treated in Art 61 of the ICBN.
224   *
225   * In zoology, orthographical variants in the formal sense do not exist;
226   * a misspelling or orthographic error is treated as a lapsus, a form of inadvertent error.
227   * The first reviser is allowed to choose one variant for mandatory further use, but in other ways,
228   * these errors generally have no further formal standing.
229   * Inadvertent misspellings are treated in Art. 32-33 of the ICZN.
230   */
231
232 ORTHOGRAPHIC_VARIANT("nomen orthographia", "orth. var.",     "spelling variant"),
233
234  /**
235   * A name superfluous when published, an unnecessary substitute name.
236   * In botany a name for which a validly published name existed previously and should have been adopted,
237   * thus the name is deemed nomenclaturally superfluous.
238   * Example: Astragalus astragalinus (Hook.) Á. & D. Löve, nom. illeg. superfl.
239   * The GRIN database reports that the combination Astragalus astragalinus (Hook.) Á.& D. Löve, is a superfluous name,
240   * based on an incorrect basionym, see R.C. Barneby, Taxon, 25(5-6): 628 (1976).
241   * The correct basionym is Phaca astragalina DC., not Astragalus astragalinus (DC.) Hook.
242   * This taxon is a synonym of Astragalus alpinus L.
243   */
244 SUPERFLUOUS("nomen superfluum", "nom. superfl."),
245
246  /**
247   * A nomen nudum (plural nomina nuda) is used for a name which is unavailable because it does not have a description,
248   * reference or indication (specifically a name published before 1931 which fails to conform to Article 12,
249   * or after 1930 but fails to conform to Article 13).
250   *
251   * Nomina nuda and other unavailable names can be made available if they are published again in a way
252   * that meets the criteria of availability;
253   * however, they are attributed to the author who first made them available, not the person who first used them.
254   */
255 NUDUM("nomen nudum", "nom. nud.",   "nomen solum","nom. sol."),
256
257  /**
258   * Null names, unavailable names which as defined by the Code are non demonstrably intentional changes of
259   * an original spelling i.e. a form of incorrect subsequent spelling.
260   */
261 NULL_NAME("nomen nullum", "nom. null."),
262
263  /**
264   * Names in specified ranks included in publications listed as suppressed works (opera utique oppressa; App. VI)
265   * are not validly published.
266   */
267 SUPPRESSED("nomen oppressa", "nom. opp."),
268
269  /**
270   * Name rejected outright, i. e. without proposing another name to be conserved in favor of this name
271   * (nomen utique rejiciendum). This status applies to explicitly listed protonym names as well as to any
272   * combinations based on the protonym. See ICBN (Art. 56.1, Appendix V) because otherwise it would cause a
273   * disadvantageous nomenclatural change.
274   *
275   * Example: Cerastium vulgatum L. 1755, non 1762, nom. utique rej.
276   * For a discussion on why this name was rejected, see Brummitt 2000. Taxon 49 (2): 262.
277   */
278 REJECTED_OUTRIGHT("nomen utique rejiciendum", "nom. utique rej."),
279
280  /**
281   * Rejected / surpressed name. Inverse of conserved against
282   */
283 REJECTED("nomen rejiciendum", "nom. rej."),
284
285  /**
286   * A nomen illegitimum is a validly published name, but one that contravenes some of the articles laid down by
287   * the International Botanical Congress. The name could be illegitimate because:
288   * <ul>
289   *   <li>(article 52) it was superfluous at its time of publication, i.e., the taxon (as represented by the type) already has a name</li>
290   *   <li>(articles 53 and 54) the name has already been applied to another plant (a homonym)</li>
291   * </ul>
292   * For the procedure of rejecting otherwise legitimate names, see conserved name.
293   */
294 ILLEGITIMATE("nomen illegitimum", "nom. illeg."),
295
296  /**
297   * A name that was not validly published according to the rules of the code,
298   * or a name that was not accepted by the author in the original publication, for example,
299   * if the name was suggested as a synonym of an accepted name.
300   * In zoology this is called an UNAVAILABLE name.
301   * Example: Linaria vulgaris Hill, nom. inval.
302   * Many names published by John Hill between 1753 and 1757 were not accepted as validly published.
303   */
304 INVALID("nomen invalidum", "nom. inval.",    "unavailable"),
305
306  /**
307   * denied names, unavailable names which are incorrect original spellings as defined by the Code.
308   * Subset of nom.inval. based only on spellings
309   */
310 DENIED("nomen negatum", "nom. neg.");
311
312  private static final Set<NomenclaturalStatus> VALID_VALUES =
313    Collections.unmodifiableSet(
314      new HashSet<>(
315        Arrays.asList(VALIDLY_PUBLISHED, LEGITIMATE, NEW_COMBINATION, REPLACEMENT, NEW_COMBINATION,
316          NEW_GENUS, NEW_SPECIES, SUBNUDUM, CONSERVED, PROTECTED, CORRECTED, ALTERNATIVE,
317          CONSERVED_PROPOSED, PROVISIONAL)));
318
319  private static final Set<NomenclaturalStatus> DOUBTFUL_VALUES =
320    Collections.unmodifiableSet(new HashSet<>(Arrays.asList(DOUBTFUL, OBSCURE)));
321
322  private static final Pattern NORMALIZE_TERM = Pattern.compile("[._ -]+");
323
324  private static String normalize(String x) {
325    return NORMALIZE_TERM.matcher(x.toUpperCase()).replaceAll("");
326  }
327
328  private static final Map<String, NomenclaturalStatus> LOOKUP;
329
330  static {
331    Map<String, NomenclaturalStatus> lookup = new HashMap<>();
332    for (NomenclaturalStatus n : values()) {
333      lookup.put(normalize(n.name()), n);
334      if (n.getLatinLabel() != null) {
335        lookup.put(normalize(n.getLatinLabel()), n);
336        lookup.put(normalize(n.getLatinLabel().replace("nomen ", "")), n);
337      }
338      if (n.getAbbreviatedLabel() != null) {
339        lookup.put(normalize(n.getAbbreviatedLabel()), n);
340      }
341      if (n.alternatives != null) {
342        for (String alt : n.alternatives) {
343          lookup.put(normalize(alt), n);
344        }
345      }
346    }
347    LOOKUP = Collections.unmodifiableMap(lookup);
348  }
349
350  /**
351   * Tries to case insenitively interpret a nomenclatural status given as a string appliying the enums name,
352   * the latin and abbreviated name of a term.
353   *
354   * @param nomStatus
355   * @deprecated use NomStatusParser instead
356   */
357  @Deprecated
358  public static NomenclaturalStatus fromString(String nomStatus) {
359    if (StringUtils.isEmpty(nomStatus)) {
360      return null;
361    }
362    return LOOKUP.get(normalize(nomStatus));
363  }
364
365  private final String latin;
366  private final String abbreviated;
367  private final String[] alternatives;
368
369  private NomenclaturalStatus(String latin, String abbreviated, String ... alternatives) {
370    this.latin = latin;
371    this.abbreviated = abbreviated;
372    this.alternatives = alternatives;
373  }
374
375  public String getLatinLabel() {
376    return latin;
377  }
378
379  /**
380   * The abbreviated status name, often used in botany.
381   * For example nom. inval.
382   */
383  @Nullable
384  public String getAbbreviatedLabel() {
385    return abbreviated;
386  }
387
388  /**
389   * A vague grouping of nomenclatural status terms including all that can be used to name a correct/valid taxon.
390   * Specifically avoid the confusing terms valid, accepted or correct here as they mean different things in the
391   * different codes.
392   */
393  public boolean isGood() {
394    return VALID_VALUES.contains(this);
395  }
396
397  public boolean isDoubtful() {
398    return DOUBTFUL_VALUES.contains(this);
399  }
400
401  /**
402   * Opposite of good and not doubtful.
403   */
404  public boolean isBad() {
405    return !(isGood() || isDoubtful());
406  }
407}