001/*
002 * Copyright 2014 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.vocabulary;
017
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;

import javax.annotation.Nullable;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
027
028/**
029 * Vocabulary for the nomenclatural status of a name.
030 *
031 * @see <a href="http://dev.e-taxonomy.eu/trac/wiki/NomenclaturalStatus">EDIT CDM</a>
032 * @see <a href="http://wiki.tdwg.org/twiki/bin/view/UBIF/LinneanCoreNomenclaturalStatus">TDWG LinneanCoreNomenclaturalStatus</a>
033 * @see <a href="http://www.biologybrowser.org/nomglos">Nomenclatural Glossary for Zoology</a>
034 * @see <a href="http://www.northernontarioflora.ca/definitions.cfm">Northern Ontario plant database</a>
035 * @see <a href="http://rs.gbif.org/vocabulary/gbif/nomenclatural_status.xml">rs.gbif.org vocabulary</a>
036 * @see <a href="http://darwin.eeb.uconn.edu/systsem/table.html">Nomenclatural equivalences</a>
037 *
038 */
039public enum NomenclaturalStatus {
040
041  /**
042   *
043   */
044 LEGITIMATE(null, null,    "acceptable", "potentially valid"),
045
046  /**
047 * A name that is correctly proposed according to the a Code of Nomenclature.
048 * The different codes have various terminology for the same concept:
049 * <ul>
050 *   <li>Zoology: available name</li>
051 *   <li>Botany: validly published name</li>
052 *   <li>BioCode: established name</li>
053 *   <li>Bacteria: validly published name</li>
054 * </ul>
055 * An available name is not necessarily the correct name.
056 * @See <a href="http://en.wikipedia.org/wiki/Validly_published_name_(botany)">wikipedia</a>
057 * @See <a href="http://en.wikipedia.org/wiki/Available_name">wikipedia</a>
058 */
059 VALIDLY_PUBLISHED(null, null, "available","valid", "established"),
060
061  /**
062   * The name is a new combination, i.e. a name change involving the epithet of the basionym.
063   * ICBN: Name of the original author being kept within parantheses.
064   *
065   * A new name is introduced consisting of a new generic name for an earlier named species
066   * combined with the existing epitheton of said species.
067   * For exxample when Calymmatobacterium granulomatis was renamed Klebsiella granulomatis,
068   * it was referred to as Klebsiella granulomatis comb. nov. to denote it is a new combination.
069   */
070 NEW_COMBINATION("combinatio nova", "comb. nov."),
071
072  /**
073   * A scientific name that is created specifically to replace a name which is a junior synonym or homonym.
074   * New name designated when a name cannot be used for nomenclaturalpurposes and no type or original material exists.
075   * A name established expressly to replace an already established name. A nominal taxon denoted by a new
076   * replacement name (nomen novum) has the same name-bearing type as the nominal taxon denoted by the replaced name.
077   * ICZN: new name which is expressly proposed as a replacement name for a preoccupied name, automatically takes
078   * the same type and type locality. (= a replacement name or substitute name for a preoccupied name).
079   * Commonly applied to names proposed to replace junior homonyms. A name proposed as a substitute for a previously
080   * published name (ICBN Art. 7.3 and 33.4).
081   */
082 REPLACEMENT("nomen novum", "nom. nov.",      "replacement name", "substitute name"),
083
084  /**
085   * A scientific name that enjoys special nomenclatural protection, i.e. a name conserved in respective code.
086   * Names classed as available and valid by action of the ICZN or ICBN exercising its Plenary Powers .
087   * Includes rulings to conserve junior/later synonyms in place of rejected forgotten names (nomen oblitum).
088   * Such names are entered on the Official Lists.
089   */
090 CONSERVED("nomen conservandum", "nom. cons.",     "orth. cons."),
091
092  /**
093   * Protected names are conserved names applied to a name which has been given precedence over it unused senior synonym
094   * or senior homonym relegated to the status of nomen oblitum (see Article 23.9.2).
095   */
096 PROTECTED("nomen protectum", "nom. prot."),
097
098  /**
099   * Corrected names or 'improved' names, available names which are mandatory and allowable emendations
100   * of imperfect names (qv) or of taxonomic names higher than family
101   * (which are not subject to name form and ending regulations).
102   * Do not depend on transfer in taxon rank or assignment. (= an emended name).
103   */
104 CORRECTED("nomen correctum", "nom. corr.",    "improved"),
105
106  /**
107   * The original combination of a newly described any name regardless of the rank.
108   */
109 ORIGINAL_COMBINATION(null, null),
110
111  /**
112   * The original combination of a newly described species.
113   * Specific type of ORIGINAL_COMBINATION.
114   */
115 NEW_SPECIES("species novum", "sp. nov."),
116
117  /**
118   * The original combination of a newly described genus.
119   * Specific type of ORIGINAL_COMBINATION.
120   */
121 NEW_GENUS("genus novum", "gen. nov."),
122
123  /**
124   * An alternative name given in the original publication before 1953 based on the same type.
125   */
126 ALTERNATIVE("nomen alternativum", "nom. altern."),
127
128  /**
129   * A name, which was published in an obscure publication, was never widely used.
130   * In botanical literature, the name remained in obscurity.
131   * This has no influence on the formal evaluation of valid publication under ICBN.
132   * It may be valuable information nevertheless.
133   */
134 OBSCURE("nomen obscurum", "nom. obsc."),
135
136  /**
137   * A proposed conserved name. See CONSERVED.
138   */
139 CONSERVED_PROPOSED("nomen conservandum propositum", "orth. cons. prop."),
140
141  /**
142   * Provisional name, a name proposed in anticipation of the future acceptance of the taxon concerned,
143   * or of a particular circumscription, position, or rank of the taxon (ICBN Art. 34.1).
144   */
145 PROVISIONAL("nomen provisorium", "nom. prov."),
146
147
148  /**
149   * Formerly, a new taxon with a scant diagnosis/description
150   * (e.g., perennial; robust plant; large leaf; aromatic plant; fragrant flower; Red flowers; large fruits; etc.).
151   * Such short descriptions/diagnoses were termed as nom. subnud.
152   *
153   * Occasionally, a short diagnosis may be a key character providing an ID of a taxon.
154   * Formally, any arbitrary short description is valid under ICBN ("small fungus, spores not seen").
155   * Thus the qualifiers "nom. ambig", "nom. confus.", "nom. obsc." and "nom. subnud.",
156   * are applicable either to a currently "botany: valid and legitimate" or "zoology: available" name,
157   * or are reasons given for "nom. rej."/"nom. utique rej.".
158   * The are not actual status codes, rather highlight potential problems.
159   */
160 SUBNUDUM("nomen subnudum", "nom. subnud."),
161
162  /**
163   * proposed rejected name. Temporary status until the next botanical congress decides about the proposal.
164   */
165 REJECTED_PROPOSED("nomen rejiciendum propositum", "nom. rej. prop."),
166
167  /**
168   * proposed rejected name on the basis of appendix V of ICBN
169  */
170 REJECTED_OUTRIGHT_PROPOSED("nomen utique rejiciendum propositum", "nom. utique rej. prop."),
171
172  /**
173   * A name of uncertain sense, of doubtful validity.
174   * E.g. the name Encephalartos tridentatus (Willdenow) Lehmann (Pugillus 6, 1834) is a nomen dubium
175   * which may refer to several species of Encephalartos or Macrozamia.
176   * ICZN: doubtful or dubious names, names which are not certainly applicable to any known taxon or
177   * for which the evidence is insufficient to permit recognition of the taxon to which they belong.
178   * May possess availability conducive to uncertainty and instability.
179   * Also 'names under enquiry': NOMEN INQUIRENDUM (NOMINA INQUIRENDA).
180   *
181   * In botany a name whose application is uncertain;
182   * the confusion being derived from an incomplete or confusing description.
183   * Example: Platanus hispanica auct., non Mill. ex Münchh., nom. dub.
184   * The application of the name Platanus hispanica is uncertain, so the name has been rejected
185   * in favour of Platanus ×acerifolia (Aiton) Willd., pro. sp.
186   */
187 DOUBTFUL("nomen dubium", "nom. dub.",     "dubious"),
188
189  /**
190   * Ambiguous name, one which has been used so long by different authors in different senses that it has become
191   * a persistent cause of error and confusion. It is used in senses other than originally intended,
192   * and thus the source of much confusion. A nom. ambig. is a rejected name.
193   * Example: Trifolium agrarium L., nom. ambig.
194   * The name Trifolium agrarium was misapplied to three taxa, so the name has been rejected in favour of the names
195   * Trifolium aureum Pollich, T. dubium Sibth., and T. campestre Schreb., each referring to different taxa.
196   */
197 AMBIGUOUS("nomen ambigua", "nom. ambig."),
198
199  /**
200   * A rejected name that is based on a type consisting of two or more entirely discordant elements,
201   * so that it is difficult to select a satisfactory lectotype.
202   */
203 CONFUSED("nomen confusum", "nom. confus."),
204
205  /**
206   * a name that has not been used in the scientific community for more than fifty years after its original proposal.
207   * forgotten names, senior synonyms which have remained unused in the literature for many years.
208   * Have been treated differently by different editions of the Code, and remain unavailable names.
209   */
210 FORGOTTEN("nomen oblitum", "nom. obl."),
211
212  /**
213   * A name which violated the Code in operation at that time.
214   */
215 ABORTED("nomen abortivum", "nom. abort."),
216
217  /**
218   * In botanical nomenclature, an orthographical variant (abbreviated orth. var.) is a variant spelling
219   * of the same name. For example, Hieronima and Hyeronima are orthographical variants of Hieronyma.
220   * One of the spellings must be treated as the correct one. In this case, the spelling Hieronyma has been conserved
221   * and is to be used as the correct spelling.
222   *
223   * An inadvertent use of one of the other spellings has no consequences:
224   * the name is to be treated as if it were correctly spelled.
225   * Any subsequent use is to be corrected. Orthographical variants are treated in Art 61 of the ICBN.
226   *
227   * In zoology, orthographical variants in the formal sense do not exist;
228   * a misspelling or orthographic error is treated as a lapsus, a form of inadvertent error.
229   * The first reviser is allowed to choose one variant for mandatory further use, but in other ways,
230   * these errors generally have no further formal standing.
231   * Inadvertent misspellings are treated in Art. 32-33 of the ICZN.
232   */
233
234 ORTHOGRAPHIC_VARIANT("nomen orthographia", "orth. var.",     "spelling variant"),
235
236  /**
237   * A name superfluous when published, an unnecessary substitute name.
238   * In botany a name for which a validly published name existed previously and should have been adopted,
239   * thus the name is deemed nomenclaturally superfluous.
240   * Example: Astragalus astragalinus (Hook.) Á. & D. Löve, nom. illeg. superfl.
241   * The GRIN database reports that the combination Astragalus astragalinus (Hook.) Á.& D. Löve, is a superfluous name,
242   * based on an incorrect basionym, see R.C. Barneby, Taxon, 25(5-6): 628 (1976).
243   * The correct basionym is Phaca astragalina DC., not Astragalus astragalinus (DC.) Hook.
244   * This taxon is a synonym of Astragalus alpinus L.
245   */
246 SUPERFLUOUS("nomen superfluum", "nom. superfl."),
247
248  /**
249   * A nomen nudum (plural nomina nuda) is used for a name which is unavailable because it does not have a description,
250   * reference or indication (specifically a name published before 1931 which fails to conform to Article 12,
251   * or after 1930 but fails to conform to Article 13).
252   *
253   * Nomina nuda and other unavailable names can be made available if they are published again in a way
254   * that meets the criteria of availability;
255   * however, they are attributed to the author who first made them available, not the person who first used them.
256   */
257 NUDUM("nomen nudum", "nom. nud.",   "nomen solum","nom. sol."),
258
259  /**
260   * Null names, unavailable names which as defined by the Code are non demonstrably intentional changes of
261   * an original spelling i.e. a form of incorrect subsequent spelling.
262   */
263 NULL_NAME("nomen nullum", "nom. null."),
264
265  /**
266   * Names in specified ranks included in publications listed as suppressed works (opera utique oppressa; App. VI)
267   * are not validly published.
268   */
269 SUPPRESSED("nomen oppressa", "nom. opp."),
270
271  /**
272   * Name rejected outright, i. e. without proposing another name to be conserved in favor of this name
273   * (nomen utique rejiciendum). This status applies to explicitly listed protonym names as well as to any
274   * combinations based on the protonym. See ICBN (Art. 56.1, Appendix V) because otherwise it would cause a
275   * disadvantageous nomenclatural change.
276   *
277   * Example: Cerastium vulgatum L. 1755, non 1762, nom. utique rej.
278   * For a discussion on why this name was rejected, see Brummitt 2000. Taxon 49 (2): 262.
279   */
280 REJECTED_OUTRIGHT("nomen utique rejiciendum", "nom. utique rej."),
281
282  /**
283   * Rejected / surpressed name. Inverse of conserved against
284   */
285 REJECTED("nomen rejiciendum", "nom. rej."),
286
287  /**
288   * A nomen illegitimum is a validly published name, but one that contravenes some of the articles laid down by
289   * the International Botanical Congress. The name could be illegitimate because:
290   * <ul>
291   *   <li>(article 52) it was superfluous at its time of publication, i.e., the taxon (as represented by the type) already has a name</li>
292   *   <li>(articles 53 and 54) the name has already been applied to another plant (a homonym)</li>
293   * </ul>
294   * For the procedure of rejecting otherwise legitimate names, see conserved name.
295   */
296 ILLEGITIMATE("nomen illegitimum", "nom. illeg."),
297
298  /**
299   * A name that was not validly published according to the rules of the code,
300   * or a name that was not accepted by the author in the original publication, for example,
301   * if the name was suggested as a synonym of an accepted name.
302   * In zoology this is called an UNAVAILABLE name.
303   * Example: Linaria vulgaris Hill, nom. inval.
304   * Many names published by John Hill between 1753 and 1757 were not accepted as validly published.
305   */
306 INVALID("nomen invalidum", "nom. inval.",    "unavailable"),
307
308  /**
309   * denied names, unavailable names which are incorrect original spellings as defined by the Code.
310   * Subset of nom.inval. based only on spellings
311   */
312 DENIED("nomen negatum", "nom. neg.");
313
314
315
316  private static final Set VALID_VALUES = ImmutableSet.of(VALIDLY_PUBLISHED, LEGITIMATE, NEW_COMBINATION, REPLACEMENT,
317    NEW_COMBINATION, NEW_GENUS, NEW_SPECIES,SUBNUDUM,CONSERVED, PROTECTED, CORRECTED,ALTERNATIVE, CONSERVED_PROPOSED,
318    PROVISIONAL);
319  private static final Set DOUBTFUL_VALUES = ImmutableSet.of(DOUBTFUL, OBSCURE);
320
321  private static final Pattern NORMALIZE_TERM = Pattern.compile("[._ -]+");
322  private static String normalize(String x) {
323    return NORMALIZE_TERM.matcher(x.toUpperCase()).replaceAll("");
324  }
325
326  private static final Map<String, NomenclaturalStatus> LOOKUP;
327  static {
328    Map<String, NomenclaturalStatus> lookup = Maps.newHashMap();
329    for (NomenclaturalStatus n : values()) {
330      lookup.put(normalize(n.name()), n);
331      if (n.getLatinLabel() != null) {
332        lookup.put(normalize(n.getLatinLabel()), n);
333        lookup.put(normalize(n.getLatinLabel().replace("nomen ", "")), n);
334      }
335      if (n.getAbbreviatedLabel() != null) {
336        lookup.put(normalize(n.getAbbreviatedLabel()), n);
337      }
338      if (n.alternatives != null) {
339        for (String alt : n.alternatives) {
340          lookup.put(normalize(alt), n);
341        }
342      }
343    }
344    LOOKUP = ImmutableMap.copyOf(lookup);
345  }
346
347
348  /**
349   * Tries to case insenitively interpret a nomenclatural status given as a string appliying the enums name,
350   * the latin and abbreviated name of a term.
351   *
352   * @param nomStatus
353   * @return
354   *
355   * @deprecated use NomStatusParser instead
356   */
357  @Deprecated
358  public static NomenclaturalStatus fromString(String nomStatus) {
359    if (Strings.isNullOrEmpty(nomStatus)) {
360      return null;
361    }
362    return LOOKUP.get(normalize(nomStatus));
363  }
364
365
366
367  private final String latin;
368  private final String abbreviated;
369  private final String[] alternatives;
370
371  private NomenclaturalStatus(String latin, String abbreviated, String ... alternatives) {
372    this.latin = latin;
373    this.abbreviated = abbreviated;
374    this.alternatives = alternatives;
375  }
376
377  public String getLatinLabel() {
378    return latin;
379  }
380
381  /**
382   * The abbreviated status name, often used in botany.
383   * For example nom. inval.
384   * @return
385   */
386  @Nullable
387  public String getAbbreviatedLabel() {
388    return abbreviated;
389  }
390
391  /**
392   * A vague grouping of nomenclatural status terms including all that can be used to name a correct/valid taxon.
393   * Specifically avoid the confusing terms valid, accepted or correct here as they mean different things in the
394   * different codes.
395   */
396  public boolean isGood() {
397    return VALID_VALUES.contains(this);
398  }
399
400  public boolean isDoubtful() {
401    return DOUBTFUL_VALUES.contains(this);
402  }
403
404  /**
405   * Opposite of good and not doubtful.
406   */
407  public boolean isBad() {
408    return !(isGood() || isDoubtful());
409  }
410}