001/* 002 * Copyright 2020-2021 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.api.vocabulary; 017 018import org.gbif.api.util.AnnotationUtils; 019import org.gbif.dwc.terms.DwcTerm; 020import org.gbif.dwc.terms.GbifTerm; 021import org.gbif.dwc.terms.Term; 022 023import java.util.Arrays; 024import java.util.Collections; 025import java.util.HashSet; 026import java.util.Set; 027 028import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.ERROR; 029import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.INFO; 030import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.WARNING; 031 032/** 033 * Enumeration of issues for each name usage record encountered during checklist processing. 034 */ 035public enum NameUsageIssue implements InterpretationRemark { 036 /** 037 * The value for dwc:parentNameUsageID could not be resolved. 038 */ 039 PARENT_NAME_USAGE_ID_INVALID(ERROR, DwcTerm.parentNameUsageID), 040 041 /** 042 * The value for dwc:acceptedNameUsageID could not be resolved. 043 */ 044 ACCEPTED_NAME_USAGE_ID_INVALID(ERROR, DwcTerm.acceptedNameUsageID), 045 046 /** 047 * The value for dwc:originalNameUsageID could not be resolved. 048 */ 049 ORIGINAL_NAME_USAGE_ID_INVALID(ERROR, DwcTerm.originalNameUsageID), 050 051 /** 052 * Synonym lacking an accepted name. 053 */ 054 ACCEPTED_NAME_MISSING(DwcTerm.taxonomicStatus, DwcTerm.acceptedNameUsageID, DwcTerm.acceptedNameUsage), 055 056 /** 057 * dwc:taxonRank could not be interpreted 058 */ 059 RANK_INVALID(DwcTerm.taxonRank), 060 061 /** 062 * dwc:nomenclaturalStatus could not be interpreted 063 */ 064 NOMENCLATURAL_STATUS_INVALID(DwcTerm.nomenclaturalStatus), 065 066 /** 067 * dwc:taxonomicStatus could not be interpreted 068 */ 069 TAXONOMIC_STATUS_INVALID(DwcTerm.taxonomicStatus), 070 071 /** 072 * The scientific name was assembled from the individual name parts and not given as a whole string. 073 */ 074 SCIENTIFIC_NAME_ASSEMBLED(INFO, DwcTerm.scientificName, DwcTerm.genus, DwcTerm.specificEpithet, DwcTerm.infraspecificEpithet, DwcTerm.taxonRank, DwcTerm.scientificNameAuthorship, DwcTerm.namePublishedInYear), 075 076 /** 077 * If a synonym points to another synonym as its accepted taxon the chain is resolved. 078 */ 079 CHAINED_SYNOYM(DwcTerm.acceptedNameUsageID, DwcTerm.acceptedNameUsage), 080 081 /** 082 * The authorship of the original name does not match the authorship in brackets of the actual name. 083 */ 084 BASIONYM_AUTHOR_MISMATCH(DwcTerm.scientificName, DwcTerm.scientificNameAuthorship, DwcTerm.originalNameUsageID, DwcTerm.originalNameUsage), 085 086 TAXONOMIC_STATUS_MISMATCH(DwcTerm.taxonomicStatus, DwcTerm.acceptedNameUsageID, DwcTerm.acceptedNameUsage), 087 088 /** 089 * The child parent classification resulted into a cycle that needed to be resolved/cut. 090 */ 091 PARENT_CYCLE(ERROR, DwcTerm.parentNameUsageID, DwcTerm.parentNameUsage), 092 093 /** 094 * The given ranks of the names in the classification hierarchy do not follow the hierarchy of ranks. 095 */ 096 CLASSIFICATION_RANK_ORDER_INVALID(ERROR, DwcTerm.parentNameUsageID, DwcTerm.parentNameUsage, DwcTerm.taxonRank), 097 098 /** 099 * The denormalized classification could not be applied to the name usage. 100 * For example if the id based classification has no ranks. 101 */ 102 CLASSIFICATION_NOT_APPLIED(DwcTerm.kingdom, DwcTerm.phylum, DwcTerm.class_, DwcTerm.order, DwcTerm.family, DwcTerm.genus), 103 104 /** 105 * At least one vernacular name extension record attached to this name usage is invalid. 106 */ 107 VERNACULAR_NAME_INVALID(DwcTerm.vernacularName), 108 109 /** 110 * At least one description extension record attached to this name usage is invalid. 111 */ 112 DESCRIPTION_INVALID(GbifTerm.Description), 113 114 /** 115 * At least one distribution extension record attached to this name usage is invalid. 116 */ 117 DISTRIBUTION_INVALID(GbifTerm.Distribution), 118 119 /** 120 * At least one species profile extension record attached to this name usage is invalid. 121 */ 122 SPECIES_PROFILE_INVALID(GbifTerm.SpeciesProfile), 123 124 /** 125 * At least one multimedia extension record attached to this name usage is invalid. 126 * This covers multimedia coming in through various extensions including 127 * Audubon core, Simple images or multimedia or EOL media. 128 */ 129 MULTIMEDIA_INVALID(GbifTerm.Multimedia), 130 131 /** 132 * At least one bibliographic reference extension record attached to this name usage is invalid. 133 */ 134 BIB_REFERENCE_INVALID(GbifTerm.Reference), 135 136 /** 137 * At least one alternative identifier extension record attached to this name usage is invalid. 138 */ 139 ALT_IDENTIFIER_INVALID(GbifTerm.Identifier), 140 141 /** 142 * Name usage could not be matched to the GBIF backbone. 143 */ 144 BACKBONE_MATCH_NONE(INFO, DwcTerm.scientificName, DwcTerm.scientificNameAuthorship, DwcTerm.kingdom, DwcTerm.taxonRank), 145 146 /** 147 * Name usage could only be matched to the GBIF backbone using fuzzy matching. 148 * @deprecated because there should be no fuzzy matching being used anymore for matching checklist names 149 */ 150 @Deprecated 151 BACKBONE_MATCH_FUZZY(DwcTerm.scientificName, DwcTerm.scientificNameAuthorship, DwcTerm.kingdom, DwcTerm.taxonRank), 152 153 /** 154 * Name usage could only be matched to a GBIF backbone species, but was in fact a broader species aggregate/complex. 155 * @see <a href="https://github.com/gbif/portal-feedback/issues/2935">gbif/portal-feedback#2935</a> 156 */ 157 BACKBONE_MATCH_AGGREGATE(DwcTerm.scientificName, DwcTerm.scientificNameAuthorship, DwcTerm.kingdom, DwcTerm.taxonRank), 158 159 /** 160 * Synonym has a verbatim accepted name which is not unique and refers to several records. 161 */ 162 ACCEPTED_NAME_NOT_UNIQUE(DwcTerm.acceptedNameUsage), 163 164 /** 165 * Record has a verbatim parent name which is not unique and refers to several records. 166 */ 167 PARENT_NAME_NOT_UNIQUE(DwcTerm.parentNameUsage), 168 169 /** 170 * Record has a verbatim original name (basionym) which is not unique and refers to several records. 171 */ 172 ORIGINAL_NAME_NOT_UNIQUE(DwcTerm.originalNameUsage), 173 174 /** 175 * There were problems representing all name usage relationships, 176 * i.e. the link to the parent, accepted and/or original name. 177 * The interpreted record in ChecklistBank is lacking some of the original source relation. 178 */ 179 RELATIONSHIP_MISSING(DwcTerm.parentNameUsageID, DwcTerm.acceptedNameUsageID, DwcTerm.originalNameUsageID, DwcTerm.parentNameUsage, DwcTerm.acceptedNameUsage, DwcTerm.originalNameUsage), 180 181 /** 182 * Record has a original name (basionym) relationship which was derived from name & authorship comparison, but did not exist explicitly in the data. 183 * This should only be flagged in programmatically generated GBIF backbone usages. 184 * GBIF backbone specific issue. 185 */ 186 ORIGINAL_NAME_DERIVED(INFO), 187 188 /** 189 * There have been more than one accepted name in a homotypical basionym group of names. 190 * GBIF backbone specific issue. 191 */ 192 CONFLICTING_BASIONYM_COMBINATION(), 193 194 /** 195 * The group (currently only genera are tested) are lacking any accepted species 196 * GBIF backbone specific issue. 197 */ 198 NO_SPECIES(INFO), 199 200 /** 201 * The (accepted) bi/trinomial name does not match the parent name and should be recombined into the parent genus/species. 202 * For example the species Picea alba with a parent genus Abies is a mismatch and should be replaced by Abies alba. 203 * GBIF backbone specific issue. 204 */ 205 NAME_PARENT_MISMATCH(), 206 207 /** 208 * A potential orthographic variant exists in the backbone. 209 * GBIF backbone specific issue. 210 */ 211 ORTHOGRAPHIC_VARIANT(INFO), 212 213 /** 214 * A not synonymized homonym exists for this name in some other backbone source which have been ignored at build time. 215 */ 216 HOMONYM(INFO, DwcTerm.scientificName), 217 218 /** 219 * A bi/trinomial name published earlier than the parent genus was published. 220 * This might indicate that the name should rather be a recombination. 221 */ 222 PUBLISHED_BEFORE_GENUS(DwcTerm.scientificName, DwcTerm.scientificNameAuthorship, DwcTerm.namePublishedInYear, DwcTerm.genus, DwcTerm.parentNameUsageID, DwcTerm.parentNameUsage), 223 224 /** 225 * The scientific name string could not be parsed at all, but appears to be a parsable name type, 226 * i.e. it is not classified as a virus or hybrid formula. 227 */ 228 UNPARSABLE(DwcTerm.scientificName), 229 230 /** 231 * The beginning of the scientific name string was parsed, 232 * but there is additional information in the string that was not understood. 233 */ 234 PARTIALLY_PARSABLE(DwcTerm.scientificName); 235 236 237 private final Set<Term> related; 238 private final InterpretationRemarkSeverity severity; 239 private final boolean isDeprecated; 240 241 NameUsageIssue(Term ... related) { 242 this(WARNING, related); 243 } 244 245 NameUsageIssue(InterpretationRemarkSeverity severity, Term... related) { 246 if (related == null) { 247 this.related = Collections.emptySet(); 248 } else { 249 this.related = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(related))); 250 } 251 this.severity = severity; 252 this.isDeprecated = AnnotationUtils.isFieldDeprecated(NameUsageIssue.class, this.name()); 253 } 254 255 @Override 256 public String getId() { 257 return name(); 258 } 259 260 @Override 261 public Set<Term> getRelatedTerms() { 262 return related; 263 } 264 265 @Override 266 public InterpretationRemarkSeverity getSeverity(){ 267 return severity; 268 } 269 270 @Override 271 public boolean isDeprecated(){ 272 return isDeprecated; 273 } 274 275}