001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.api.vocabulary; 015 016import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.ERROR; 017import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.INFO; 018import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.WARNING; 019 020import java.util.Arrays; 021import java.util.Collections; 022import java.util.HashSet; 023import java.util.List; 024import java.util.Set; 025import java.util.stream.Collectors; 026import org.apache.commons.lang3.ArrayUtils; 027import org.gbif.api.util.AnnotationUtils; 028import org.gbif.dwc.terms.DcTerm; 029import org.gbif.dwc.terms.DwcTerm; 030import org.gbif.dwc.terms.GbifDnaTerm; 031import org.gbif.dwc.terms.MixsTerm; 032import org.gbif.dwc.terms.Term; 033 034/** An enumeration of validation rules for single occurrence records. */ 035public enum OccurrenceIssue implements InterpretationRemark { 036 037 /** Coordinate is the exact 0°, 0° coordinate, often indicating a bad null coordinate. */ 038 ZERO_COORDINATE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM), 039 040 /** 041 * Coordinate has a latitude and/or longitude value beyond the maximum (or minimum) decimal value. 042 */ 043 COORDINATE_OUT_OF_RANGE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM), 044 045 /** Coordinate value is given in some form but GBIF is unable to interpret it. */ 046 COORDINATE_INVALID(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM), 047 048 /** Original coordinate modified by rounding to 5 decimals. */ 049 COORDINATE_ROUNDED(INFO, TermsGroup.COORDINATES_TERMS_NO_DATUM), 050 051 /** The geodetic datum given could not be interpreted. */ 052 GEODETIC_DATUM_INVALID(WARNING, DwcTerm.geodeticDatum), 053 054 /** 055 * Indicating that the interpreted coordinates assume they are based on WGS84 datum as the datum 056 * was either not indicated or interpretable. See GEODETIC_DATUM_INVALID. 057 */ 058 GEODETIC_DATUM_ASSUMED_WGS84(INFO, DwcTerm.geodeticDatum), 059 060 /** 061 * The original coordinate was successfully reprojected from a different geodetic datum to WGS84. 062 */ 063 COORDINATE_REPROJECTED(INFO, TermsGroup.COORDINATES_TERMS), 064 065 /** 066 * The given decimal latitude and longitude could not be reprojected to WGS84 based on the 067 * provided datum. 068 */ 069 COORDINATE_REPROJECTION_FAILED(WARNING, TermsGroup.COORDINATES_TERMS), 070 071 /** 072 * Indicates successful coordinate reprojection according to provided datum, but which results in 073 * a datum shift larger than 0.1 decimal degrees. 074 */ 075 COORDINATE_REPROJECTION_SUSPICIOUS(WARNING, TermsGroup.COORDINATES_TERMS), 076 077 /** 078 * Indicates an invalid or very unlikely coordinate accuracy derived from precision or uncertainty 079 * in meters. 080 */ 081 @Deprecated // see POR-3061 082 COORDINATE_ACCURACY_INVALID(WARNING), 083 084 /** Indicates an invalid or very unlikely coordinatePrecision */ 085 COORDINATE_PRECISION_INVALID(WARNING, DwcTerm.coordinatePrecision), 086 087 /** Indicates an invalid or very unlikely dwc:uncertaintyInMeters. */ 088 COORDINATE_UNCERTAINTY_METERS_INVALID(WARNING, DwcTerm.coordinateUncertaintyInMeters), 089 090 /** There is a mismatch between coordinate uncertainty in meters and coordinate precision. */ 091 @Deprecated // see POR-1804 092 COORDINATE_PRECISION_UNCERTAINTY_MISMATCH(WARNING), 093 094 /** The Footprint Spatial Reference System given could not be interpreted. */ 095 FOOTPRINT_SRS_INVALID(WARNING, DwcTerm.footprintSRS), 096 097 /** 098 * The Footprint Well-Known-Text conflicts with the interpreted coordinates (Decimal Latitude, 099 * Decimal Longitude etc). 100 */ 101 FOOTPRINT_WKT_MISMATCH(WARNING, DwcTerm.footprintWKT), 102 103 /** The Footprint Well-Known-Text given could not be interpreted. */ 104 FOOTPRINT_WKT_INVALID(WARNING, DwcTerm.footprintWKT), 105 106 /** The interpreted occurrence coordinates fall outside of the indicated country. */ 107 COUNTRY_COORDINATE_MISMATCH(WARNING, TermsGroup.COORDINATES_COUNTRY_TERMS), 108 109 /** Interpreted country for dwc:country and dwc:countryCode contradict each other. */ 110 COUNTRY_MISMATCH(WARNING, TermsGroup.COUNTRY_TERMS), 111 112 /** Uninterpretable country values found. */ 113 COUNTRY_INVALID(WARNING, TermsGroup.COUNTRY_TERMS), 114 115 /** The interpreted country is based on the coordinates, not the verbatim string information. */ 116 COUNTRY_DERIVED_FROM_COORDINATES(WARNING, TermsGroup.COORDINATES_COUNTRY_TERMS), 117 118 /** The interpreted occurrence coordinates fall outside of the indicated continent. */ 119 CONTINENT_COORDINATE_MISMATCH(WARNING), 120 121 /** The interpreted continent and country do not match. */ 122 CONTINENT_COUNTRY_MISMATCH(WARNING), 123 124 /** Uninterpretable continent values found. */ 125 CONTINENT_INVALID(WARNING), 126 127 /** The interpreted continent is based on the country, not the verbatim string information. */ 128 CONTINENT_DERIVED_FROM_COUNTRY(WARNING), 129 130 /** The interpreted continent is based on the coordinates, not the verbatim string information. */ 131 CONTINENT_DERIVED_FROM_COORDINATES(WARNING), 132 133 /** Latitude and longitude appear to be swapped. */ 134 PRESUMED_SWAPPED_COORDINATE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM), 135 136 /** Longitude appears to be negated, e.g. 32.3 instead of -32.3 */ 137 PRESUMED_NEGATED_LONGITUDE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM), 138 139 /** Latitude appears to be negated, e.g. 32.3 instead of -32.3 */ 140 PRESUMED_NEGATED_LATITUDE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM), 141 142 /** 143 * The recorded date specified as the eventDate string and the individual year, month, day and/or 144 * startDayOfYear, endDayOfYear are contradictory. 145 */ 146 RECORDED_DATE_MISMATCH(WARNING, TermsGroup.RECORDED_DATE_TERMS), 147 148 /** A (partial) invalid date is given, such as a non-existent date, zero month, etc. */ 149 RECORDED_DATE_INVALID(WARNING, TermsGroup.RECORDED_DATE_TERMS), 150 151 /** 152 * The recorded date is highly unlikely, falling either into the future or representing a very old 153 * date before 1600 thus predating modern taxonomy. 154 */ 155 RECORDED_DATE_UNLIKELY(WARNING, TermsGroup.RECORDED_DATE_TERMS), 156 157 /** Matching to the taxonomic backbone can only be done using a fuzzy, non-exact match. */ 158 TAXON_MATCH_FUZZY(WARNING, TermsGroup.TAXONOMY_TERMS), 159 160 /** 161 * Matching to the taxonomic backbone can only be done on a higher rank and not the scientific 162 * name. 163 */ 164 TAXON_MATCH_HIGHERRANK(WARNING, TermsGroup.TAXONOMY_TERMS), 165 166 /** 167 * Matching to the taxonomic backbone can only be done on a species level, but the occurrence was 168 * in fact considered a broader species aggregate/complex. 169 * 170 * @see <a 171 * href="https://github.com/gbif/portal-feedback/issues/2935">gbif/portal-feedback#2935</a> 172 */ 173 TAXON_MATCH_AGGREGATE(WARNING, TermsGroup.TAXONOMY_TERMS), 174 175 /** 176 * The scientificNameID was not used when mapping the record to the GBIF backbone. This may 177 * indicate one of 178 * 179 * <ul> 180 * <li>The ID uses a pattern not configured for use by GBIF 181 * <li>The ID did not uniquely(!) identify a concept in the checklist 182 * <li>The ID found a concept in the checklist which did not map to the backbone 183 * <li>A different ID was used, or the record names were used as no ID lookup successfully 184 * linked to the backbone 185 * </ul> 186 * 187 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 188 */ 189 TAXON_MATCH_SCIENTIFIC_NAME_ID_IGNORED(INFO, DwcTerm.scientificNameID), 190 191 /** 192 * The taxonConceptID was not used when mapping the record to the GBIF backbone. This may indicate 193 * one of 194 * 195 * <ul> 196 * <li>The ID uses a pattern not configured for use by GBIF 197 * <li>The ID did not uniquely(!) identify a concept in the checklist 198 * <li>The ID found a concept in the checklist which did not map to the backbone 199 * <li>A different ID was used, or the record names were used as no ID lookup successfully 200 * linked to the backbone 201 * </ul> 202 * 203 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 204 */ 205 TAXON_MATCH_TAXON_CONCEPT_ID_IGNORED(INFO, DwcTerm.taxonConceptID), 206 207 /** 208 * The taxonID was not used when mapping the record to the GBIF backbone. This may indicate one of 209 * 210 * <ul> 211 * <li>The ID uses a pattern not configured for use by GBIF 212 * <li>The ID did not uniquely(!) identify a concept in the checklist 213 * <li>The ID found a concept in the checklist which did not map to the backbone 214 * <li>A different ID was used, or the record names were used as no ID lookup successfully 215 * linked to the backbone 216 * </ul> 217 * 218 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 219 */ 220 TAXON_MATCH_TAXON_ID_IGNORED(INFO, DwcTerm.taxonID), 221 222 /** 223 * The scientificNameID matched a known pattern, but it was not found in the associated checklist. 224 * The backbone lookup was performed using either the names or a different ID on the record. This 225 * may indicate a poorly formatted identifier or may be caused by a newly created ID that isn't 226 * yet known in the version of the published checklist. 227 * 228 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 229 */ 230 SCIENTIFIC_NAME_ID_NOT_FOUND(WARNING, DwcTerm.scientificNameID), 231 232 /** 233 * The taxonConceptID matched a known pattern, but it was not found in the associated checklist. 234 * The backbone lookup was performed using either the names or a different ID on the record. This 235 * may indicate a poorly formatted identifier or may be caused by a newly created ID that isn't 236 * yet known in the version of the published checklist. 237 * 238 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 239 */ 240 TAXON_CONCEPT_ID_NOT_FOUND(WARNING, DwcTerm.taxonConceptID), 241 242 /** 243 * The taxonID matched a known pattern, but it was not found in the associated checklist. The 244 * backbone lookup was performed using either the names or a different ID on the record. This may 245 * indicate a poorly formatted identifier or may be caused by a newly created ID that isn't yet 246 * known in the version of the published checklist. 247 * 248 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 249 */ 250 TAXON_ID_NOT_FOUND(WARNING, DwcTerm.taxonID), 251 252 /** 253 * The scientificName provided in the occurrence record does not precisely match the name in the 254 * registered checklist when using the scientificNameID, taxonID or taxonConceptID to look it up. 255 * Publishers are advised to check the IDs are correct, or update the formatting of the names on 256 * their records. 257 * 258 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 259 */ 260 SCIENTIFIC_NAME_AND_ID_INCONSISTENT( 261 WARNING, 262 DwcTerm.scientificNameID, 263 DwcTerm.taxonID, 264 DwcTerm.taxonConceptID, 265 DwcTerm.scientificName), 266 267 /** 268 * Matching to the taxonomic backbone cannot be done because there was no match at all, or several 269 * matches with too little information to keep them apart (potentially homonyms). 270 */ 271 TAXON_MATCH_NONE(WARNING, TermsGroup.TAXONOMY_TERMS), 272 273 /** 274 * The GBIF Backbone concept was found using the scientificNameID, taxonID or taxonConceptID, but 275 * it differs from what would have been found if the classification names on the record were used. 276 * This may indicate a gap in the GBIF backbone, a poor mapping between the checklist and the 277 * backbone, or a mismatch between the classification names and the declared IDs (scientificNameID 278 * or taxonConceptID) on the occurrence record itself. 279 * 280 * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a> 281 */ 282 TAXON_MATCH_NAME_AND_ID_AMBIGUOUS(WARNING, TermsGroup.TAXONOMY_TERMS), 283 284 /** 285 * Set if supplied depth is not given in the metric system, for example using feet instead of 286 * meters 287 */ 288 DEPTH_NOT_METRIC(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters), 289 290 /** Set if depth is larger than 11,000m or negative. */ 291 DEPTH_UNLIKELY(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters), 292 293 /** Set if supplied minimum depth > maximum depth */ 294 DEPTH_MIN_MAX_SWAPPED(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters), 295 296 /** Set if depth is a non-numeric value */ 297 DEPTH_NON_NUMERIC(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters), 298 299 /** Set if elevation is above the troposphere (17km) or below 11km (Mariana Trench). */ 300 ELEVATION_UNLIKELY(WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters), 301 302 /** Set if supplied minimum elevation > maximum elevation */ 303 ELEVATION_MIN_MAX_SWAPPED( 304 WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters), 305 306 /** 307 * Set if supplied elevation is not given in the metric system, for example using feet instead of 308 * meters 309 */ 310 ELEVATION_NOT_METRIC(WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters), 311 312 /** Set if elevation is a non-numeric value */ 313 ELEVATION_NON_NUMERIC( 314 WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters), 315 316 /** 317 * A (partial) invalid date is given for dc:modified, such as a nonexistent date, zero month, etc. 318 */ 319 MODIFIED_DATE_INVALID(WARNING, DcTerm.modified), 320 321 /** The date given for dc:modified is in the future or predates Unix time (1970). */ 322 MODIFIED_DATE_UNLIKELY(WARNING, DcTerm.modified), 323 324 /** The date given for dwc:dateIdentified is in the future or before Linnean times (1700). */ 325 IDENTIFIED_DATE_UNLIKELY(WARNING, DwcTerm.dateIdentified), 326 327 /** The date given for dwc:dateIdentified is invalid and can't be interpreted at all. */ 328 IDENTIFIED_DATE_INVALID(WARNING, DwcTerm.dateIdentified), 329 330 /** 331 * The given basis of record is impossible to interpret or significantly different from the 332 * recommended vocabulary. 333 */ 334 BASIS_OF_RECORD_INVALID(WARNING, DwcTerm.basisOfRecord), 335 336 /** 337 * The given type status is impossible to interpret or significantly different from the 338 * recommended vocabulary. 339 */ 340 TYPE_STATUS_INVALID(WARNING, DwcTerm.typeStatus), 341 342 /** The given type status contains some words that express uncertainty. */ 343 SUSPECTED_TYPE(WARNING, DwcTerm.typeStatus), 344 345 /** An invalid date is given for dc:created of a multimedia object. */ 346 MULTIMEDIA_DATE_INVALID(WARNING), 347 348 /** An invalid URI is given for a multimedia object. */ 349 MULTIMEDIA_URI_INVALID(WARNING), 350 351 /** An invalid URI is given for dc:references. */ 352 REFERENCES_URI_INVALID(WARNING, DcTerm.references), 353 354 /** An error occurred during interpretation, leaving the record interpretation incomplete. */ 355 INTERPRETATION_ERROR(ERROR), 356 357 /** The individual count value is not a positive integer */ 358 INDIVIDUAL_COUNT_INVALID(WARNING, DwcTerm.individualCount), 359 360 /** Example: individual count value > 0, but occurrence status is absent. */ 361 INDIVIDUAL_COUNT_CONFLICTS_WITH_OCCURRENCE_STATUS(WARNING, DwcTerm.individualCount), 362 363 /** Occurrence status value can't be assigned to {@link OccurrenceStatus} */ 364 OCCURRENCE_STATUS_UNPARSABLE(WARNING, DwcTerm.occurrenceStatus), 365 366 /** Occurrence status was inferred from the individual count value */ 367 OCCURRENCE_STATUS_INFERRED_FROM_INDIVIDUAL_COUNT(WARNING, DwcTerm.occurrenceStatus), 368 369 /** Occurrence status was inferred from basis of records */ 370 OCCURRENCE_STATUS_INFERRED_FROM_BASIS_OF_RECORD(WARNING, DwcTerm.occurrenceStatus), 371 372 /** The date given for dwc:georeferencedDate is in the future or before Linnean times (1700). */ 373 GEOREFERENCED_DATE_UNLIKELY(WARNING, DwcTerm.georeferencedDate), 374 375 /** The date given for dwc:georeferencedDate is invalid and can't be interpreted at all. */ 376 GEOREFERENCED_DATE_INVALID(WARNING, DwcTerm.georeferencedDate), 377 378 /** The given institution matches with more than 1 GRSciColl institution. */ 379 AMBIGUOUS_INSTITUTION(INFO, TermsGroup.INSTITUTION_TERMS), 380 381 /** The given collection matches with more than 1 GRSciColl collection. */ 382 AMBIGUOUS_COLLECTION(INFO, TermsGroup.COLLECTION_TERMS), 383 384 /** The given institution couldn't be matched with any GRSciColl institution. */ 385 INSTITUTION_MATCH_NONE(INFO, TermsGroup.INSTITUTION_TERMS), 386 387 /** The given collection couldn't be matched with any GRSciColl collection. */ 388 COLLECTION_MATCH_NONE(INFO, TermsGroup.COLLECTION_TERMS), 389 390 /** 391 * The given institution was fuzzily matched to a GRSciColl institution. This can happen when 392 * either the code or the ID don't match or when the institution name is used instead of the code. 393 */ 394 INSTITUTION_MATCH_FUZZY(INFO, TermsGroup.INSTITUTION_TERMS), 395 396 /** 397 * The given collection was fuzzily matched to a GRSciColl collection. This can happen when either 398 * the code or the ID don't match or when the collection name is used instead of the code. 399 */ 400 COLLECTION_MATCH_FUZZY(INFO, TermsGroup.COLLECTION_TERMS), 401 402 /** The collection matched doesn't belong to the institution matched. */ 403 INSTITUTION_COLLECTION_MISMATCH( 404 INFO, ArrayUtils.addAll(TermsGroup.INSTITUTION_TERMS, TermsGroup.INSTITUTION_TERMS)), 405 406 /** 407 * The given owner institution is different than the given institution. Therefore we assume it 408 * could be on loan and we don't link it to the occurrence. 409 * 410 * <p>Deprecated by {@link #DIFFERENT_OWNER_INSTITUTION}. 411 */ 412 @Deprecated 413 POSSIBLY_ON_LOAN(INFO, TermsGroup.INSTITUTION_TERMS), 414 415 /** 416 * The given owner institution is different than the given institution. Therefore we assume it 417 * doesn't belong to the institution and we don't link it to the occurrence. 418 */ 419 DIFFERENT_OWNER_INSTITUTION(INFO, TermsGroup.INSTITUTION_TERMS), 420 421 /** Era or erathem was inferred from a parent rank. */ 422 ERA_OR_ERATHEM_INFERRED_FROM_PARENT_RANK( 423 INFO, DwcTerm.earliestEraOrLowestErathem, DwcTerm.latestEraOrHighestErathem), 424 /** Period or system was inferred from a parent rank. */ 425 PERIOD_OR_SYSTEM_INFERRED_FROM_PARENT_RANK( 426 INFO, DwcTerm.earliestPeriodOrLowestSystem, DwcTerm.latestPeriodOrHighestSystem), 427 /** Epoch or series was inferred from a parent rank. */ 428 EPOCH_OR_SERIES_INFERRED_FROM_PARENT_RANK( 429 INFO, DwcTerm.earliestEpochOrLowestSeries, DwcTerm.latestEpochOrHighestSeries), 430 /** Age or stage was inferred from a parent rank. */ 431 AGE_OR_STAGE_INFERRED_FROM_PARENT_RANK( 432 INFO, DwcTerm.earliestAgeOrLowestStage, DwcTerm.latestAgeOrHighestStage), 433 434 /** The eon or eonothem provided belongs to another rank. */ 435 EON_OR_EONOTHEM_RANK_MISMATCH( 436 INFO, DwcTerm.earliestEonOrLowestEonothem, DwcTerm.latestEonOrHighestEonothem), 437 /** The era or erathem provided belongs to another rank. */ 438 ERA_OR_ERATHEM_RANK_MISMATCH( 439 INFO, DwcTerm.earliestEraOrLowestErathem, DwcTerm.latestEraOrHighestErathem), 440 /** The period or system provided belongs to another rank. */ 441 PERIOD_OR_SYSTEM_RANK_MISMATCH( 442 INFO, DwcTerm.earliestPeriodOrLowestSystem, DwcTerm.latestPeriodOrHighestSystem), 443 /** The period or series provided belongs to another rank. */ 444 EPOCH_OR_SERIES_RANK_MISMATCH( 445 INFO, DwcTerm.earliestEpochOrLowestSeries, DwcTerm.latestEpochOrHighestSeries), 446 /** The age or stage provided belongs to another rank. */ 447 AGE_OR_STAGE_RANK_MISMATCH( 448 INFO, DwcTerm.earliestAgeOrLowestStage, DwcTerm.latestAgeOrHighestStage), 449 450 /** The earliest eon or eonothem has to be earlier than the latest. */ 451 EON_OR_EONOTHEM_INVALID_RANGE( 452 INFO, DwcTerm.earliestEonOrLowestEonothem, DwcTerm.latestEonOrHighestEonothem), 453 /** The era or erathem has to be earlier than the latest. */ 454 ERA_OR_ERATHEM_INVALID_RANGE( 455 INFO, DwcTerm.earliestEraOrLowestErathem, DwcTerm.latestEraOrHighestErathem), 456 /** The period or system has to be earlier than the latest. */ 457 PERIOD_OR_SYSTEM_INVALID_RANGE( 458 INFO, DwcTerm.earliestPeriodOrLowestSystem, DwcTerm.latestPeriodOrHighestSystem), 459 /** The period or series has to be earlier than the latest. */ 460 EPOCH_OR_SERIES_INVALID_RANGE( 461 INFO, DwcTerm.earliestEpochOrLowestSeries, DwcTerm.latestEpochOrHighestSeries), 462 /** The age or stage has to be earlier than the latest. */ 463 AGE_OR_STAGE_INVALID_RANGE( 464 INFO, DwcTerm.earliestAgeOrLowestStage, DwcTerm.latestAgeOrHighestStage), 465 466 /** The era or erathem don't belong to the eon or eonothem. */ 467 EON_OR_EONOTHEM_AND_ERA_OR_ERATHEM_MISMATCH( 468 INFO, 469 DwcTerm.earliestEonOrLowestEonothem, 470 DwcTerm.latestEonOrHighestEonothem, 471 DwcTerm.earliestEraOrLowestErathem, 472 DwcTerm.latestEraOrHighestErathem), 473 474 /** The period or system don't belong to the era or erathem. */ 475 ERA_OR_ERATHEM_AND_PERIOD_OR_SYSTEM_MISMATCH( 476 INFO, 477 DwcTerm.earliestEraOrLowestErathem, 478 DwcTerm.latestEraOrHighestErathem, 479 DwcTerm.earliestPeriodOrLowestSystem, 480 DwcTerm.latestPeriodOrHighestSystem), 481 482 /** The epoch or series don't belong to the period or system. */ 483 PERIOD_OR_SYSTEM_AND_EPOCH_OR_SERIES_MISMATCH( 484 INFO, 485 DwcTerm.earliestPeriodOrLowestSystem, 486 DwcTerm.latestPeriodOrHighestSystem, 487 DwcTerm.earliestEpochOrLowestSeries, 488 DwcTerm.latestEpochOrHighestSeries), 489 490 /** The age or stage don't belong to the epoch or series. */ 491 EPOCH_OR_SERIES_AND_AGE_OR_STAGE_MISMATCH( 492 INFO, 493 DwcTerm.earliestEpochOrLowestSeries, 494 DwcTerm.latestEpochOrHighestSeries, 495 DwcTerm.earliestAgeOrLowestStage, 496 DwcTerm.latestAgeOrHighestStage), 497 498 /** Set when natural language text is detected in {@code dna_sequence}. */ 499 NUCLEOTIDE_SEQUENCE_NATURAL_LANGUAGE(INFO, GbifDnaTerm.dna_sequence), 500 501 /** Set when leading or trailing sequence characters are trimmed. */ 502 NUCLEOTIDE_SEQUENCE_ENDS_TRIMMED(INFO, GbifDnaTerm.dna_sequence), 503 504 /** Set when gaps or whitespace are removed from the sequence. */ 505 NUCLEOTIDE_SEQUENCE_GAPS_REMOVED(INFO, GbifDnaTerm.dna_sequence), 506 507 /** Set when the sequence is invalid after normalization and validation. */ 508 NUCLEOTIDE_SEQUENCE_INVALID(INFO, GbifDnaTerm.dna_sequence), 509 510 /** Set when the fraction of N bases exceeds the configured threshold. */ 511 NUCLEOTIDE_SEQUENCE_HIGH_N_FRACTION(INFO, GbifDnaTerm.dna_sequence), 512 513 /** Set when the fraction of non-ACGTN bases exceeds the configured threshold. */ 514 NUCLEOTIDE_SEQUENCE_HIGH_AMBIGUITY(INFO, GbifDnaTerm.dna_sequence), 515 516 /** Set when {@code target_gene} does not resolve to a concept in the vocabulary. */ 517 TARGET_GENE_INVALID(INFO, MixsTerm.target_gene); 518 519 /** 520 * Simple helper nested class to allow grouping of Term mostly to increase readability of this 521 * class. 522 */ 523 private static class TermsGroup { 524 525 static final Term[] COORDINATES_TERMS_NO_DATUM = { 526 DwcTerm.decimalLatitude, 527 DwcTerm.decimalLongitude, 528 DwcTerm.verbatimLatitude, 529 DwcTerm.verbatimLongitude, 530 DwcTerm.verbatimCoordinates 531 }; 532 533 static final Term[] COORDINATES_TERMS = { 534 DwcTerm.decimalLatitude, 535 DwcTerm.decimalLongitude, 536 DwcTerm.verbatimLatitude, 537 DwcTerm.verbatimLongitude, 538 DwcTerm.verbatimCoordinates, 539 DwcTerm.geodeticDatum 540 }; 541 542 static final Term[] COUNTRY_TERMS = {DwcTerm.country, DwcTerm.countryCode}; 543 544 static final Term[] COORDINATES_COUNTRY_TERMS = { 545 DwcTerm.decimalLatitude, 546 DwcTerm.decimalLongitude, 547 DwcTerm.verbatimLatitude, 548 DwcTerm.verbatimLongitude, 549 DwcTerm.verbatimCoordinates, 550 DwcTerm.geodeticDatum, 551 DwcTerm.country, 552 DwcTerm.countryCode 553 }; 554 555 static final Term[] RECORDED_DATE_TERMS = { 556 DwcTerm.eventDate, 557 DwcTerm.year, 558 DwcTerm.month, 559 DwcTerm.day, 560 DwcTerm.startDayOfYear, 561 DwcTerm.endDayOfYear 562 }; 563 564 static final Term[] TAXONOMY_TERMS = { 565 DwcTerm.kingdom, 566 DwcTerm.phylum, 567 DwcTerm.class_, 568 DwcTerm.order, 569 DwcTerm.family, 570 DwcTerm.genus, 571 DwcTerm.scientificName, 572 DwcTerm.scientificNameAuthorship, 573 DwcTerm.genericName, 574 DwcTerm.specificEpithet, 575 DwcTerm.infraspecificEpithet, 576 DwcTerm.scientificNameID, 577 DwcTerm.taxonConceptID, 578 DwcTerm.taxonID, 579 }; 580 581 static final Term[] INSTITUTION_TERMS = { 582 DwcTerm.institutionCode, DwcTerm.institutionID, DwcTerm.ownerInstitutionCode 583 }; 584 585 static final Term[] COLLECTION_TERMS = {DwcTerm.collectionCode, DwcTerm.collectionID}; 586 } 587 588 private final Set<Term> relatedTerms; 589 private final InterpretationRemarkSeverity severity; 590 private final boolean isDeprecated; 591 592 /** {@link OccurrenceIssue} not linked to any specific {@link Term}. */ 593 OccurrenceIssue(InterpretationRemarkSeverity severity) { 594 this.severity = severity; 595 this.relatedTerms = Collections.emptySet(); 596 this.isDeprecated = AnnotationUtils.isFieldDeprecated(OccurrenceIssue.class, this.name()); 597 } 598 599 /** {@link OccurrenceIssue} linked to the provided {@link Term}. */ 600 OccurrenceIssue(InterpretationRemarkSeverity severity, Term... relatedTerms) { 601 this.severity = severity; 602 this.relatedTerms = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(relatedTerms))); 603 this.isDeprecated = AnnotationUtils.isFieldDeprecated(OccurrenceIssue.class, this.name()); 604 } 605 606 @Override 607 public String getId() { 608 return name(); 609 } 610 611 @Override 612 public Set<Term> getRelatedTerms() { 613 return relatedTerms; 614 } 615 616 @Override 617 public InterpretationRemarkSeverity getSeverity() { 618 return severity; 619 } 620 621 @Override 622 public boolean isDeprecated() { 623 return isDeprecated; 624 } 625 626 /** 627 * All issues that indicate problems with the coordinates and thus should not be shown on maps. 628 */ 629 public static final List<OccurrenceIssue> GEOSPATIAL_RULES = 630 Collections.unmodifiableList( 631 Arrays.asList( 632 ZERO_COORDINATE, 633 COORDINATE_OUT_OF_RANGE, 634 COORDINATE_INVALID, 635 COUNTRY_COORDINATE_MISMATCH, 636 PRESUMED_SWAPPED_COORDINATE, 637 PRESUMED_NEGATED_LONGITUDE, 638 PRESUMED_NEGATED_LATITUDE)); 639 640 /** All issues related to taxonomic fields. */ 641 public static final List<OccurrenceIssue> TAXONOMIC_RULES = 642 Set.of(OccurrenceIssue.values()).stream() 643 .filter( 644 issue -> 645 issue.getRelatedTerms().stream() 646 .anyMatch(term -> Set.of(TermsGroup.TAXONOMY_TERMS).contains(term))) 647 .collect(Collectors.toList()); 648}