001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.api.vocabulary;
015
016import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.ERROR;
017import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.INFO;
018import static org.gbif.api.vocabulary.InterpretationRemarkSeverity.WARNING;
019
020import java.util.Arrays;
021import java.util.Collections;
022import java.util.HashSet;
023import java.util.List;
024import java.util.Set;
025import java.util.stream.Collectors;
026import org.apache.commons.lang3.ArrayUtils;
027import org.gbif.api.util.AnnotationUtils;
028import org.gbif.dwc.terms.DcTerm;
029import org.gbif.dwc.terms.DwcTerm;
030import org.gbif.dwc.terms.GbifDnaTerm;
031import org.gbif.dwc.terms.MixsTerm;
032import org.gbif.dwc.terms.Term;
033
034/** An enumeration of validation rules for single occurrence records. */
035public enum OccurrenceIssue implements InterpretationRemark {
036
037  /** Coordinate is the exact 0°, 0° coordinate, often indicating a bad null coordinate. */
038  ZERO_COORDINATE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM),
039
040  /**
041   * Coordinate has a latitude and/or longitude value beyond the maximum (or minimum) decimal value.
042   */
043  COORDINATE_OUT_OF_RANGE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM),
044
045  /** Coordinate value is given in some form but GBIF is unable to interpret it. */
046  COORDINATE_INVALID(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM),
047
048  /** Original coordinate modified by rounding to 5 decimals. */
049  COORDINATE_ROUNDED(INFO, TermsGroup.COORDINATES_TERMS_NO_DATUM),
050
051  /** The geodetic datum given could not be interpreted. */
052  GEODETIC_DATUM_INVALID(WARNING, DwcTerm.geodeticDatum),
053
054  /**
055   * Indicating that the interpreted coordinates assume they are based on WGS84 datum as the datum
056   * was either not indicated or interpretable. See GEODETIC_DATUM_INVALID.
057   */
058  GEODETIC_DATUM_ASSUMED_WGS84(INFO, DwcTerm.geodeticDatum),
059
060  /**
061   * The original coordinate was successfully reprojected from a different geodetic datum to WGS84.
062   */
063  COORDINATE_REPROJECTED(INFO, TermsGroup.COORDINATES_TERMS),
064
065  /**
066   * The given decimal latitude and longitude could not be reprojected to WGS84 based on the
067   * provided datum.
068   */
069  COORDINATE_REPROJECTION_FAILED(WARNING, TermsGroup.COORDINATES_TERMS),
070
071  /**
072   * Indicates successful coordinate reprojection according to provided datum, but which results in
073   * a datum shift larger than 0.1 decimal degrees.
074   */
075  COORDINATE_REPROJECTION_SUSPICIOUS(WARNING, TermsGroup.COORDINATES_TERMS),
076
077  /**
078   * Indicates an invalid or very unlikely coordinate accuracy derived from precision or uncertainty
079   * in meters.
080   */
081  @Deprecated // see POR-3061
082  COORDINATE_ACCURACY_INVALID(WARNING),
083
084  /** Indicates an invalid or very unlikely coordinatePrecision */
085  COORDINATE_PRECISION_INVALID(WARNING, DwcTerm.coordinatePrecision),
086
087  /** Indicates an invalid or very unlikely dwc:uncertaintyInMeters. */
088  COORDINATE_UNCERTAINTY_METERS_INVALID(WARNING, DwcTerm.coordinateUncertaintyInMeters),
089
090  /** There is a mismatch between coordinate uncertainty in meters and coordinate precision. */
091  @Deprecated // see POR-1804
092  COORDINATE_PRECISION_UNCERTAINTY_MISMATCH(WARNING),
093
094  /** The Footprint Spatial Reference System given could not be interpreted. */
095  FOOTPRINT_SRS_INVALID(WARNING, DwcTerm.footprintSRS),
096
097  /**
098   * The Footprint Well-Known-Text conflicts with the interpreted coordinates (Decimal Latitude,
099   * Decimal Longitude etc).
100   */
101  FOOTPRINT_WKT_MISMATCH(WARNING, DwcTerm.footprintWKT),
102
103  /** The Footprint Well-Known-Text given could not be interpreted. */
104  FOOTPRINT_WKT_INVALID(WARNING, DwcTerm.footprintWKT),
105
106  /** The interpreted occurrence coordinates fall outside of the indicated country. */
107  COUNTRY_COORDINATE_MISMATCH(WARNING, TermsGroup.COORDINATES_COUNTRY_TERMS),
108
109  /** Interpreted country for dwc:country and dwc:countryCode contradict each other. */
110  COUNTRY_MISMATCH(WARNING, TermsGroup.COUNTRY_TERMS),
111
112  /** Uninterpretable country values found. */
113  COUNTRY_INVALID(WARNING, TermsGroup.COUNTRY_TERMS),
114
115  /** The interpreted country is based on the coordinates, not the verbatim string information. */
116  COUNTRY_DERIVED_FROM_COORDINATES(WARNING, TermsGroup.COORDINATES_COUNTRY_TERMS),
117
118  /** The interpreted occurrence coordinates fall outside of the indicated continent. */
119  CONTINENT_COORDINATE_MISMATCH(WARNING),
120
121  /** The interpreted continent and country do not match. */
122  CONTINENT_COUNTRY_MISMATCH(WARNING),
123
124  /** Uninterpretable continent values found. */
125  CONTINENT_INVALID(WARNING),
126
127  /** The interpreted continent is based on the country, not the verbatim string information. */
128  CONTINENT_DERIVED_FROM_COUNTRY(WARNING),
129
130  /** The interpreted continent is based on the coordinates, not the verbatim string information. */
131  CONTINENT_DERIVED_FROM_COORDINATES(WARNING),
132
133  /** Latitude and longitude appear to be swapped. */
134  PRESUMED_SWAPPED_COORDINATE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM),
135
136  /** Longitude appears to be negated, e.g. 32.3 instead of -32.3 */
137  PRESUMED_NEGATED_LONGITUDE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM),
138
139  /** Latitude appears to be negated, e.g. 32.3 instead of -32.3 */
140  PRESUMED_NEGATED_LATITUDE(WARNING, TermsGroup.COORDINATES_TERMS_NO_DATUM),
141
142  /**
143   * The recorded date specified as the eventDate string and the individual year, month, day and/or
144   * startDayOfYear, endDayOfYear are contradictory.
145   */
146  RECORDED_DATE_MISMATCH(WARNING, TermsGroup.RECORDED_DATE_TERMS),
147
148  /** A (partial) invalid date is given, such as a non-existent date, zero month, etc. */
149  RECORDED_DATE_INVALID(WARNING, TermsGroup.RECORDED_DATE_TERMS),
150
151  /**
152   * The recorded date is highly unlikely, falling either into the future or representing a very old
153   * date before 1600 thus predating modern taxonomy.
154   */
155  RECORDED_DATE_UNLIKELY(WARNING, TermsGroup.RECORDED_DATE_TERMS),
156
157  /** Matching to the taxonomic backbone can only be done using a fuzzy, non-exact match. */
158  TAXON_MATCH_FUZZY(WARNING, TermsGroup.TAXONOMY_TERMS),
159
160  /**
161   * Matching to the taxonomic backbone can only be done on a higher rank and not the scientific
162   * name.
163   */
164  TAXON_MATCH_HIGHERRANK(WARNING, TermsGroup.TAXONOMY_TERMS),
165
166  /**
167   * Matching to the taxonomic backbone can only be done on a species level, but the occurrence was
168   * in fact considered a broader species aggregate/complex.
169   *
170   * @see <a
171   *     href="https://github.com/gbif/portal-feedback/issues/2935">gbif/portal-feedback#2935</a>
172   */
173  TAXON_MATCH_AGGREGATE(WARNING, TermsGroup.TAXONOMY_TERMS),
174
175  /**
176   * The scientificNameID was not used when mapping the record to the GBIF backbone. This may
177   * indicate one of
178   *
179   * <ul>
180   *   <li>The ID uses a pattern not configured for use by GBIF
181   *   <li>The ID did not uniquely(!) identify a concept in the checklist
182   *   <li>The ID found a concept in the checklist which did not map to the backbone
183   *   <li>A different ID was used, or the record names were used as no ID lookup successfully
184   *       linked to the backbone
185   * </ul>
186   *
187   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
188   */
189  TAXON_MATCH_SCIENTIFIC_NAME_ID_IGNORED(INFO, DwcTerm.scientificNameID),
190
191  /**
192   * The taxonConceptID was not used when mapping the record to the GBIF backbone. This may indicate
193   * one of
194   *
195   * <ul>
196   *   <li>The ID uses a pattern not configured for use by GBIF
197   *   <li>The ID did not uniquely(!) identify a concept in the checklist
198   *   <li>The ID found a concept in the checklist which did not map to the backbone
199   *   <li>A different ID was used, or the record names were used as no ID lookup successfully
200   *       linked to the backbone
201   * </ul>
202   *
203   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
204   */
205  TAXON_MATCH_TAXON_CONCEPT_ID_IGNORED(INFO, DwcTerm.taxonConceptID),
206
207  /**
208   * The taxonID was not used when mapping the record to the GBIF backbone. This may indicate one of
209   *
210   * <ul>
211   *   <li>The ID uses a pattern not configured for use by GBIF
212   *   <li>The ID did not uniquely(!) identify a concept in the checklist
213   *   <li>The ID found a concept in the checklist which did not map to the backbone
214   *   <li>A different ID was used, or the record names were used as no ID lookup successfully
215   *       linked to the backbone
216   * </ul>
217   *
218   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
219   */
220  TAXON_MATCH_TAXON_ID_IGNORED(INFO, DwcTerm.taxonID),
221
222  /**
223   * The scientificNameID matched a known pattern, but it was not found in the associated checklist.
224   * The backbone lookup was performed using either the names or a different ID on the record. This
225   * may indicate a poorly formatted identifier or may be caused by a newly created ID that isn't
226   * yet known in the version of the published checklist.
227   *
228   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
229   */
230  SCIENTIFIC_NAME_ID_NOT_FOUND(WARNING, DwcTerm.scientificNameID),
231
232  /**
233   * The taxonConceptID matched a known pattern, but it was not found in the associated checklist.
234   * The backbone lookup was performed using either the names or a different ID on the record. This
235   * may indicate a poorly formatted identifier or may be caused by a newly created ID that isn't
236   * yet known in the version of the published checklist.
237   *
238   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
239   */
240  TAXON_CONCEPT_ID_NOT_FOUND(WARNING, DwcTerm.taxonConceptID),
241
242  /**
243   * The taxonID matched a known pattern, but it was not found in the associated checklist. The
244   * backbone lookup was performed using either the names or a different ID on the record. This may
245   * indicate a poorly formatted identifier or may be caused by a newly created ID that isn't yet
246   * known in the version of the published checklist.
247   *
248   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
249   */
250  TAXON_ID_NOT_FOUND(WARNING, DwcTerm.taxonID),
251
252  /**
253   * The scientificName provided in the occurrence record does not precisely match the name in the
254   * registered checklist when using the scientificNameID, taxonID or taxonConceptID to look it up.
255   * Publishers are advised to check the IDs are correct, or update the formatting of the names on
256   * their records.
257   *
258   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
259   */
260  SCIENTIFIC_NAME_AND_ID_INCONSISTENT(
261      WARNING,
262      DwcTerm.scientificNameID,
263      DwcTerm.taxonID,
264      DwcTerm.taxonConceptID,
265      DwcTerm.scientificName),
266
267  /**
268   * Matching to the taxonomic backbone cannot be done because there was no match at all, or several
269   * matches with too little information to keep them apart (potentially homonyms).
270   */
271  TAXON_MATCH_NONE(WARNING, TermsGroup.TAXONOMY_TERMS),
272
273  /**
274   * The GBIF Backbone concept was found using the scientificNameID, taxonID or taxonConceptID, but
275   * it differs from what would have been found if the classification names on the record were used.
276   * This may indicate a gap in the GBIF backbone, a poor mapping between the checklist and the
277   * backbone, or a mismatch between the classification names and the declared IDs (scientificNameID
278   * or taxonConceptID) on the occurrence record itself.
279   *
280   * @see <a href="https://github.com/gbif/pipelines/issues/217">gbif/pipelines#217</a>
281   */
282  TAXON_MATCH_NAME_AND_ID_AMBIGUOUS(WARNING, TermsGroup.TAXONOMY_TERMS),
283
284  /**
285   * Set if supplied depth is not given in the metric system, for example using feet instead of
286   * meters
287   */
288  DEPTH_NOT_METRIC(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters),
289
290  /** Set if depth is larger than 11,000m or negative. */
291  DEPTH_UNLIKELY(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters),
292
293  /** Set if supplied minimum depth > maximum depth */
294  DEPTH_MIN_MAX_SWAPPED(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters),
295
296  /** Set if depth is a non-numeric value */
297  DEPTH_NON_NUMERIC(WARNING, DwcTerm.minimumDepthInMeters, DwcTerm.maximumDepthInMeters),
298
299  /** Set if elevation is above the troposphere (17km) or below 11km (Mariana Trench). */
300  ELEVATION_UNLIKELY(WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters),
301
302  /** Set if supplied minimum elevation > maximum elevation */
303  ELEVATION_MIN_MAX_SWAPPED(
304      WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters),
305
306  /**
307   * Set if supplied elevation is not given in the metric system, for example using feet instead of
308   * meters
309   */
310  ELEVATION_NOT_METRIC(WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters),
311
312  /** Set if elevation is a non-numeric value */
313  ELEVATION_NON_NUMERIC(
314      WARNING, DwcTerm.minimumElevationInMeters, DwcTerm.maximumElevationInMeters),
315
316  /**
317   * A (partial) invalid date is given for dc:modified, such as a nonexistent date, zero month, etc.
318   */
319  MODIFIED_DATE_INVALID(WARNING, DcTerm.modified),
320
321  /** The date given for dc:modified is in the future or predates Unix time (1970). */
322  MODIFIED_DATE_UNLIKELY(WARNING, DcTerm.modified),
323
324  /** The date given for dwc:dateIdentified is in the future or before Linnean times (1700). */
325  IDENTIFIED_DATE_UNLIKELY(WARNING, DwcTerm.dateIdentified),
326
327  /** The date given for dwc:dateIdentified is invalid and can't be interpreted at all. */
328  IDENTIFIED_DATE_INVALID(WARNING, DwcTerm.dateIdentified),
329
330  /**
331   * The given basis of record is impossible to interpret or significantly different from the
332   * recommended vocabulary.
333   */
334  BASIS_OF_RECORD_INVALID(WARNING, DwcTerm.basisOfRecord),
335
336  /**
337   * The given type status is impossible to interpret or significantly different from the
338   * recommended vocabulary.
339   */
340  TYPE_STATUS_INVALID(WARNING, DwcTerm.typeStatus),
341
342  /** The given type status contains some words that express uncertainty. */
343  SUSPECTED_TYPE(WARNING, DwcTerm.typeStatus),
344
345  /** An invalid date is given for dc:created of a multimedia object. */
346  MULTIMEDIA_DATE_INVALID(WARNING),
347
348  /** An invalid URI is given for a multimedia object. */
349  MULTIMEDIA_URI_INVALID(WARNING),
350
351  /** An invalid URI is given for dc:references. */
352  REFERENCES_URI_INVALID(WARNING, DcTerm.references),
353
354  /** An error occurred during interpretation, leaving the record interpretation incomplete. */
355  INTERPRETATION_ERROR(ERROR),
356
357  /** The individual count value is not a positive integer */
358  INDIVIDUAL_COUNT_INVALID(WARNING, DwcTerm.individualCount),
359
360  /** Example: individual count value > 0, but occurrence status is absent. */
361  INDIVIDUAL_COUNT_CONFLICTS_WITH_OCCURRENCE_STATUS(WARNING, DwcTerm.individualCount),
362
363  /** Occurrence status value can't be assigned to {@link OccurrenceStatus} */
364  OCCURRENCE_STATUS_UNPARSABLE(WARNING, DwcTerm.occurrenceStatus),
365
366  /** Occurrence status was inferred from the individual count value */
367  OCCURRENCE_STATUS_INFERRED_FROM_INDIVIDUAL_COUNT(WARNING, DwcTerm.occurrenceStatus),
368
369  /** Occurrence status was inferred from basis of records */
370  OCCURRENCE_STATUS_INFERRED_FROM_BASIS_OF_RECORD(WARNING, DwcTerm.occurrenceStatus),
371
372  /** The date given for dwc:georeferencedDate is in the future or before Linnean times (1700). */
373  GEOREFERENCED_DATE_UNLIKELY(WARNING, DwcTerm.georeferencedDate),
374
375  /** The date given for dwc:georeferencedDate is invalid and can't be interpreted at all. */
376  GEOREFERENCED_DATE_INVALID(WARNING, DwcTerm.georeferencedDate),
377
378  /** The given institution matches with more than 1 GRSciColl institution. */
379  AMBIGUOUS_INSTITUTION(INFO, TermsGroup.INSTITUTION_TERMS),
380
381  /** The given collection matches with more than 1 GRSciColl collection. */
382  AMBIGUOUS_COLLECTION(INFO, TermsGroup.COLLECTION_TERMS),
383
384  /** The given institution couldn't be matched with any GRSciColl institution. */
385  INSTITUTION_MATCH_NONE(INFO, TermsGroup.INSTITUTION_TERMS),
386
387  /** The given collection couldn't be matched with any GRSciColl collection. */
388  COLLECTION_MATCH_NONE(INFO, TermsGroup.COLLECTION_TERMS),
389
390  /**
391   * The given institution was fuzzily matched to a GRSciColl institution. This can happen when
392   * either the code or the ID don't match or when the institution name is used instead of the code.
393   */
394  INSTITUTION_MATCH_FUZZY(INFO, TermsGroup.INSTITUTION_TERMS),
395
396  /**
397   * The given collection was fuzzily matched to a GRSciColl collection. This can happen when either
398   * the code or the ID don't match or when the collection name is used instead of the code.
399   */
400  COLLECTION_MATCH_FUZZY(INFO, TermsGroup.COLLECTION_TERMS),
401
402  /** The collection matched doesn't belong to the institution matched. */
403  INSTITUTION_COLLECTION_MISMATCH(
404      INFO, ArrayUtils.addAll(TermsGroup.INSTITUTION_TERMS, TermsGroup.INSTITUTION_TERMS)),
405
406  /**
407   * The given owner institution is different than the given institution. Therefore we assume it
408   * could be on loan and we don't link it to the occurrence.
409   *
410   * <p>Deprecated by {@link #DIFFERENT_OWNER_INSTITUTION}.
411   */
412  @Deprecated
413  POSSIBLY_ON_LOAN(INFO, TermsGroup.INSTITUTION_TERMS),
414
415  /**
416   * The given owner institution is different than the given institution. Therefore we assume it
417   * doesn't belong to the institution and we don't link it to the occurrence.
418   */
419  DIFFERENT_OWNER_INSTITUTION(INFO, TermsGroup.INSTITUTION_TERMS),
420
421  /** Era or erathem was inferred from a parent rank. */
422  ERA_OR_ERATHEM_INFERRED_FROM_PARENT_RANK(
423      INFO, DwcTerm.earliestEraOrLowestErathem, DwcTerm.latestEraOrHighestErathem),
424  /** Period or system was inferred from a parent rank. */
425  PERIOD_OR_SYSTEM_INFERRED_FROM_PARENT_RANK(
426      INFO, DwcTerm.earliestPeriodOrLowestSystem, DwcTerm.latestPeriodOrHighestSystem),
427  /** Epoch or series was inferred from a parent rank. */
428  EPOCH_OR_SERIES_INFERRED_FROM_PARENT_RANK(
429      INFO, DwcTerm.earliestEpochOrLowestSeries, DwcTerm.latestEpochOrHighestSeries),
430  /** Age or stage was inferred from a parent rank. */
431  AGE_OR_STAGE_INFERRED_FROM_PARENT_RANK(
432      INFO, DwcTerm.earliestAgeOrLowestStage, DwcTerm.latestAgeOrHighestStage),
433
434  /** The eon or eonothem provided belongs to another rank. */
435  EON_OR_EONOTHEM_RANK_MISMATCH(
436      INFO, DwcTerm.earliestEonOrLowestEonothem, DwcTerm.latestEonOrHighestEonothem),
437  /** The era or erathem provided belongs to another rank. */
438  ERA_OR_ERATHEM_RANK_MISMATCH(
439      INFO, DwcTerm.earliestEraOrLowestErathem, DwcTerm.latestEraOrHighestErathem),
440  /** The period or system provided belongs to another rank. */
441  PERIOD_OR_SYSTEM_RANK_MISMATCH(
442      INFO, DwcTerm.earliestPeriodOrLowestSystem, DwcTerm.latestPeriodOrHighestSystem),
443  /** The period or series provided belongs to another rank. */
444  EPOCH_OR_SERIES_RANK_MISMATCH(
445      INFO, DwcTerm.earliestEpochOrLowestSeries, DwcTerm.latestEpochOrHighestSeries),
446  /** The age or stage provided belongs to another rank. */
447  AGE_OR_STAGE_RANK_MISMATCH(
448      INFO, DwcTerm.earliestAgeOrLowestStage, DwcTerm.latestAgeOrHighestStage),
449
450  /** The earliest eon or eonothem has to be earlier than the latest. */
451  EON_OR_EONOTHEM_INVALID_RANGE(
452      INFO, DwcTerm.earliestEonOrLowestEonothem, DwcTerm.latestEonOrHighestEonothem),
453  /** The era or erathem has to be earlier than the latest. */
454  ERA_OR_ERATHEM_INVALID_RANGE(
455      INFO, DwcTerm.earliestEraOrLowestErathem, DwcTerm.latestEraOrHighestErathem),
456  /** The period or system has to be earlier than the latest. */
457  PERIOD_OR_SYSTEM_INVALID_RANGE(
458      INFO, DwcTerm.earliestPeriodOrLowestSystem, DwcTerm.latestPeriodOrHighestSystem),
459  /** The period or series has to be earlier than the latest. */
460  EPOCH_OR_SERIES_INVALID_RANGE(
461      INFO, DwcTerm.earliestEpochOrLowestSeries, DwcTerm.latestEpochOrHighestSeries),
462  /** The age or stage has to be earlier than the latest. */
463  AGE_OR_STAGE_INVALID_RANGE(
464      INFO, DwcTerm.earliestAgeOrLowestStage, DwcTerm.latestAgeOrHighestStage),
465
466  /** The era or erathem don't belong to the eon or eonothem. */
467  EON_OR_EONOTHEM_AND_ERA_OR_ERATHEM_MISMATCH(
468      INFO,
469      DwcTerm.earliestEonOrLowestEonothem,
470      DwcTerm.latestEonOrHighestEonothem,
471      DwcTerm.earliestEraOrLowestErathem,
472      DwcTerm.latestEraOrHighestErathem),
473
474  /** The period or system don't belong to the era or erathem. */
475  ERA_OR_ERATHEM_AND_PERIOD_OR_SYSTEM_MISMATCH(
476      INFO,
477      DwcTerm.earliestEraOrLowestErathem,
478      DwcTerm.latestEraOrHighestErathem,
479      DwcTerm.earliestPeriodOrLowestSystem,
480      DwcTerm.latestPeriodOrHighestSystem),
481
482  /** The epoch or series don't belong to the period or system. */
483  PERIOD_OR_SYSTEM_AND_EPOCH_OR_SERIES_MISMATCH(
484      INFO,
485      DwcTerm.earliestPeriodOrLowestSystem,
486      DwcTerm.latestPeriodOrHighestSystem,
487      DwcTerm.earliestEpochOrLowestSeries,
488      DwcTerm.latestEpochOrHighestSeries),
489
490  /** The age or stage don't belong to the epoch or series. */
491  EPOCH_OR_SERIES_AND_AGE_OR_STAGE_MISMATCH(
492      INFO,
493      DwcTerm.earliestEpochOrLowestSeries,
494      DwcTerm.latestEpochOrHighestSeries,
495      DwcTerm.earliestAgeOrLowestStage,
496      DwcTerm.latestAgeOrHighestStage),
497
498  /** Set when natural language text is detected in {@code dna_sequence}. */
499  NUCLEOTIDE_SEQUENCE_NATURAL_LANGUAGE(INFO, GbifDnaTerm.dna_sequence),
500
501  /** Set when leading or trailing sequence characters are trimmed. */
502  NUCLEOTIDE_SEQUENCE_ENDS_TRIMMED(INFO, GbifDnaTerm.dna_sequence),
503
504  /** Set when gaps or whitespace are removed from the sequence. */
505  NUCLEOTIDE_SEQUENCE_GAPS_REMOVED(INFO, GbifDnaTerm.dna_sequence),
506
507  /** Set when the sequence is invalid after normalization and validation. */
508  NUCLEOTIDE_SEQUENCE_INVALID(INFO, GbifDnaTerm.dna_sequence),
509
510  /** Set when the fraction of N bases exceeds the configured threshold. */
511  NUCLEOTIDE_SEQUENCE_HIGH_N_FRACTION(INFO, GbifDnaTerm.dna_sequence),
512
513  /** Set when the fraction of non-ACGTN bases exceeds the configured threshold. */
514  NUCLEOTIDE_SEQUENCE_HIGH_AMBIGUITY(INFO, GbifDnaTerm.dna_sequence),
515
516  /** Set when {@code target_gene} does not resolve to a concept in the vocabulary. */
517  TARGET_GENE_INVALID(INFO, MixsTerm.target_gene);
518
519  /**
520   * Simple helper nested class to allow grouping of Term mostly to increase readability of this
521   * class.
522   */
523  private static class TermsGroup {
524
525    static final Term[] COORDINATES_TERMS_NO_DATUM = {
526      DwcTerm.decimalLatitude,
527      DwcTerm.decimalLongitude,
528      DwcTerm.verbatimLatitude,
529      DwcTerm.verbatimLongitude,
530      DwcTerm.verbatimCoordinates
531    };
532
533    static final Term[] COORDINATES_TERMS = {
534      DwcTerm.decimalLatitude,
535      DwcTerm.decimalLongitude,
536      DwcTerm.verbatimLatitude,
537      DwcTerm.verbatimLongitude,
538      DwcTerm.verbatimCoordinates,
539      DwcTerm.geodeticDatum
540    };
541
542    static final Term[] COUNTRY_TERMS = {DwcTerm.country, DwcTerm.countryCode};
543
544    static final Term[] COORDINATES_COUNTRY_TERMS = {
545      DwcTerm.decimalLatitude,
546      DwcTerm.decimalLongitude,
547      DwcTerm.verbatimLatitude,
548      DwcTerm.verbatimLongitude,
549      DwcTerm.verbatimCoordinates,
550      DwcTerm.geodeticDatum,
551      DwcTerm.country,
552      DwcTerm.countryCode
553    };
554
555    static final Term[] RECORDED_DATE_TERMS = {
556      DwcTerm.eventDate,
557      DwcTerm.year,
558      DwcTerm.month,
559      DwcTerm.day,
560      DwcTerm.startDayOfYear,
561      DwcTerm.endDayOfYear
562    };
563
564    static final Term[] TAXONOMY_TERMS = {
565      DwcTerm.kingdom,
566      DwcTerm.phylum,
567      DwcTerm.class_,
568      DwcTerm.order,
569      DwcTerm.family,
570      DwcTerm.genus,
571      DwcTerm.scientificName,
572      DwcTerm.scientificNameAuthorship,
573      DwcTerm.genericName,
574      DwcTerm.specificEpithet,
575      DwcTerm.infraspecificEpithet,
576      DwcTerm.scientificNameID,
577      DwcTerm.taxonConceptID,
578      DwcTerm.taxonID,
579    };
580
581    static final Term[] INSTITUTION_TERMS = {
582      DwcTerm.institutionCode, DwcTerm.institutionID, DwcTerm.ownerInstitutionCode
583    };
584
585    static final Term[] COLLECTION_TERMS = {DwcTerm.collectionCode, DwcTerm.collectionID};
586  }
587
588  private final Set<Term> relatedTerms;
589  private final InterpretationRemarkSeverity severity;
590  private final boolean isDeprecated;
591
592  /** {@link OccurrenceIssue} not linked to any specific {@link Term}. */
593  OccurrenceIssue(InterpretationRemarkSeverity severity) {
594    this.severity = severity;
595    this.relatedTerms = Collections.emptySet();
596    this.isDeprecated = AnnotationUtils.isFieldDeprecated(OccurrenceIssue.class, this.name());
597  }
598
599  /** {@link OccurrenceIssue} linked to the provided {@link Term}. */
600  OccurrenceIssue(InterpretationRemarkSeverity severity, Term... relatedTerms) {
601    this.severity = severity;
602    this.relatedTerms = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(relatedTerms)));
603    this.isDeprecated = AnnotationUtils.isFieldDeprecated(OccurrenceIssue.class, this.name());
604  }
605
606  @Override
607  public String getId() {
608    return name();
609  }
610
611  @Override
612  public Set<Term> getRelatedTerms() {
613    return relatedTerms;
614  }
615
616  @Override
617  public InterpretationRemarkSeverity getSeverity() {
618    return severity;
619  }
620
621  @Override
622  public boolean isDeprecated() {
623    return isDeprecated;
624  }
625
626  /**
627   * All issues that indicate problems with the coordinates and thus should not be shown on maps.
628   */
629  public static final List<OccurrenceIssue> GEOSPATIAL_RULES =
630      Collections.unmodifiableList(
631          Arrays.asList(
632              ZERO_COORDINATE,
633              COORDINATE_OUT_OF_RANGE,
634              COORDINATE_INVALID,
635              COUNTRY_COORDINATE_MISMATCH,
636              PRESUMED_SWAPPED_COORDINATE,
637              PRESUMED_NEGATED_LONGITUDE,
638              PRESUMED_NEGATED_LATITUDE));
639
640  /** All issues related to taxonomic fields. */
641  public static final List<OccurrenceIssue> TAXONOMIC_RULES =
642      Set.of(OccurrenceIssue.values()).stream()
643          .filter(
644              issue ->
645                  issue.getRelatedTerms().stream()
646                      .anyMatch(term -> Set.of(TermsGroup.TAXONOMY_TERMS).contains(term)))
647          .collect(Collectors.toList());
648}