001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
018import java.io.Serializable;
019import java.net.URI;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.List;
023
024/**
025 * All Darwin Core terms with namespace http://rs.tdwg.org/dwc/terms/ as an
026 * enumeration with alternative term names found sometimes in data.
027 * Old, deprecated terms are kept but marked as such.
028 */
029public enum DwcTerm implements Term, AlternativeNames, Serializable {
030
031  /**
032   * CLASS TERMS
033   * Listed in the order given on the Darwin Core Quick Reference Guide.
034   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
035   * Location is not on this list because it is a term in the dcterm namespace.
036   */
037  Occurrence(DwcTerm.GROUP_OCCURRENCE, "DarwinCore", "SimpleDarwinCore"),
038  Organism(DwcTerm.GROUP_ORGANISM),
039  MaterialEntity(DwcTerm.GROUP_MATERIAL_ENTITY),
040  MaterialSample(DwcTerm.GROUP_MATERIAL_SAMPLE),
041  Event(DwcTerm.GROUP_EVENT),
042  GeologicalContext(DwcTerm.GROUP_GEOLOGICALCONTEXT),
043  Identification(DwcTerm.GROUP_IDENTIFICATION),
044  Taxon(DwcTerm.GROUP_TAXON),
045  MeasurementOrFact(DwcTerm.GROUP_MEASUREMENTORFACT),
046  ResourceRelationship(DwcTerm.GROUP_RESOURCERELATIONSHIP),
047
048  /**
049   * PROPERTY TERMS
050   * Listed in the order given on the Darwin Core Quick Reference Guide.
051   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
052   */
053  institutionID(DwcTerm.GROUP_RECORD),
054  collectionID(DwcTerm.GROUP_RECORD),
055  datasetID(DwcTerm.GROUP_RECORD),
056  institutionCode(DwcTerm.GROUP_RECORD),
057  collectionCode(DwcTerm.GROUP_RECORD),
058  datasetName(DwcTerm.GROUP_RECORD),
059  ownerInstitutionCode(DwcTerm.GROUP_RECORD),
060  basisOfRecord(DwcTerm.GROUP_RECORD),
061  informationWithheld(DwcTerm.GROUP_RECORD),
062  dataGeneralizations(DwcTerm.GROUP_RECORD),
063  dynamicProperties(DwcTerm.GROUP_RECORD),
064
065  occurrenceID(DwcTerm.GROUP_OCCURRENCE),
066  catalogNumber(DwcTerm.GROUP_OCCURRENCE, "catalogNumberNumeric"),
067  recordNumber(DwcTerm.GROUP_OCCURRENCE, "collectorNumber"),
068  recordedBy(DwcTerm.GROUP_OCCURRENCE, "collector"),
069  recordedByID(DwcTerm.GROUP_OCCURRENCE, "gbif:recordedByID", "http://rs.gbif.org/terms/1.0/recordedByID"),
070  individualCount(DwcTerm.GROUP_OCCURRENCE),
071  organismQuantity(DwcTerm.GROUP_OCCURRENCE),
072  organismQuantityType(DwcTerm.GROUP_OCCURRENCE),
073  @Vocabulary sex(DwcTerm.GROUP_OCCURRENCE),
074  @Vocabulary lifeStage(DwcTerm.GROUP_OCCURRENCE),
075  reproductiveCondition(DwcTerm.GROUP_OCCURRENCE),
076  caste(DwcTerm.GROUP_OCCURRENCE),
077  behavior(DwcTerm.GROUP_OCCURRENCE),
078  vitality(DwcTerm.GROUP_OCCURRENCE),
079  @Vocabulary establishmentMeans(DwcTerm.GROUP_OCCURRENCE),
080  @Vocabulary degreeOfEstablishment(DwcTerm.GROUP_OCCURRENCE),
081  @Vocabulary pathway(DwcTerm.GROUP_OCCURRENCE),
082  georeferenceVerificationStatus(DwcTerm.GROUP_OCCURRENCE),
083  occurrenceStatus(DwcTerm.GROUP_OCCURRENCE),
084  preparations(DwcTerm.GROUP_MATERIAL_ENTITY),
085  // Incorrect namespace was used in the GGBN Loan extension: https://rs.gbif.org/extension/ggbn/loan.xml#disposition
086  // https://github.com/gbif/rs.gbif.org/issues/132
087  disposition(DwcTerm.GROUP_MATERIAL_ENTITY, "http://purl.org/dc/terms/disposition"),
088  associatedMedia(DwcTerm.GROUP_OCCURRENCE),
089  associatedOccurrences(DwcTerm.GROUP_OCCURRENCE),
090  associatedReferences(DwcTerm.GROUP_OCCURRENCE),
091  associatedSequences(DwcTerm.GROUP_MATERIAL_ENTITY),
092  associatedTaxa(DwcTerm.GROUP_OCCURRENCE),
093  otherCatalogNumbers(DwcTerm.GROUP_OCCURRENCE),
094  occurrenceRemarks(DwcTerm.GROUP_OCCURRENCE),
095
096  organismID(DwcTerm.GROUP_ORGANISM, "individualID"),
097  organismName(DwcTerm.GROUP_ORGANISM),
098  organismScope(DwcTerm.GROUP_ORGANISM),
099  associatedOrganisms(DwcTerm.GROUP_ORGANISM),
100  previousIdentifications(DwcTerm.GROUP_ORGANISM),
101  organismRemarks(DwcTerm.GROUP_ORGANISM),
102
103  materialEntityID(DwcTerm.GROUP_MATERIAL_ENTITY),
104  materialEntityRemarks(DwcTerm.GROUP_MATERIAL_ENTITY),
105  verbatimLabel(DwcTerm.GROUP_MATERIAL_ENTITY),
106  materialSampleID(DwcTerm.GROUP_MATERIAL_SAMPLE),
107
108  eventID(DwcTerm.GROUP_EVENT),
109  parentEventID(DwcTerm.GROUP_EVENT),
110  @Vocabulary eventType(DwcTerm.GROUP_EVENT),
111  fieldNumber(DwcTerm.GROUP_EVENT),
112  eventDate(DwcTerm.GROUP_EVENT, "earliestDateCollected", "latestDateCollected"),
113  eventTime(DwcTerm.GROUP_EVENT),
114  startDayOfYear(DwcTerm.GROUP_EVENT),
115  endDayOfYear(DwcTerm.GROUP_EVENT),
116  year(DwcTerm.GROUP_EVENT),
117  month(DwcTerm.GROUP_EVENT),
118  day(DwcTerm.GROUP_EVENT),
119  verbatimEventDate(DwcTerm.GROUP_EVENT),
120  habitat(DwcTerm.GROUP_EVENT),
121  samplingProtocol(DwcTerm.GROUP_EVENT),
122  sampleSizeValue(DwcTerm.GROUP_EVENT),
123  sampleSizeUnit(DwcTerm.GROUP_EVENT),
124  samplingEffort(DwcTerm.GROUP_EVENT),
125  fieldNotes(DwcTerm.GROUP_EVENT),
126  eventRemarks(DwcTerm.GROUP_EVENT),
127
128  locationID(DwcTerm.GROUP_LOCATION),
129  higherGeographyID(DwcTerm.GROUP_LOCATION),
130  higherGeography(DwcTerm.GROUP_LOCATION),
131  continent(DwcTerm.GROUP_LOCATION),
132  waterBody(DwcTerm.GROUP_LOCATION),
133  islandGroup(DwcTerm.GROUP_LOCATION),
134  island(DwcTerm.GROUP_LOCATION),
135  country(DwcTerm.GROUP_LOCATION),
136  countryCode(DwcTerm.GROUP_LOCATION),
137  stateProvince(DwcTerm.GROUP_LOCATION, "state", "province"),
138  county(DwcTerm.GROUP_LOCATION),
139  municipality(DwcTerm.GROUP_LOCATION, "city"),
140  locality(DwcTerm.GROUP_LOCATION),
141  verbatimLocality(DwcTerm.GROUP_LOCATION),
142  minimumElevationInMeters(DwcTerm.GROUP_LOCATION),
143  maximumElevationInMeters(DwcTerm.GROUP_LOCATION),
144  verbatimElevation(DwcTerm.GROUP_LOCATION),
145  verticalDatum(DwcTerm.GROUP_LOCATION),
146  minimumDepthInMeters(DwcTerm.GROUP_LOCATION),
147  maximumDepthInMeters(DwcTerm.GROUP_LOCATION),
148  verbatimDepth(DwcTerm.GROUP_LOCATION),
149  minimumDistanceAboveSurfaceInMeters(DwcTerm.GROUP_LOCATION),
150  maximumDistanceAboveSurfaceInMeters(DwcTerm.GROUP_LOCATION),
151  locationAccordingTo(DwcTerm.GROUP_LOCATION),
152  locationRemarks(DwcTerm.GROUP_LOCATION),
153  decimalLatitude(DwcTerm.GROUP_LOCATION, "latitude"),
154  decimalLongitude(DwcTerm.GROUP_LOCATION, "longitude"),
155  geodeticDatum(DwcTerm.GROUP_LOCATION, "datum", "horizontaldatum"),
156  coordinateUncertaintyInMeters(DwcTerm.GROUP_LOCATION),
157  coordinatePrecision(DwcTerm.GROUP_LOCATION),
158  pointRadiusSpatialFit(DwcTerm.GROUP_LOCATION),
159  verbatimCoordinates(DwcTerm.GROUP_LOCATION),
160  verbatimLatitude(DwcTerm.GROUP_LOCATION),
161  verbatimLongitude(DwcTerm.GROUP_LOCATION),
162  verbatimCoordinateSystem(DwcTerm.GROUP_LOCATION),
163  verbatimSRS(DwcTerm.GROUP_LOCATION),
164  footprintWKT(DwcTerm.GROUP_LOCATION),
165  footprintSRS(DwcTerm.GROUP_LOCATION),
166  footprintSpatialFit(DwcTerm.GROUP_LOCATION),
167  georeferencedBy(DwcTerm.GROUP_LOCATION),
168  georeferencedDate(DwcTerm.GROUP_LOCATION),
169  georeferenceProtocol(DwcTerm.GROUP_LOCATION),
170  georeferenceSources(DwcTerm.GROUP_LOCATION),
171  georeferenceRemarks(DwcTerm.GROUP_LOCATION),
172
173  geologicalContextID(DwcTerm.GROUP_GEOLOGICALCONTEXT),
174  @Vocabulary earliestEonOrLowestEonothem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
175  @Vocabulary latestEonOrHighestEonothem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
176  @Vocabulary earliestEraOrLowestErathem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
177  @Vocabulary latestEraOrHighestErathem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
178  @Vocabulary earliestPeriodOrLowestSystem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
179  @Vocabulary latestPeriodOrHighestSystem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
180  @Vocabulary earliestEpochOrLowestSeries(DwcTerm.GROUP_GEOLOGICALCONTEXT),
181  @Vocabulary latestEpochOrHighestSeries(DwcTerm.GROUP_GEOLOGICALCONTEXT),
182  @Vocabulary earliestAgeOrLowestStage(DwcTerm.GROUP_GEOLOGICALCONTEXT),
183  @Vocabulary latestAgeOrHighestStage(DwcTerm.GROUP_GEOLOGICALCONTEXT),
184  lowestBiostratigraphicZone(DwcTerm.GROUP_GEOLOGICALCONTEXT),
185  highestBiostratigraphicZone(DwcTerm.GROUP_GEOLOGICALCONTEXT),
186  lithostratigraphicTerms(DwcTerm.GROUP_GEOLOGICALCONTEXT),
187  group(DwcTerm.GROUP_GEOLOGICALCONTEXT),
188  formation(DwcTerm.GROUP_GEOLOGICALCONTEXT),
189  member(DwcTerm.GROUP_GEOLOGICALCONTEXT),
190  bed(DwcTerm.GROUP_GEOLOGICALCONTEXT),
191
192  identificationID(DwcTerm.GROUP_IDENTIFICATION),
193  verbatimIdentification(DwcTerm.GROUP_IDENTIFICATION),
194  identificationQualifier(DwcTerm.GROUP_IDENTIFICATION),
195  @Vocabulary typeStatus(DwcTerm.GROUP_IDENTIFICATION),
196  identifiedBy(DwcTerm.GROUP_IDENTIFICATION),
197  identifiedByID(DwcTerm.GROUP_IDENTIFICATION, "gbif:identifiedByID", "http://rs.gbif.org/terms/1.0/identifiedByID"),
198  dateIdentified(DwcTerm.GROUP_IDENTIFICATION),
199  identificationReferences(DwcTerm.GROUP_IDENTIFICATION),
200  identificationVerificationStatus(DwcTerm.GROUP_IDENTIFICATION),
201  identificationRemarks(DwcTerm.GROUP_IDENTIFICATION),
202
203  taxonID(DwcTerm.GROUP_TAXON, "nameUsageID"),
204  scientificNameID(DwcTerm.GROUP_TAXON, "nameID"),
205  acceptedNameUsageID(DwcTerm.GROUP_TAXON, "acceptedTaxonID"),
206  parentNameUsageID(DwcTerm.GROUP_TAXON, "higherNameUsageID", "parentTaxonID"),
207  originalNameUsageID(DwcTerm.GROUP_TAXON, "originalNameID", "basionymID"),
208  nameAccordingToID(DwcTerm.GROUP_TAXON, "taxonAccordingToID"),
209  namePublishedInID(DwcTerm.GROUP_TAXON),
210  taxonConceptID(DwcTerm.GROUP_TAXON),
211  scientificName(DwcTerm.GROUP_TAXON),
212  acceptedNameUsage(DwcTerm.GROUP_TAXON, "acceptedTaxon"),
213  parentNameUsage(DwcTerm.GROUP_TAXON, "parentTaxon", "higherTaxon", "higherNameUsage"),
214  originalNameUsage(DwcTerm.GROUP_TAXON, "originalName", "originalTaxon", "basionym"),
215  nameAccordingTo(DwcTerm.GROUP_TAXON, "taxonAccordingTo"),
216  namePublishedIn(DwcTerm.GROUP_TAXON),
217  namePublishedInYear(DwcTerm.GROUP_TAXON),
218  higherClassification(DwcTerm.GROUP_TAXON),
219  kingdom(DwcTerm.GROUP_TAXON),
220  phylum(DwcTerm.GROUP_TAXON),
221  /**
222   * The taxonomic class.
223   * The real Darwin Core term is class, but as java does not allow this name we use a variation instead.
224   */
225  class_(DwcTerm.GROUP_TAXON, "class"),
226  order(DwcTerm.GROUP_TAXON),
227  superfamily(DwcTerm.GROUP_TAXON),
228  family(DwcTerm.GROUP_TAXON),
229  subfamily(DwcTerm.GROUP_TAXON),
230  tribe(DwcTerm.GROUP_TAXON),
231  subtribe(DwcTerm.GROUP_TAXON),
232  genus(DwcTerm.GROUP_TAXON),
233  genericName(DwcTerm.GROUP_TAXON, "gbif:genericName", "http://rs.gbif.org/terms/1.0/genericName"),
234  subgenus(DwcTerm.GROUP_TAXON),
235  infragenericEpithet(DwcTerm.GROUP_TAXON),
236  specificEpithet(DwcTerm.GROUP_TAXON),
237  infraspecificEpithet(DwcTerm.GROUP_TAXON),
238  cultivarEpithet(DwcTerm.GROUP_TAXON),
239  taxonRank(DwcTerm.GROUP_TAXON, "rank"),
240  verbatimTaxonRank(DwcTerm.GROUP_TAXON),
241  scientificNameAuthorship(DwcTerm.GROUP_TAXON),
242  vernacularName(DwcTerm.GROUP_TAXON),
243  nomenclaturalCode(DwcTerm.GROUP_TAXON),
244  taxonomicStatus(DwcTerm.GROUP_TAXON),
245  nomenclaturalStatus(DwcTerm.GROUP_TAXON),
246  taxonRemarks(DwcTerm.GROUP_TAXON, "taxonRemark"),
247
248  measurementID(DwcTerm.GROUP_MEASUREMENTORFACT),
249  parentMeasurementID(DwcTerm.GROUP_MEASUREMENTORFACT),
250  measurementType(DwcTerm.GROUP_MEASUREMENTORFACT),
251  measurementValue(DwcTerm.GROUP_MEASUREMENTORFACT),
252  measurementAccuracy(DwcTerm.GROUP_MEASUREMENTORFACT),
253  measurementUnit(DwcTerm.GROUP_MEASUREMENTORFACT),
254  measurementDeterminedBy(DwcTerm.GROUP_MEASUREMENTORFACT),
255  measurementDeterminedDate(DwcTerm.GROUP_MEASUREMENTORFACT),
256  measurementMethod(DwcTerm.GROUP_MEASUREMENTORFACT),
257  measurementRemarks(DwcTerm.GROUP_MEASUREMENTORFACT),
258
259  resourceRelationshipID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
260  resourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
261  relationshipOfResourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
262  relatedResourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
263  relationshipOfResource(DwcTerm.GROUP_RESOURCERELATIONSHIP),
264  relationshipAccordingTo(DwcTerm.GROUP_RESOURCERELATIONSHIP),
265  relationshipEstablishedDate(DwcTerm.GROUP_RESOURCERELATIONSHIP),
266  relationshipRemarks(DwcTerm.GROUP_RESOURCERELATIONSHIP);
267
268  private static final String PREFIX = "dwc";
269  private static final String NS = "http://rs.tdwg.org/dwc/terms/";
270  private static final URI NS_URI = URI.create(NS);
271
272  public static final String GROUP_RECORD = "Record";
273  public static final String GROUP_OCCURRENCE = "Occurrence";
274  public static final String GROUP_ORGANISM = "Organism";
275  public static final String GROUP_MATERIAL_ENTITY = "MaterialEntity";
276  public static final String GROUP_MATERIAL_SAMPLE = "MaterialSample";
277  public static final String GROUP_EVENT = "Event";
278  public static final String GROUP_LOCATION = "Location";
279  public static final String GROUP_GEOLOGICALCONTEXT = "GeologicalContext";
280  public static final String GROUP_IDENTIFICATION = "Identification";
281  public static final String GROUP_TAXON = "Taxon";
282  public static final String GROUP_MEASUREMENTORFACT = "MeasurementOrFact";
283  public static final String GROUP_RESOURCERELATIONSHIP = "ResourceRelationship";
284
285  /**
286   * Lists all term groups in the order given on the Darwin Core Quick Reference Guide.
287   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
288   */
289  public static final String[] GROUPS =
290    {GROUP_RECORD, GROUP_OCCURRENCE, GROUP_ORGANISM,
291    GROUP_MATERIAL_ENTITY, GROUP_MATERIAL_SAMPLE, GROUP_EVENT, GROUP_LOCATION,
292          GROUP_GEOLOGICALCONTEXT, GROUP_IDENTIFICATION, GROUP_TAXON,
293          GROUP_MEASUREMENTORFACT, GROUP_RESOURCERELATIONSHIP};
294
295  public static final DwcTerm[] TAXONOMIC_TERMS = Arrays.stream(values())
296                                                        .filter(t -> !t.isClass() && t.getGroup().equals(GROUP_TAXON))
297                                                        .toArray(DwcTerm[]::new);
298
299  /**
300   * List of all higher rank terms in dwc, ordered by rank and starting with kingdom.
301   */
302  public static final DwcTerm[] HIGHER_RANKS =
303    {DwcTerm.kingdom, DwcTerm.phylum, DwcTerm.class_, DwcTerm.order,
304        DwcTerm.superfamily, DwcTerm.family, DwcTerm.subfamily,
305        DwcTerm.tribe, DwcTerm.subtribe,
306        DwcTerm.genus, DwcTerm.subgenus};
307
308  /**
309   * List of all class terms in dwc.
310   */
311  public static final DwcTerm[] CLASS_TERMS = Arrays.stream(values())
312                                                    .filter(DwcTerm::isClass)
313                                                    .toArray(DwcTerm[]::new);
314
315  private final String groupName;
316  public final String normQName;
317  public final String[] normAlts;
318
319  private DwcTerm(String groupName, String... alternatives) {
320    normQName = TermFactory.normaliseTerm(qualifiedName());
321    for (int i = 0; i < alternatives.length; i++) {
322      alternatives[i] = TermFactory.normaliseTerm(alternatives[i]);
323    }
324    normAlts = alternatives;
325    this.groupName = groupName;
326  }
327
328
329  /**
330   * The simple term name without a namespace.
331   * For example scientificName.
332   * @return simple term name
333   */
334  @Override
335  public String simpleName() {
336    if (this == class_) {
337      return "class";
338    }
339    return name();
340  }
341
342  /**
343   * Array of alternative simple names in use for the term.
344   * Often based on older dwc versions.
345   * @return simple term name
346   */
347  @Override
348  public String[] alternativeNames() {
349    return normAlts;
350  }
351
352  /**
353   * The optional group the term is grouped in.
354   * For example Taxon, Identification, etc.
355   */
356  public String getGroup() {
357    return groupName;
358  }
359
360  /**
361   * @return true if the dwc term is defining a class instead of a property, e.g. Taxon
362   */
363  @Override
364  public boolean isClass() {
365    return Character.isUpperCase(simpleName().charAt(0));
366  }
367
368  /**
369   * List all terms that belong to a given group.
370   *
371   * @param group the group to list terms for
372   *
373   * @return the list of dwc terms in the given group
374   */
375  public static List<DwcTerm> listByGroup(String group) {
376    List<DwcTerm> terms = new ArrayList<DwcTerm>();
377    for (DwcTerm t : DwcTerm.values()) {
378      if (t.getGroup().equalsIgnoreCase(group)) {
379        terms.add(t);
380      }
381    }
382    return terms;
383  }
384
385  @Override
386  public String prefix() {
387    return PREFIX;
388  }
389
390  @Override
391  public URI namespace() {
392    return NS_URI;
393  }
394
395  @Override
396  public String toString() {
397    return prefixedName();
398  }
399}