/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.dwc.terms;

import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
023
024/**
025 * All Darwin Core terms with namespace http://rs.tdwg.org/dwc/terms/ as an
026 * enumeration with alternative term names found sometimes in data.
027 * Old, deprecated terms are kept but marked as such.
028 */
029public enum DwcTerm implements Term, AlternativeNames, Serializable {
030
031  /**
032   * CLASS TERMS
033   * Listed in the order given on the Darwin Core Quick Reference Guide.
034   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
035   * Location is not on this list because it is a term in the dcterm namespace.
036   */
037  Occurrence(DwcTerm.GROUP_OCCURRENCE, "DarwinCore", "SimpleDarwinCore"),
038  Organism(DwcTerm.GROUP_ORGANISM),
039  MaterialEntity(DwcTerm.GROUP_MATERIAL_ENTITY),
040  MaterialSample(DwcTerm.GROUP_MATERIAL_SAMPLE),
041  Event(DwcTerm.GROUP_EVENT),
042  GeologicalContext(DwcTerm.GROUP_GEOLOGICALCONTEXT),
043  Identification(DwcTerm.GROUP_IDENTIFICATION),
044  Taxon(DwcTerm.GROUP_TAXON),
045  MeasurementOrFact(DwcTerm.GROUP_MEASUREMENTORFACT),
046  ResourceRelationship(DwcTerm.GROUP_RESOURCERELATIONSHIP),
047
048  /**
049   * PROPERTY TERMS
050   * Listed in the order given on the Darwin Core Quick Reference Guide.
051   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
052   */
053  institutionID(DwcTerm.GROUP_RECORD),
054  collectionID(DwcTerm.GROUP_RECORD),
055  datasetID(DwcTerm.GROUP_RECORD),
056  institutionCode(DwcTerm.GROUP_RECORD),
057  collectionCode(DwcTerm.GROUP_RECORD),
058  datasetName(DwcTerm.GROUP_RECORD),
059  ownerInstitutionCode(DwcTerm.GROUP_RECORD),
060  basisOfRecord(DwcTerm.GROUP_RECORD),
061  informationWithheld(DwcTerm.GROUP_RECORD),
062  dataGeneralizations(DwcTerm.GROUP_RECORD),
063  dynamicProperties(DwcTerm.GROUP_RECORD),
064
065  occurrenceID(DwcTerm.GROUP_OCCURRENCE),
066  catalogNumber(DwcTerm.GROUP_OCCURRENCE, "catalogNumberNumeric"),
067  recordNumber(DwcTerm.GROUP_OCCURRENCE, "collectorNumber"),
068  recordedBy(DwcTerm.GROUP_OCCURRENCE, "collector"),
069  recordedByID(DwcTerm.GROUP_OCCURRENCE, "gbif:recordedByID", "http://rs.gbif.org/terms/1.0/recordedByID"),
070  individualCount(DwcTerm.GROUP_OCCURRENCE),
071  organismQuantity(DwcTerm.GROUP_OCCURRENCE),
072  organismQuantityType(DwcTerm.GROUP_OCCURRENCE),
073  @Vocabulary sex(DwcTerm.GROUP_OCCURRENCE),
074  @Vocabulary lifeStage(DwcTerm.GROUP_OCCURRENCE),
075  reproductiveCondition(DwcTerm.GROUP_OCCURRENCE),
076  caste(DwcTerm.GROUP_OCCURRENCE),
077  behavior(DwcTerm.GROUP_OCCURRENCE),
078  vitality(DwcTerm.GROUP_OCCURRENCE),
079  @Vocabulary establishmentMeans(DwcTerm.GROUP_OCCURRENCE),
080  @Vocabulary degreeOfEstablishment(DwcTerm.GROUP_OCCURRENCE),
081  @Vocabulary pathway(DwcTerm.GROUP_OCCURRENCE),
082  georeferenceVerificationStatus(DwcTerm.GROUP_OCCURRENCE),
083  occurrenceStatus(DwcTerm.GROUP_OCCURRENCE),
084  preparations(DwcTerm.GROUP_MATERIAL_ENTITY),
085  // Incorrect namespace was used in the GGBN Loan extension: https://rs.gbif.org/extension/ggbn/loan.xml#disposition
086  // https://github.com/gbif/rs.gbif.org/issues/132
087  disposition(DwcTerm.GROUP_MATERIAL_ENTITY, "http://purl.org/dc/terms/disposition"),
088  associatedMedia(DwcTerm.GROUP_OCCURRENCE),
089  associatedOccurrences(DwcTerm.GROUP_OCCURRENCE),
090  associatedReferences(DwcTerm.GROUP_OCCURRENCE),
091  associatedSequences(DwcTerm.GROUP_MATERIAL_ENTITY),
092  associatedTaxa(DwcTerm.GROUP_OCCURRENCE),
093  otherCatalogNumbers(DwcTerm.GROUP_OCCURRENCE),
094  occurrenceRemarks(DwcTerm.GROUP_OCCURRENCE),
095
096  organismID(DwcTerm.GROUP_ORGANISM, "individualID"),
097  organismName(DwcTerm.GROUP_ORGANISM),
098  organismScope(DwcTerm.GROUP_ORGANISM),
099  associatedOrganisms(DwcTerm.GROUP_ORGANISM),
100  previousIdentifications(DwcTerm.GROUP_ORGANISM),
101  organismRemarks(DwcTerm.GROUP_ORGANISM),
102
103  materialEntityID(DwcTerm.GROUP_MATERIAL_ENTITY),
104  materialEntityRemarks(DwcTerm.GROUP_MATERIAL_ENTITY),
105  verbatimLabel(DwcTerm.GROUP_MATERIAL_ENTITY),
106  materialSampleID(DwcTerm.GROUP_MATERIAL_SAMPLE),
107
108  eventID(DwcTerm.GROUP_EVENT),
109  parentEventID(DwcTerm.GROUP_EVENT),
110  @Vocabulary eventType(DwcTerm.GROUP_EVENT),
111  fieldNumber(DwcTerm.GROUP_EVENT),
112  eventDate(DwcTerm.GROUP_EVENT, "earliestDateCollected", "latestDateCollected"),
113  eventTime(DwcTerm.GROUP_EVENT),
114  startDayOfYear(DwcTerm.GROUP_EVENT),
115  endDayOfYear(DwcTerm.GROUP_EVENT),
116  year(DwcTerm.GROUP_EVENT),
117  month(DwcTerm.GROUP_EVENT),
118  day(DwcTerm.GROUP_EVENT),
119  verbatimEventDate(DwcTerm.GROUP_EVENT),
120  habitat(DwcTerm.GROUP_EVENT),
121  samplingProtocol(DwcTerm.GROUP_EVENT),
122  sampleSizeValue(DwcTerm.GROUP_EVENT),
123  sampleSizeUnit(DwcTerm.GROUP_EVENT),
124  samplingEffort(DwcTerm.GROUP_EVENT),
125  fieldNotes(DwcTerm.GROUP_EVENT),
126  eventRemarks(DwcTerm.GROUP_EVENT),
127  projectTitle(DwcTerm.GROUP_EVENT),
128  projectID(DwcTerm.GROUP_EVENT),
129  fundingAttribution(DwcTerm.GROUP_EVENT),
130  fundingAttributionID(DwcTerm.GROUP_EVENT),
131
132  locationID(DwcTerm.GROUP_LOCATION),
133  higherGeographyID(DwcTerm.GROUP_LOCATION),
134  higherGeography(DwcTerm.GROUP_LOCATION),
135  continent(DwcTerm.GROUP_LOCATION),
136  waterBody(DwcTerm.GROUP_LOCATION),
137  islandGroup(DwcTerm.GROUP_LOCATION),
138  island(DwcTerm.GROUP_LOCATION),
139  country(DwcTerm.GROUP_LOCATION),
140  countryCode(DwcTerm.GROUP_LOCATION),
141  stateProvince(DwcTerm.GROUP_LOCATION, "state", "province"),
142  county(DwcTerm.GROUP_LOCATION),
143  municipality(DwcTerm.GROUP_LOCATION, "city"),
144  locality(DwcTerm.GROUP_LOCATION),
145  verbatimLocality(DwcTerm.GROUP_LOCATION),
146  minimumElevationInMeters(DwcTerm.GROUP_LOCATION),
147  maximumElevationInMeters(DwcTerm.GROUP_LOCATION),
148  verbatimElevation(DwcTerm.GROUP_LOCATION),
149  verticalDatum(DwcTerm.GROUP_LOCATION),
150  minimumDepthInMeters(DwcTerm.GROUP_LOCATION),
151  maximumDepthInMeters(DwcTerm.GROUP_LOCATION),
152  verbatimDepth(DwcTerm.GROUP_LOCATION),
153  minimumDistanceAboveSurfaceInMeters(DwcTerm.GROUP_LOCATION),
154  maximumDistanceAboveSurfaceInMeters(DwcTerm.GROUP_LOCATION),
155  locationAccordingTo(DwcTerm.GROUP_LOCATION),
156  locationRemarks(DwcTerm.GROUP_LOCATION),
157  decimalLatitude(DwcTerm.GROUP_LOCATION, "latitude"),
158  decimalLongitude(DwcTerm.GROUP_LOCATION, "longitude"),
159  geodeticDatum(DwcTerm.GROUP_LOCATION, "datum", "horizontaldatum"),
160  coordinateUncertaintyInMeters(DwcTerm.GROUP_LOCATION),
161  coordinatePrecision(DwcTerm.GROUP_LOCATION),
162  pointRadiusSpatialFit(DwcTerm.GROUP_LOCATION),
163  verbatimCoordinates(DwcTerm.GROUP_LOCATION),
164  verbatimLatitude(DwcTerm.GROUP_LOCATION),
165  verbatimLongitude(DwcTerm.GROUP_LOCATION),
166  verbatimCoordinateSystem(DwcTerm.GROUP_LOCATION),
167  verbatimSRS(DwcTerm.GROUP_LOCATION),
168  footprintWKT(DwcTerm.GROUP_LOCATION),
169  footprintSRS(DwcTerm.GROUP_LOCATION),
170  footprintSpatialFit(DwcTerm.GROUP_LOCATION),
171  georeferencedBy(DwcTerm.GROUP_LOCATION),
172  georeferencedDate(DwcTerm.GROUP_LOCATION),
173  georeferenceProtocol(DwcTerm.GROUP_LOCATION),
174  georeferenceSources(DwcTerm.GROUP_LOCATION),
175  georeferenceRemarks(DwcTerm.GROUP_LOCATION),
176
177  geologicalContextID(DwcTerm.GROUP_GEOLOGICALCONTEXT),
178  @Vocabulary earliestEonOrLowestEonothem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
179  @Vocabulary latestEonOrHighestEonothem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
180  @Vocabulary earliestEraOrLowestErathem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
181  @Vocabulary latestEraOrHighestErathem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
182  @Vocabulary earliestPeriodOrLowestSystem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
183  @Vocabulary latestPeriodOrHighestSystem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
184  @Vocabulary earliestEpochOrLowestSeries(DwcTerm.GROUP_GEOLOGICALCONTEXT),
185  @Vocabulary latestEpochOrHighestSeries(DwcTerm.GROUP_GEOLOGICALCONTEXT),
186  @Vocabulary earliestAgeOrLowestStage(DwcTerm.GROUP_GEOLOGICALCONTEXT),
187  @Vocabulary latestAgeOrHighestStage(DwcTerm.GROUP_GEOLOGICALCONTEXT),
188  lowestBiostratigraphicZone(DwcTerm.GROUP_GEOLOGICALCONTEXT),
189  highestBiostratigraphicZone(DwcTerm.GROUP_GEOLOGICALCONTEXT),
190  lithostratigraphicTerms(DwcTerm.GROUP_GEOLOGICALCONTEXT),
191  group(DwcTerm.GROUP_GEOLOGICALCONTEXT),
192  formation(DwcTerm.GROUP_GEOLOGICALCONTEXT),
193  member(DwcTerm.GROUP_GEOLOGICALCONTEXT),
194  bed(DwcTerm.GROUP_GEOLOGICALCONTEXT),
195
196  identificationID(DwcTerm.GROUP_IDENTIFICATION),
197  verbatimIdentification(DwcTerm.GROUP_IDENTIFICATION),
198  identificationQualifier(DwcTerm.GROUP_IDENTIFICATION),
199  @Vocabulary typeStatus(DwcTerm.GROUP_IDENTIFICATION),
200  identifiedBy(DwcTerm.GROUP_IDENTIFICATION),
201  identifiedByID(DwcTerm.GROUP_IDENTIFICATION, "gbif:identifiedByID", "http://rs.gbif.org/terms/1.0/identifiedByID"),
202  dateIdentified(DwcTerm.GROUP_IDENTIFICATION),
203  identificationReferences(DwcTerm.GROUP_IDENTIFICATION),
204  identificationVerificationStatus(DwcTerm.GROUP_IDENTIFICATION),
205  identificationRemarks(DwcTerm.GROUP_IDENTIFICATION),
206
207  taxonID(DwcTerm.GROUP_TAXON, "nameUsageID"),
208  scientificNameID(DwcTerm.GROUP_TAXON, "nameID"),
209  acceptedNameUsageID(DwcTerm.GROUP_TAXON, "acceptedTaxonID"),
210  parentNameUsageID(DwcTerm.GROUP_TAXON, "higherNameUsageID", "parentTaxonID"),
211  originalNameUsageID(DwcTerm.GROUP_TAXON, "originalNameID", "basionymID"),
212  nameAccordingToID(DwcTerm.GROUP_TAXON, "taxonAccordingToID"),
213  namePublishedInID(DwcTerm.GROUP_TAXON),
214  taxonConceptID(DwcTerm.GROUP_TAXON),
215  scientificName(DwcTerm.GROUP_TAXON),
216  acceptedNameUsage(DwcTerm.GROUP_TAXON, "acceptedTaxon"),
217  parentNameUsage(DwcTerm.GROUP_TAXON, "parentTaxon", "higherTaxon", "higherNameUsage"),
218  originalNameUsage(DwcTerm.GROUP_TAXON, "originalName", "originalTaxon", "basionym"),
219  nameAccordingTo(DwcTerm.GROUP_TAXON, "taxonAccordingTo"),
220  namePublishedIn(DwcTerm.GROUP_TAXON),
221  namePublishedInYear(DwcTerm.GROUP_TAXON),
222  higherClassification(DwcTerm.GROUP_TAXON),
223  kingdom(DwcTerm.GROUP_TAXON),
224  phylum(DwcTerm.GROUP_TAXON),
225  /**
226   * The taxonomic class.
227   * The real Darwin Core term is class, but as java does not allow this name we use a variation instead.
228   */
229  class_(DwcTerm.GROUP_TAXON, "class"),
230  order(DwcTerm.GROUP_TAXON),
231  superfamily(DwcTerm.GROUP_TAXON),
232  family(DwcTerm.GROUP_TAXON),
233  subfamily(DwcTerm.GROUP_TAXON),
234  tribe(DwcTerm.GROUP_TAXON),
235  subtribe(DwcTerm.GROUP_TAXON),
236  genus(DwcTerm.GROUP_TAXON),
237  genericName(DwcTerm.GROUP_TAXON, "gbif:genericName", "http://rs.gbif.org/terms/1.0/genericName"),
238  subgenus(DwcTerm.GROUP_TAXON),
239  infragenericEpithet(DwcTerm.GROUP_TAXON),
240  specificEpithet(DwcTerm.GROUP_TAXON),
241  infraspecificEpithet(DwcTerm.GROUP_TAXON),
242  cultivarEpithet(DwcTerm.GROUP_TAXON),
243  taxonRank(DwcTerm.GROUP_TAXON, "rank"),
244  verbatimTaxonRank(DwcTerm.GROUP_TAXON),
245  scientificNameAuthorship(DwcTerm.GROUP_TAXON),
246  vernacularName(DwcTerm.GROUP_TAXON),
247  nomenclaturalCode(DwcTerm.GROUP_TAXON),
248  taxonomicStatus(DwcTerm.GROUP_TAXON),
249  nomenclaturalStatus(DwcTerm.GROUP_TAXON),
250  taxonRemarks(DwcTerm.GROUP_TAXON, "taxonRemark"),
251
252  measurementID(DwcTerm.GROUP_MEASUREMENTORFACT),
253  parentMeasurementID(DwcTerm.GROUP_MEASUREMENTORFACT),
254  measurementType(DwcTerm.GROUP_MEASUREMENTORFACT),
255  measurementValue(DwcTerm.GROUP_MEASUREMENTORFACT),
256  measurementAccuracy(DwcTerm.GROUP_MEASUREMENTORFACT),
257  measurementUnit(DwcTerm.GROUP_MEASUREMENTORFACT),
258  measurementDeterminedBy(DwcTerm.GROUP_MEASUREMENTORFACT),
259  measurementDeterminedDate(DwcTerm.GROUP_MEASUREMENTORFACT),
260  measurementMethod(DwcTerm.GROUP_MEASUREMENTORFACT),
261  measurementRemarks(DwcTerm.GROUP_MEASUREMENTORFACT),
262
263  resourceRelationshipID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
264  resourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
265  relationshipOfResourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
266  relatedResourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
267  relationshipOfResource(DwcTerm.GROUP_RESOURCERELATIONSHIP),
268  relationshipAccordingTo(DwcTerm.GROUP_RESOURCERELATIONSHIP),
269  relationshipEstablishedDate(DwcTerm.GROUP_RESOURCERELATIONSHIP),
270  relationshipRemarks(DwcTerm.GROUP_RESOURCERELATIONSHIP);
271
272  private static final String PREFIX = "dwc";
273  private static final String NS = "http://rs.tdwg.org/dwc/terms/";
274  private static final URI NS_URI = URI.create(NS);
275
276  public static final String GROUP_RECORD = "Record";
277  public static final String GROUP_OCCURRENCE = "Occurrence";
278  public static final String GROUP_ORGANISM = "Organism";
279  public static final String GROUP_MATERIAL_ENTITY = "MaterialEntity";
280  public static final String GROUP_MATERIAL_SAMPLE = "MaterialSample";
281  public static final String GROUP_EVENT = "Event";
282  public static final String GROUP_LOCATION = "Location";
283  public static final String GROUP_GEOLOGICALCONTEXT = "GeologicalContext";
284  public static final String GROUP_IDENTIFICATION = "Identification";
285  public static final String GROUP_TAXON = "Taxon";
286  public static final String GROUP_MEASUREMENTORFACT = "MeasurementOrFact";
287  public static final String GROUP_RESOURCERELATIONSHIP = "ResourceRelationship";
288
289  /**
290   * Lists all term groups in the order given on the Darwin Core Quick Reference Guide.
291   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
292   */
293  public static final String[] GROUPS =
294    {GROUP_RECORD, GROUP_OCCURRENCE, GROUP_ORGANISM,
295    GROUP_MATERIAL_ENTITY, GROUP_MATERIAL_SAMPLE, GROUP_EVENT, GROUP_LOCATION,
296          GROUP_GEOLOGICALCONTEXT, GROUP_IDENTIFICATION, GROUP_TAXON,
297          GROUP_MEASUREMENTORFACT, GROUP_RESOURCERELATIONSHIP};
298
299  public static final DwcTerm[] TAXONOMIC_TERMS = Arrays.stream(values())
300                                                        .filter(t -> !t.isClass() && t.getGroup().equals(GROUP_TAXON))
301                                                        .toArray(DwcTerm[]::new);
302
303  /**
304   * List of all higher rank terms in dwc, ordered by rank and starting with kingdom.
305   */
306  public static final DwcTerm[] HIGHER_RANKS =
307    {DwcTerm.kingdom, DwcTerm.phylum, DwcTerm.class_, DwcTerm.order,
308        DwcTerm.superfamily, DwcTerm.family, DwcTerm.subfamily,
309        DwcTerm.tribe, DwcTerm.subtribe,
310        DwcTerm.genus, DwcTerm.subgenus};
311
312  /**
313   * List of all class terms in dwc.
314   */
315  public static final DwcTerm[] CLASS_TERMS = Arrays.stream(values())
316                                                    .filter(DwcTerm::isClass)
317                                                    .toArray(DwcTerm[]::new);
318
319  private final String groupName;
320  public final String normQName;
321  public final String[] normAlts;
322
323  private DwcTerm(String groupName, String... alternatives) {
324    normQName = TermFactory.normaliseTerm(qualifiedName());
325    for (int i = 0; i < alternatives.length; i++) {
326      alternatives[i] = TermFactory.normaliseTerm(alternatives[i]);
327    }
328    normAlts = alternatives;
329    this.groupName = groupName;
330  }
331
332
333  /**
334   * The simple term name without a namespace.
335   * For example scientificName.
336   * @return simple term name
337   */
338  @Override
339  public String simpleName() {
340    if (this == class_) {
341      return "class";
342    }
343    return name();
344  }
345
346  /**
347   * Array of alternative simple names in use for the term.
348   * Often based on older dwc versions.
349   * @return simple term name
350   */
351  @Override
352  public String[] alternativeNames() {
353    return normAlts;
354  }
355
356  /**
357   * The optional group the term is grouped in.
358   * For example Taxon, Identification, etc.
359   */
360  public String getGroup() {
361    return groupName;
362  }
363
364  /**
365   * @return true if the dwc term is defining a class instead of a property, e.g. Taxon
366   */
367  @Override
368  public boolean isClass() {
369    return Character.isUpperCase(simpleName().charAt(0));
370  }
371
372  /**
373   * List all terms that belong to a given group.
374   *
375   * @param group the group to list terms for
376   *
377   * @return the list of dwc terms in the given group
378   */
379  public static List<DwcTerm> listByGroup(String group) {
380    List<DwcTerm> terms = new ArrayList<DwcTerm>();
381    for (DwcTerm t : DwcTerm.values()) {
382      if (t.getGroup().equalsIgnoreCase(group)) {
383        terms.add(t);
384      }
385    }
386    return terms;
387  }
388
389  @Override
390  public String prefix() {
391    return PREFIX;
392  }
393
394  @Override
395  public URI namespace() {
396    return NS_URI;
397  }
398
399  @Override
400  public String toString() {
401    return prefixedName();
402  }
403}