001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
018import java.io.Serializable;
019import java.net.URI;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.List;
023
024/**
025 * All Darwin Core terms with namespace http://rs.tdwg.org/dwc/terms/ as an
026 * enumeration with alternative term names found sometimes in data.
027 * Old, deprecated terms are kept but marked as such.
028 */
029public enum DwcTerm implements Term, AlternativeNames, Serializable {
030
031  /**
032   * CLASS TERMS
033   * Listed in the order given on the Darwin Core Quick Reference Guide.
034   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
035   * Location is not on this list because it is a term in the dcterm namespace.
036   */
037  Occurrence(DwcTerm.GROUP_OCCURRENCE, "DarwinCore", "SimpleDarwinCore"),
038  Organism(DwcTerm.GROUP_ORGANISM),
039  MaterialEntity(DwcTerm.GROUP_MATERIAL_ENTITY),
040  MaterialSample(DwcTerm.GROUP_MATERIAL_SAMPLE),
041  Event(DwcTerm.GROUP_EVENT),
042  GeologicalContext(DwcTerm.GROUP_GEOLOGICALCONTEXT),
043  Identification(DwcTerm.GROUP_IDENTIFICATION),
044  Taxon(DwcTerm.GROUP_TAXON),
045  MeasurementOrFact(DwcTerm.GROUP_MEASUREMENTORFACT),
046  ResourceRelationship(DwcTerm.GROUP_RESOURCERELATIONSHIP),
047
048  /**
049   * PROPERTY TERMS
050   * Listed in the order given on the Darwin Core Quick Reference Guide.
051   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
052   */
053  institutionID(DwcTerm.GROUP_RECORD),
054  collectionID(DwcTerm.GROUP_RECORD),
055  datasetID(DwcTerm.GROUP_RECORD),
056  institutionCode(DwcTerm.GROUP_RECORD),
057  collectionCode(DwcTerm.GROUP_RECORD),
058  datasetName(DwcTerm.GROUP_RECORD),
059  ownerInstitutionCode(DwcTerm.GROUP_RECORD),
060  basisOfRecord(DwcTerm.GROUP_RECORD),
061  informationWithheld(DwcTerm.GROUP_RECORD),
062  dataGeneralizations(DwcTerm.GROUP_RECORD),
063  dynamicProperties(DwcTerm.GROUP_RECORD),
064
065  occurrenceID(DwcTerm.GROUP_OCCURRENCE),
066  catalogNumber(DwcTerm.GROUP_OCCURRENCE, "catalogNumberNumeric"),
067  recordNumber(DwcTerm.GROUP_OCCURRENCE, "collectorNumber"),
068  recordedBy(DwcTerm.GROUP_OCCURRENCE, "collector"),
069  recordedByID(DwcTerm.GROUP_OCCURRENCE, "gbif:recordedByID", "http://rs.gbif.org/terms/1.0/recordedByID"),
070  individualCount(DwcTerm.GROUP_OCCURRENCE),
071  organismQuantity(DwcTerm.GROUP_OCCURRENCE),
072  organismQuantityType(DwcTerm.GROUP_OCCURRENCE),
073  sex(DwcTerm.GROUP_OCCURRENCE),
074  @Vocabulary lifeStage(DwcTerm.GROUP_OCCURRENCE),
075  reproductiveCondition(DwcTerm.GROUP_OCCURRENCE),
076  caste(DwcTerm.GROUP_OCCURRENCE),
077  behavior(DwcTerm.GROUP_OCCURRENCE),
078  vitality(DwcTerm.GROUP_OCCURRENCE),
079  @Vocabulary establishmentMeans(DwcTerm.GROUP_OCCURRENCE),
080  @Vocabulary degreeOfEstablishment(DwcTerm.GROUP_OCCURRENCE),
081  @Vocabulary pathway(DwcTerm.GROUP_OCCURRENCE),
082  georeferenceVerificationStatus(DwcTerm.GROUP_OCCURRENCE),
083  occurrenceStatus(DwcTerm.GROUP_OCCURRENCE),
084  preparations(DwcTerm.GROUP_MATERIAL_ENTITY),
085  disposition(DwcTerm.GROUP_MATERIAL_ENTITY),
086  associatedMedia(DwcTerm.GROUP_OCCURRENCE),
087  associatedOccurrences(DwcTerm.GROUP_OCCURRENCE),
088  associatedReferences(DwcTerm.GROUP_OCCURRENCE),
089  associatedSequences(DwcTerm.GROUP_MATERIAL_ENTITY),
090  associatedTaxa(DwcTerm.GROUP_OCCURRENCE),
091  otherCatalogNumbers(DwcTerm.GROUP_OCCURRENCE),
092  occurrenceRemarks(DwcTerm.GROUP_OCCURRENCE),
093
094  organismID(DwcTerm.GROUP_ORGANISM, "individualID"),
095  organismName(DwcTerm.GROUP_ORGANISM),
096  organismScope(DwcTerm.GROUP_ORGANISM),
097  associatedOrganisms(DwcTerm.GROUP_ORGANISM),
098  previousIdentifications(DwcTerm.GROUP_ORGANISM),
099  organismRemarks(DwcTerm.GROUP_ORGANISM),
100
101  materialEntityID(DwcTerm.GROUP_MATERIAL_ENTITY),
102  materialEntityRemarks(DwcTerm.GROUP_MATERIAL_ENTITY),
103  verbatimLabel(DwcTerm.GROUP_MATERIAL_ENTITY),
104  materialSampleID(DwcTerm.GROUP_MATERIAL_SAMPLE),
105
106  eventID(DwcTerm.GROUP_EVENT),
107  parentEventID(DwcTerm.GROUP_EVENT),
108  @Vocabulary eventType(DwcTerm.GROUP_EVENT),
109  fieldNumber(DwcTerm.GROUP_EVENT),
110  eventDate(DwcTerm.GROUP_EVENT, "earliestDateCollected", "latestDateCollected"),
111  eventTime(DwcTerm.GROUP_EVENT),
112  startDayOfYear(DwcTerm.GROUP_EVENT),
113  endDayOfYear(DwcTerm.GROUP_EVENT),
114  year(DwcTerm.GROUP_EVENT),
115  month(DwcTerm.GROUP_EVENT),
116  day(DwcTerm.GROUP_EVENT),
117  verbatimEventDate(DwcTerm.GROUP_EVENT),
118  habitat(DwcTerm.GROUP_EVENT),
119  samplingProtocol(DwcTerm.GROUP_EVENT),
120  sampleSizeValue(DwcTerm.GROUP_EVENT),
121  sampleSizeUnit(DwcTerm.GROUP_EVENT),
122  samplingEffort(DwcTerm.GROUP_EVENT),
123  fieldNotes(DwcTerm.GROUP_EVENT),
124  eventRemarks(DwcTerm.GROUP_EVENT),
125
126  locationID(DwcTerm.GROUP_LOCATION),
127  higherGeographyID(DwcTerm.GROUP_LOCATION),
128  higherGeography(DwcTerm.GROUP_LOCATION),
129  continent(DwcTerm.GROUP_LOCATION),
130  waterBody(DwcTerm.GROUP_LOCATION),
131  islandGroup(DwcTerm.GROUP_LOCATION),
132  island(DwcTerm.GROUP_LOCATION),
133  country(DwcTerm.GROUP_LOCATION),
134  countryCode(DwcTerm.GROUP_LOCATION),
135  stateProvince(DwcTerm.GROUP_LOCATION, "state", "province"),
136  county(DwcTerm.GROUP_LOCATION),
137  municipality(DwcTerm.GROUP_LOCATION, "city"),
138  locality(DwcTerm.GROUP_LOCATION),
139  verbatimLocality(DwcTerm.GROUP_LOCATION),
140  minimumElevationInMeters(DwcTerm.GROUP_LOCATION),
141  maximumElevationInMeters(DwcTerm.GROUP_LOCATION),
142  verbatimElevation(DwcTerm.GROUP_LOCATION),
143  verticalDatum(DwcTerm.GROUP_LOCATION),
144  minimumDepthInMeters(DwcTerm.GROUP_LOCATION),
145  maximumDepthInMeters(DwcTerm.GROUP_LOCATION),
146  verbatimDepth(DwcTerm.GROUP_LOCATION),
147  minimumDistanceAboveSurfaceInMeters(DwcTerm.GROUP_LOCATION),
148  maximumDistanceAboveSurfaceInMeters(DwcTerm.GROUP_LOCATION),
149  locationAccordingTo(DwcTerm.GROUP_LOCATION),
150  locationRemarks(DwcTerm.GROUP_LOCATION),
151  decimalLatitude(DwcTerm.GROUP_LOCATION, "latitude"),
152  decimalLongitude(DwcTerm.GROUP_LOCATION, "longitude"),
153  geodeticDatum(DwcTerm.GROUP_LOCATION, "datum", "horizontaldatum"),
154  coordinateUncertaintyInMeters(DwcTerm.GROUP_LOCATION),
155  coordinatePrecision(DwcTerm.GROUP_LOCATION),
156  pointRadiusSpatialFit(DwcTerm.GROUP_LOCATION),
157  verbatimCoordinates(DwcTerm.GROUP_LOCATION),
158  verbatimLatitude(DwcTerm.GROUP_LOCATION),
159  verbatimLongitude(DwcTerm.GROUP_LOCATION),
160  verbatimCoordinateSystem(DwcTerm.GROUP_LOCATION),
161  verbatimSRS(DwcTerm.GROUP_LOCATION),
162  footprintWKT(DwcTerm.GROUP_LOCATION),
163  footprintSRS(DwcTerm.GROUP_LOCATION),
164  footprintSpatialFit(DwcTerm.GROUP_LOCATION),
165  georeferencedBy(DwcTerm.GROUP_LOCATION),
166  georeferencedDate(DwcTerm.GROUP_LOCATION),
167  georeferenceProtocol(DwcTerm.GROUP_LOCATION),
168  georeferenceSources(DwcTerm.GROUP_LOCATION),
169  georeferenceRemarks(DwcTerm.GROUP_LOCATION),
170
171  geologicalContextID(DwcTerm.GROUP_GEOLOGICALCONTEXT),
172  @Vocabulary earliestEonOrLowestEonothem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
173  @Vocabulary latestEonOrHighestEonothem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
174  @Vocabulary earliestEraOrLowestErathem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
175  @Vocabulary latestEraOrHighestErathem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
176  @Vocabulary earliestPeriodOrLowestSystem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
177  @Vocabulary latestPeriodOrHighestSystem(DwcTerm.GROUP_GEOLOGICALCONTEXT),
178  @Vocabulary earliestEpochOrLowestSeries(DwcTerm.GROUP_GEOLOGICALCONTEXT),
179  @Vocabulary latestEpochOrHighestSeries(DwcTerm.GROUP_GEOLOGICALCONTEXT),
180  @Vocabulary earliestAgeOrLowestStage(DwcTerm.GROUP_GEOLOGICALCONTEXT),
181  @Vocabulary latestAgeOrHighestStage(DwcTerm.GROUP_GEOLOGICALCONTEXT),
182  lowestBiostratigraphicZone(DwcTerm.GROUP_GEOLOGICALCONTEXT),
183  highestBiostratigraphicZone(DwcTerm.GROUP_GEOLOGICALCONTEXT),
184  lithostratigraphicTerms(DwcTerm.GROUP_GEOLOGICALCONTEXT),
185  group(DwcTerm.GROUP_GEOLOGICALCONTEXT),
186  formation(DwcTerm.GROUP_GEOLOGICALCONTEXT),
187  member(DwcTerm.GROUP_GEOLOGICALCONTEXT),
188  bed(DwcTerm.GROUP_GEOLOGICALCONTEXT),
189
190  identificationID(DwcTerm.GROUP_IDENTIFICATION),
191  verbatimIdentification(DwcTerm.GROUP_IDENTIFICATION),
192  identificationQualifier(DwcTerm.GROUP_IDENTIFICATION),
193  typeStatus(DwcTerm.GROUP_IDENTIFICATION),
194  identifiedBy(DwcTerm.GROUP_IDENTIFICATION),
195  identifiedByID(DwcTerm.GROUP_IDENTIFICATION, "gbif:identifiedByID", "http://rs.gbif.org/terms/1.0/identifiedByID"),
196  dateIdentified(DwcTerm.GROUP_IDENTIFICATION),
197  identificationReferences(DwcTerm.GROUP_IDENTIFICATION),
198  identificationVerificationStatus(DwcTerm.GROUP_IDENTIFICATION),
199  identificationRemarks(DwcTerm.GROUP_IDENTIFICATION),
200
201  taxonID(DwcTerm.GROUP_TAXON, "nameUsageID"),
202  scientificNameID(DwcTerm.GROUP_TAXON, "nameID"),
203  acceptedNameUsageID(DwcTerm.GROUP_TAXON, "acceptedTaxonID"),
204  parentNameUsageID(DwcTerm.GROUP_TAXON, "higherNameUsageID", "parentTaxonID"),
205  originalNameUsageID(DwcTerm.GROUP_TAXON, "originalNameID", "basionymID"),
206  nameAccordingToID(DwcTerm.GROUP_TAXON, "taxonAccordingToID"),
207  namePublishedInID(DwcTerm.GROUP_TAXON),
208  taxonConceptID(DwcTerm.GROUP_TAXON),
209  scientificName(DwcTerm.GROUP_TAXON),
210  acceptedNameUsage(DwcTerm.GROUP_TAXON, "acceptedTaxon"),
211  parentNameUsage(DwcTerm.GROUP_TAXON, "parentTaxon", "higherTaxon", "higherNameUsage"),
212  originalNameUsage(DwcTerm.GROUP_TAXON, "originalName", "originalTaxon", "basionym"),
213  nameAccordingTo(DwcTerm.GROUP_TAXON, "taxonAccordingTo"),
214  namePublishedIn(DwcTerm.GROUP_TAXON),
215  namePublishedInYear(DwcTerm.GROUP_TAXON),
216  higherClassification(DwcTerm.GROUP_TAXON),
217  kingdom(DwcTerm.GROUP_TAXON),
218  phylum(DwcTerm.GROUP_TAXON),
219  /**
220   * The taxonomic class.
221   * The real Darwin Core term is class, but as java does not allow this name we use a variation instead.
222   */
223  class_(DwcTerm.GROUP_TAXON, "class"),
224  order(DwcTerm.GROUP_TAXON),
225  superfamily(DwcTerm.GROUP_TAXON),
226  family(DwcTerm.GROUP_TAXON),
227  subfamily(DwcTerm.GROUP_TAXON),
228  tribe(DwcTerm.GROUP_TAXON),
229  subtribe(DwcTerm.GROUP_TAXON),
230  genus(DwcTerm.GROUP_TAXON),
231  genericName(DwcTerm.GROUP_TAXON, "gbif:genericName", "http://rs.gbif.org/terms/1.0/genericName"),
232  subgenus(DwcTerm.GROUP_TAXON),
233  infragenericEpithet(DwcTerm.GROUP_TAXON),
234  specificEpithet(DwcTerm.GROUP_TAXON),
235  infraspecificEpithet(DwcTerm.GROUP_TAXON),
236  cultivarEpithet(DwcTerm.GROUP_TAXON),
237  taxonRank(DwcTerm.GROUP_TAXON, "rank"),
238  verbatimTaxonRank(DwcTerm.GROUP_TAXON),
239  scientificNameAuthorship(DwcTerm.GROUP_TAXON),
240  vernacularName(DwcTerm.GROUP_TAXON),
241  nomenclaturalCode(DwcTerm.GROUP_TAXON),
242  taxonomicStatus(DwcTerm.GROUP_TAXON),
243  nomenclaturalStatus(DwcTerm.GROUP_TAXON),
244  taxonRemarks(DwcTerm.GROUP_TAXON, "taxonRemark"),
245
246  measurementID(DwcTerm.GROUP_MEASUREMENTORFACT),
247  parentMeasurementID(DwcTerm.GROUP_MEASUREMENTORFACT),
248  measurementType(DwcTerm.GROUP_MEASUREMENTORFACT),
249  measurementValue(DwcTerm.GROUP_MEASUREMENTORFACT),
250  measurementAccuracy(DwcTerm.GROUP_MEASUREMENTORFACT),
251  measurementUnit(DwcTerm.GROUP_MEASUREMENTORFACT),
252  measurementDeterminedBy(DwcTerm.GROUP_MEASUREMENTORFACT),
253  measurementDeterminedDate(DwcTerm.GROUP_MEASUREMENTORFACT),
254  measurementMethod(DwcTerm.GROUP_MEASUREMENTORFACT),
255  measurementRemarks(DwcTerm.GROUP_MEASUREMENTORFACT),
256
257  resourceRelationshipID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
258  resourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
259  relationshipOfResourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
260  relatedResourceID(DwcTerm.GROUP_RESOURCERELATIONSHIP),
261  relationshipOfResource(DwcTerm.GROUP_RESOURCERELATIONSHIP),
262  relationshipAccordingTo(DwcTerm.GROUP_RESOURCERELATIONSHIP),
263  relationshipEstablishedDate(DwcTerm.GROUP_RESOURCERELATIONSHIP),
264  relationshipRemarks(DwcTerm.GROUP_RESOURCERELATIONSHIP);
265
266  private static final String PREFIX = "dwc";
267  private static final String NS = "http://rs.tdwg.org/dwc/terms/";
268  private static final URI NS_URI = URI.create(NS);
269
270  public static final String GROUP_RECORD = "Record";
271  public static final String GROUP_OCCURRENCE = "Occurrence";
272  public static final String GROUP_ORGANISM = "Organism";
273  public static final String GROUP_MATERIAL_ENTITY = "MaterialEntity";
274  public static final String GROUP_MATERIAL_SAMPLE = "MaterialSample";
275  public static final String GROUP_EVENT = "Event";
276  public static final String GROUP_LOCATION = "Location";
277  public static final String GROUP_GEOLOGICALCONTEXT = "GeologicalContext";
278  public static final String GROUP_IDENTIFICATION = "Identification";
279  public static final String GROUP_TAXON = "Taxon";
280  public static final String GROUP_MEASUREMENTORFACT = "MeasurementOrFact";
281  public static final String GROUP_RESOURCERELATIONSHIP = "ResourceRelationship";
282
283  /**
284   * Lists all term groups in the order given on the Darwin Core Quick Reference Guide.
285   * @see <a href="http://rs.tdwg.org/dwc/terms/index.htm#theterms">DwC Quick Reference Guide</a>
286   */
287  public static final String[] GROUPS =
288    {GROUP_RECORD, GROUP_OCCURRENCE, GROUP_ORGANISM,
289    GROUP_MATERIAL_ENTITY, GROUP_MATERIAL_SAMPLE, GROUP_EVENT, GROUP_LOCATION,
290          GROUP_GEOLOGICALCONTEXT, GROUP_IDENTIFICATION, GROUP_TAXON,
291          GROUP_MEASUREMENTORFACT, GROUP_RESOURCERELATIONSHIP};
292
293  public static final DwcTerm[] TAXONOMIC_TERMS = Arrays.stream(values())
294                                                        .filter(t -> !t.isClass() && t.getGroup().equals(GROUP_TAXON))
295                                                        .toArray(DwcTerm[]::new);
296
297  /**
298   * List of all higher rank terms in dwc, ordered by rank and starting with kingdom.
299   */
300  public static final DwcTerm[] HIGHER_RANKS =
301    {DwcTerm.kingdom, DwcTerm.phylum, DwcTerm.class_, DwcTerm.order,
302        DwcTerm.superfamily, DwcTerm.family, DwcTerm.subfamily,
303        DwcTerm.tribe, DwcTerm.subtribe,
304        DwcTerm.genus, DwcTerm.subgenus};
305
306  /**
307   * List of all class terms in dwc.
308   */
309  public static final DwcTerm[] CLASS_TERMS = Arrays.stream(values())
310                                                    .filter(DwcTerm::isClass)
311                                                    .toArray(DwcTerm[]::new);
312
313  private final String groupName;
314  public final String normQName;
315  public final String[] normAlts;
316
317  private DwcTerm(String groupName, String... alternatives) {
318    normQName = TermFactory.normaliseTerm(qualifiedName());
319    for (int i = 0; i < alternatives.length; i++) {
320      alternatives[i] = TermFactory.normaliseTerm(alternatives[i]);
321    }
322    normAlts = alternatives;
323    this.groupName = groupName;
324  }
325
326
327  /**
328   * The simple term name without a namespace.
329   * For example scientificName.
330   * @return simple term name
331   */
332  @Override
333  public String simpleName() {
334    if (this == class_) {
335      return "class";
336    }
337    return name();
338  }
339
340  /**
341   * Array of alternative simple names in use for the term.
342   * Often based on older dwc versions.
343   * @return simple term name
344   */
345  @Override
346  public String[] alternativeNames() {
347    return normAlts;
348  }
349
350  /**
351   * The optional group the term is grouped in.
352   * For example Taxon, Identification, etc.
353   */
354  public String getGroup() {
355    return groupName;
356  }
357
358  /**
359   * @return true if the dwc term is defining a class instead of a property, e.g. Taxon
360   */
361  @Override
362  public boolean isClass() {
363    return Character.isUpperCase(simpleName().charAt(0));
364  }
365
366  /**
367   * List all terms that belong to a given group.
368   *
369   * @param group the group to list terms for
370   *
371   * @return the list of dwc terms in the given group
372   */
373  public static List<DwcTerm> listByGroup(String group) {
374    List<DwcTerm> terms = new ArrayList<DwcTerm>();
375    for (DwcTerm t : DwcTerm.values()) {
376      if (t.getGroup().equalsIgnoreCase(group)) {
377        terms.add(t);
378      }
379    }
380    return terms;
381  }
382
383  @Override
384  public String prefix() {
385    return PREFIX;
386  }
387
388  @Override
389  public URI namespace() {
390    return NS_URI;
391  }
392
393  @Override
394  public String toString() {
395    return prefixedName();
396  }
397}