001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.api.vocabulary;
015
016import org.gbif.api.util.IdentifierUtils;
017import org.gbif.api.util.VocabularyUtils;
018
019import java.util.Arrays;
020import java.util.Collections;
021import java.util.List;
022
023import javax.annotation.Nullable;
024
025import org.apache.commons.lang3.StringUtils;
026
027/**
028 * Enumeration for all possible identifier types.
029 */
030public enum IdentifierType {
031
032  URL,
033
034  /**
035   * Reference controlled by a separate system, used for example by DOI.
036   * {http://en.wikipedia.org/wiki/Handle_(computing)}
037   */
038  LSID,
039
040  HANDLER,
041
042  DOI,
043
044  UUID,
045
046  FTP,
047
048  URI,
049
050  UNKNOWN,
051
052  /**
053   * Indicates the identifier originated from an auto_increment column in the portal.data_provider or
054   * portal.data_resource table respectively.
055   */
056  GBIF_PORTAL,
057
058  /**
059   * Identifies the node (e.g: 'DK' for Denmark, 'sp2000' for Species 2000).
060   */
061  GBIF_NODE,
062
063  /**
064   * Participant identifier from the GBIF Directory.
065   */
066  GBIF_PARTICIPANT,
067
068  /**
069   * ID migrated from GrSciColl.
070   */
071  GRSCICOLL_ID,
072
073  /**
074   * Cool URI migrated from GrSciColl.
075   */
076  GRSCICOLL_URI,
077
078  /**
079   * IRN of an IH record.
080   */
081  IH_IRN,
082
083  /**
084   * Research Organization Registry.
085   * https://ror.org
086   */
087  ROR,
088
089  /**
090   * https://www.grid.ac/institutes/
091   */
092  GRID,
093
094  /**
095   * https://cites.org/
096   */
097  CITES,
098  /**
099   * Symbiota IDs to help linking GrSciColl occurrences.
100   */
101  SYMBIOTA_UUID,
102  WIKIDATA,
103
104  /**
105   * https://www.ncbi.nlm.nih.gov/
106   */
107  NCBI_BIOCOLLECTION;
108
109  // TODO: Check if this is used, it didn't exist in the new Registry2 API, but I preserved it from the old vocabulary
110  public static final List<IdentifierType> TYPES;
111
112  static {
113    TYPES = Collections.unmodifiableList(Arrays.asList(IdentifierType.values()));
114  }
115
116  /**
117   * @return the matching IdentifierType or null
118   */
119  public static IdentifierType fromString(String identifierType) {
120    return VocabularyUtils.lookupEnum(identifierType, IdentifierType.class);
121  }
122
123  /**
124   * Tries to infer the identifier type from a given identifier.
125   * Most identifiers have a URI protocol prefix or a specific structure that
126   * allows the guess.
127   *
128   * @return the inferred identifier type or Unknown if identifier is null or cant be inferred.
129   */
130  public static IdentifierType inferFrom(@Nullable String identifier) {
131    String lcIdentifier = StringUtils.trimToEmpty(identifier).toLowerCase();
132
133    if (lcIdentifier.isEmpty()) {
134      return UNKNOWN;
135    }
136
137    if (lcIdentifier.startsWith(org.gbif.api.model.common.DOI.GBIF_PREFIX)
138        || lcIdentifier.startsWith(org.gbif.api.model.common.DOI.TEST_PREFIX)) {
139      return DOI;
140    }
141    if (lcIdentifier.startsWith("10.")
142        || lcIdentifier.startsWith("doi:10.")
143        || lcIdentifier.startsWith("urn:doi:10.")
144        || lcIdentifier.startsWith("http://dx.doi.org/10.")
145        || lcIdentifier.startsWith("https://dx.doi.org/10.")
146        || lcIdentifier.startsWith("http://doi.org/10.")
147        || lcIdentifier.startsWith("https://doi.org/10.")) {
148      return DOI;
149    }
150
151    if (lcIdentifier.startsWith("https://ror.org")) {
152      return ROR;
153    }
154
155    if(IdentifierUtils.isValidWikidataIdentifier(lcIdentifier)) {
156      return WIKIDATA;
157    }
158
159    if (lcIdentifier.startsWith("http:")
160        || lcIdentifier.startsWith("https:")
161        || lcIdentifier.startsWith("www.")) {
162      return URL;
163    }
164    if (lcIdentifier.startsWith("ftp:")) {
165      return FTP;
166    }
167    if (lcIdentifier.startsWith("urn:lsid:") || lcIdentifier.startsWith("lsid:")) {
168      return LSID;
169    }
170
171    if (lcIdentifier.startsWith("urn:uuid:") || lcIdentifier.startsWith("uuid:")) {
172      return UUID;
173    }
174
175    if (lcIdentifier.startsWith("gbif:ih:irn:")) {
176      return IH_IRN;
177    }
178
179    if (lcIdentifier.startsWith("grid")) {
180      return GRID;
181    }
182
183    try {
184      //noinspection ResultOfMethodCallIgnored
185      java.util.UUID.fromString(lcIdentifier);
186      return UUID;
187    } catch (IllegalArgumentException ignored) {
188      // We're just trying to convert a String to anything readable. Apparently the UUID approach failed.
189    }
190
191    return UNKNOWN;
192  }
193}