001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.api.vocabulary; 015 016import org.gbif.api.util.IdentifierUtils; 017import org.gbif.api.util.VocabularyUtils; 018 019import java.util.Arrays; 020import java.util.Collections; 021import java.util.List; 022 023import javax.annotation.Nullable; 024 025import org.apache.commons.lang3.StringUtils; 026 027/** 028 * Enumeration for all possible identifier types. 029 */ 030public enum IdentifierType { 031 032 URL, 033 034 /** 035 * Reference controlled by a separate system, used for example by DOI. 036 * {http://en.wikipedia.org/wiki/Handle_(computing)} 037 */ 038 LSID, 039 040 HANDLER, 041 042 DOI, 043 044 UUID, 045 046 FTP, 047 048 URI, 049 050 UNKNOWN, 051 052 /** 053 * Indicates the identifier originated from an auto_increment column in the portal.data_provider or 054 * portal.data_resource table respectively. 055 */ 056 GBIF_PORTAL, 057 058 /** 059 * Identifies the node (e.g: 'DK' for Denmark, 'sp2000' for Species 2000). 060 */ 061 GBIF_NODE, 062 063 /** 064 * Participant identifier from the GBIF Directory. 065 */ 066 GBIF_PARTICIPANT, 067 068 /** 069 * ID migrated from GrSciColl. 070 */ 071 GRSCICOLL_ID, 072 073 /** 074 * Cool URI migrated from GrSciColl. 075 */ 076 GRSCICOLL_URI, 077 078 /** 079 * IRN of an IH record. 080 */ 081 IH_IRN, 082 083 /** 084 * Research Organization Registry. 085 * https://ror.org 086 */ 087 ROR, 088 089 /** 090 * https://www.grid.ac/institutes/ 091 */ 092 GRID, 093 094 /** 095 * https://cites.org/ 096 */ 097 CITES, 098 /** 099 * Symbiota IDs to help linking GrSciColl occurrences. 100 */ 101 SYMBIOTA_UUID, 102 WIKIDATA, 103 104 /** 105 * https://www.ncbi.nlm.nih.gov/ 106 */ 107 NCBI_BIOCOLLECTION; 108 109 // TODO: Check if this is used, it didn't exist in the new Registry2 API, but I preserved it from the old vocabulary 110 public static final List<IdentifierType> TYPES; 111 112 static { 113 TYPES = Collections.unmodifiableList(Arrays.asList(IdentifierType.values())); 114 } 115 116 /** 117 * @return the matching IdentifierType or null 118 */ 119 public static IdentifierType fromString(String identifierType) { 120 return VocabularyUtils.lookupEnum(identifierType, IdentifierType.class); 121 } 122 123 /** 124 * Tries to infer the identifier type from a given identifier. 125 * Most identifiers have a URI protocol prefix or a specific structure that 126 * allows the guess. 127 * 128 * @return the inferred identifier type or Unknown if identifier is null or cant be inferred. 129 */ 130 public static IdentifierType inferFrom(@Nullable String identifier) { 131 String lcIdentifier = StringUtils.trimToEmpty(identifier).toLowerCase(); 132 133 if (lcIdentifier.isEmpty()) { 134 return UNKNOWN; 135 } 136 137 if (lcIdentifier.startsWith(org.gbif.api.model.common.DOI.GBIF_PREFIX) 138 || lcIdentifier.startsWith(org.gbif.api.model.common.DOI.TEST_PREFIX)) { 139 return DOI; 140 } 141 if (lcIdentifier.startsWith("10.") 142 || lcIdentifier.startsWith("doi:10.") 143 || lcIdentifier.startsWith("urn:doi:10.") 144 || lcIdentifier.startsWith("http://dx.doi.org/10.") 145 || lcIdentifier.startsWith("https://dx.doi.org/10.") 146 || lcIdentifier.startsWith("http://doi.org/10.") 147 || lcIdentifier.startsWith("https://doi.org/10.")) { 148 return DOI; 149 } 150 151 if (lcIdentifier.startsWith("https://ror.org")) { 152 return ROR; 153 } 154 155 if(IdentifierUtils.isValidWikidataIdentifier(lcIdentifier)) { 156 return WIKIDATA; 157 } 158 159 if (lcIdentifier.startsWith("http:") 160 || lcIdentifier.startsWith("https:") 161 || lcIdentifier.startsWith("www.")) { 162 return URL; 163 } 164 if (lcIdentifier.startsWith("ftp:")) { 165 return FTP; 166 } 167 if (lcIdentifier.startsWith("urn:lsid:") || lcIdentifier.startsWith("lsid:")) { 168 return LSID; 169 } 170 171 if (lcIdentifier.startsWith("urn:uuid:") || lcIdentifier.startsWith("uuid:")) { 172 return UUID; 173 } 174 175 if (lcIdentifier.startsWith("gbif:ih:irn:")) { 176 return IH_IRN; 177 } 178 179 if (lcIdentifier.startsWith("grid")) { 180 return GRID; 181 } 182 183 try { 184 //noinspection ResultOfMethodCallIgnored 185 java.util.UUID.fromString(lcIdentifier); 186 return UUID; 187 } catch (IllegalArgumentException ignored) { 188 // We're just trying to convert a String to anything readable. Apparently the UUID approach failed. 189 } 190 191 return UNKNOWN; 192 } 193}