/*
 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.util;

import org.gbif.api.vocabulary.Country;
import org.gbif.api.vocabulary.IdentifierType;

import javax.annotation.Nullable;

import java.util.regex.Pattern;

/**
 * This class contains utility methods for identifiers. Currently there are 3 separate Identifier
 * classes:
 *
 * <ol>
 *   <li>org.gbif.api.model.checklistbank.Identifier
 *   <li>org.gbif.api.model.common.Identifier
 *   <li>org.gbif.api.model.registry.Identifier
 * </ol>
 *
 * Methods common to 2 or more classes should be listed here. All members of this class are
 * static.
 */
public class IdentifierUtils {

  /**
   * Wikidata entity URI: {@code http} or {@code https}, host {@code www.wikidata.org}, path
   * {@code /entity/} followed by one ASCII letter and one or more digits. Group 1 captures the
   * optional {@code s} of the scheme, group 2 captures the entity id.
   */
  public static final Pattern WIKIDATA_PATTERN =
      // The dots of the host name are escaped so that they only match a literal '.';
      // an unescaped '.' would accept any character in that position.
      Pattern.compile("http(s)?://www\\.wikidata\\.org/entity/([A-Za-z][0-9]+)$");

  /**
   * ROR URI: {@code http(s)://ror.org/} followed by {@code 0}, six lowercase letters or digits
   * and two digits.
   */
  public static final Pattern ROR_PATTERN =
      Pattern.compile("https?://ror\\.org/0[a-z0-9]{6}[0-9]{2}");

  /**
   * ISIL identifier: 1 to 4 ASCII letters, a hyphen, then 1 to 11 characters out of ASCII
   * letters, digits, {@code :}, {@code /} and {@code -}.
   */
  public static final Pattern ISIL_PATTERN =
      Pattern.compile("^[A-Za-z]{1,4}-[A-Za-z0-9:/-]{1,11}$");

  /** ChecklistBank dataset key: a positive integer written without leading zeros. */
  public static final Pattern CLB_DATASET_KEY_PATTERN = Pattern.compile("[1-9]\\d*");

  /**
   * RNC URI: {@code http(s)://rnc.humboldt}, optionally followed by {@code .org.co} and
   * optionally by a path starting with {@code /}.
   */
  public static final Pattern RNC_PATTERN =
      Pattern.compile("https?://rnc\\.humboldt(\\.org\\.co)?(/.*)?");

  /**
   * Creates a http link for an identifier if possible by passing it to some known resolvers for the
   * specific id type. If no link can be constructed, null is returned.
   *
   * @param identifier Identifier's identifier
   * @param type Identifier's type
   * @return the url or null if it cannot be created
   */
  @Nullable
  public static String getIdentifierLink(String identifier, IdentifierType type) {
    if (identifier == null || type == null) {
      return null;
    }
    switch (type) {
      // These types are returned unchanged.
      case HANDLER:
      case URI:
      case URL:
      case FTP:
      case ROR:
        return identifier;
      case DOI:
        return "https://doi.org/" + identifier;
      case LSID:
        return "http://www.lsid.info/" + identifier;
      case GBIF_PORTAL:
        return "https://www.gbif.org/dataset/" + identifier;
      case CLB_DATASET_KEY:
        return "https://www.checklistbank.org/dataset/" + identifier;
      default:
        // No known resolver for any other identifier type.
        return null;
    }
  }

  /**
   * CITES identifier validation according to https://cites.org/eng/common/reg/e_si.html.
   *
   * <p>The identifier is split on whitespace. It is accepted when it has at least two parts, the
   * first part is exactly two characters long and {@link Country#fromIsoCode(String)} returns a
   * non-null country for that first part.
   *
   * @param identifier the identifier to check, may be null
   * @return true if the identifier passes the checks above, false otherwise (including null or
   *     empty input)
   */
  public static boolean isValidCitesIdentifier(String identifier) {
    if (identifier == null || identifier.isEmpty()) {
      return false;
    }

    String[] parts = identifier.split("\\s+");
    if (parts.length < 2) {
      return false;
    }

    if (parts[0].length() != 2) {
      return false;
    }

    Country country = Country.fromIsoCode(parts[0]);
    return country != null;
  }

  /**
   * Checks an identifier against {@link #WIKIDATA_PATTERN}.
   *
   * @param identifier the identifier to check, may be null
   * @return true if the whole identifier matches the pattern, false otherwise (including null or
   *     empty input)
   */
  public static boolean isValidWikidataIdentifier(String identifier) {
    return matchesFully(WIKIDATA_PATTERN, identifier);
  }

  /**
   * Checks an identifier against {@link #ROR_PATTERN}.
   *
   * @param identifier the identifier to check, may be null
   * @return true if the whole identifier matches the pattern, false otherwise (including null or
   *     empty input)
   */
  public static boolean isValidRORIdentifier(String identifier) {
    return matchesFully(ROR_PATTERN, identifier);
  }

  /**
   * ISIL identifier validation according to
   * https://slks.dk/english/work-areas/libraries-and-literature/library-standards/structure
   *
   * @param identifier the identifier to check, may be null
   * @return true if the whole identifier matches {@link #ISIL_PATTERN}, false otherwise
   *     (including null or empty input)
   */
  public static boolean isValidISILIdentifier(String identifier) {
    return matchesFully(ISIL_PATTERN, identifier);
  }

  /**
   * Checks that an identifier is a positive integer, using {@link #CLB_DATASET_KEY_PATTERN}.
   *
   * @param identifier the identifier to check, may be null
   * @return true if the whole identifier matches the pattern, false otherwise (including null or
   *     empty input)
   */
  public static boolean isValidCLBDatasetKey(String identifier) {
    return matchesFully(CLB_DATASET_KEY_PATTERN, identifier);
  }

  /**
   * Checks an identifier against {@link #RNC_PATTERN}.
   *
   * @param identifier the identifier to check, may be null
   * @return true if the whole identifier matches the pattern, false otherwise (including null or
   *     empty input)
   */
  public static boolean isValidRNCColombiaIdentifier(String identifier) {
    return matchesFully(RNC_PATTERN, identifier);
  }

  /** Returns true if the identifier is neither null nor empty and matches the pattern entirely. */
  private static boolean matchesFully(Pattern pattern, String identifier) {
    return identifier != null
        && !identifier.isEmpty()
        && pattern.matcher(identifier).matches();
  }
}