001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.util;
017
018import org.gbif.api.vocabulary.Country;
019import org.gbif.api.vocabulary.IdentifierType;
020
021import javax.annotation.Nullable;
022
023import java.util.regex.Pattern;
024
025/**
026 * This class contains utility methods for identifiers. Currently there are 3 separate Identifier
027 * classes: </br> 1) org.gbif.api.model.checklistbank.Identifier 2)
028 * org.gbif.api.model.common.Identifier 3) org.gbif.api.model.registry.Identifier </br> Methods
029 * common to 2 or more classes should be listed here.
030 */
031public class IdentifierUtils {
032
033  public static final Pattern WIKIDATA_PATTERN =
034      Pattern.compile("http(s)?://www.wikidata.org/entity/([A-Za-z][0-9]+)$");
035
036  public static final Pattern ROR_PATTERN =
037    Pattern.compile("https?://ror\\.org/0[a-z0-9]{6}[0-9]{2}");
038
039  public static final Pattern ISIL_PATTERN =
040    Pattern.compile("^[A-Za-z]{1,4}-[A-Za-z0-9:/-]{1,11}$");
041
042  public static final Pattern CLB_DATASET_KEY_PATTERN = Pattern.compile("[1-9]\\d*");
043
044  public static final Pattern RNC_PATTERN =
045    Pattern.compile("https?://rnc\\.humboldt(\\.org\\.co)?(/.*)?");
046
047
048  /**
049   * Creates a http link for an identifier if possible by passing it to some known resolvers for the
050   * specific id type. If no link can be constructed, null is returned.
051   *
052   * @param identifier Identifier's identifier
053   * @param type Identifier's type
054   * @return the url or null if it cannot be created
055   */
056  @Nullable
057  public static String getIdentifierLink(String identifier, IdentifierType type) {
058    if (identifier == null || type == null) {
059      return null;
060    }
061    switch (type) {
062      case HANDLER:
063      case URI:
064      case URL:
065      case FTP:
066      case ROR:
067        return identifier;
068      case DOI:
069        return "https://doi.org/" + identifier;
070      case LSID:
071        return "http://www.lsid.info/" + identifier;
072      case GBIF_PORTAL:
073        return "https://www.gbif.org/dataset/" + identifier;
074      case CLB_DATASET_KEY:
075        return "https://www.checklistbank.org/dataset/" + identifier;
076    }
077    return null;
078  }
079
080  /** CITES identifier validation according to https://cites.org/eng/common/reg/e_si.html. */
081  public static boolean isValidCitesIdentifier(String identifier) {
082    if (identifier == null || identifier.isEmpty()) {
083      return false;
084    }
085
086    String[] parts = identifier.split("\\s+");
087    if (parts.length < 2) {
088      return false;
089    }
090
091    if (parts[0].length() != 2) {
092      return false;
093    }
094
095    Country country = Country.fromIsoCode(parts[0]);
096    return country != null;
097  }
098
099  public static boolean isValidWikidataIdentifier(String identifier) {
100    if (identifier == null || identifier.isEmpty()) {
101      return false;
102    }
103
104    return WIKIDATA_PATTERN.matcher(identifier).matches();
105  }
106
107  public static boolean isValidRORIdentifier(String identifier) {
108    if (identifier == null || identifier.isEmpty()) {
109      return false;
110    }
111
112    return ROR_PATTERN.matcher(identifier).matches();
113  }
114
115  /** ISIL identifier validation according to
116   * https://slks.dk/english/work-areas/libraries-and-literature/library-standards/structure */
117  public static boolean isValidISILIdentifier(String identifier) {
118    if (identifier == null || identifier.isEmpty()) {
119      return false;
120    }
121
122    return ISIL_PATTERN.matcher(identifier).matches();
123  }
124
125  public static boolean isValidCLBDatasetKey(String identifier) {
126    if (identifier == null || identifier.isEmpty()) {
127      return false;
128    }
129
130    // Use a regular expression to check if the identifier is a positive integer
131    return CLB_DATASET_KEY_PATTERN.matcher(identifier).matches();
132  }
133
134  public static boolean isValidRNCColombiaIdentifier(String identifier) {
135    if (identifier == null || identifier.isEmpty()) {
136      return false;
137    }
138    return RNC_PATTERN.matcher(identifier).matches();
139  }
140}