001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.util;
017
018import org.gbif.api.vocabulary.Country;
019import org.gbif.api.vocabulary.IdentifierType;
020
021import javax.annotation.Nullable;
022
023import java.util.regex.Pattern;
024
025/**
026 * This class contains utility methods for identifiers. Currently there are 3 separate Identifier
027 * classes: </br> 1) org.gbif.api.model.checklistbank.Identifier 2)
028 * org.gbif.api.model.common.Identifier 3) org.gbif.api.model.registry.Identifier </br> Methods
029 * common to 2 or more classes should be listed here.
030 */
031public class IdentifierUtils {
032
033  public static final Pattern WIKIDATA_PATTERN =
034      Pattern.compile("http(s)?://www.wikidata.org/entity/([A-Za-z][0-9]+)$");
035
036  public static final Pattern ROR_PATTERN =
037    Pattern.compile("https?://ror\\.org/0[a-z0-9]{6}[0-9]{2}");
038
039  public static final Pattern ISIL_PATTERN =
040    Pattern.compile("^[A-Za-z]{1,4}-[A-Za-z0-9:/-]{1,11}$");
041
042  public static final Pattern CLB_DATASET_KEY_PATTERN = Pattern.compile("[1-9]\\d*");
043
044  /**
045   * Creates a http link for an identifier if possible by passing it to some known resolvers for the
046   * specific id type. If no link can be constructed, null is returned.
047   *
048   * @param identifier Identifier's identifier
049   * @param type Identifier's type
050   * @return the url or null if it cannot be created
051   */
052  @Nullable
053  public static String getIdentifierLink(String identifier, IdentifierType type) {
054    if (identifier == null || type == null) {
055      return null;
056    }
057    switch (type) {
058      case HANDLER:
059      case URI:
060      case URL:
061      case FTP:
062      case ROR:
063        return identifier;
064      case DOI:
065        return "https://doi.org/" + identifier;
066      case LSID:
067        return "http://www.lsid.info/" + identifier;
068      case GBIF_PORTAL:
069        return "https://www.gbif.org/dataset/" + identifier;
070      case CLB_DATASET_KEY:
071        return "https://www.checklistbank.org/dataset/" + identifier;
072    }
073    return null;
074  }
075
076  /** CITES identifier validation according to https://cites.org/eng/common/reg/e_si.html. */
077  public static boolean isValidCitesIdentifier(String identifier) {
078    if (identifier == null || identifier.isEmpty()) {
079      return false;
080    }
081
082    String[] parts = identifier.split("\\s+");
083    if (parts.length < 2) {
084      return false;
085    }
086
087    if (parts[0].length() != 2) {
088      return false;
089    }
090
091    Country country = Country.fromIsoCode(parts[0]);
092    return country != null;
093  }
094
095  public static boolean isValidWikidataIdentifier(String identifier) {
096    if (identifier == null || identifier.isEmpty()) {
097      return false;
098    }
099
100    return WIKIDATA_PATTERN.matcher(identifier).matches();
101  }
102
103  public static boolean isValidRORIdentifier(String identifier) {
104    if (identifier == null || identifier.isEmpty()) {
105      return false;
106    }
107
108    return ROR_PATTERN.matcher(identifier).matches();
109  }
110
111  /** ISIL identifier validation according to
112   * https://slks.dk/english/work-areas/libraries-and-literature/library-standards/structure */
113  public static boolean isValidISILIdentifier(String identifier) {
114    if (identifier == null || identifier.isEmpty()) {
115      return false;
116    }
117
118    return ISIL_PATTERN.matcher(identifier).matches();
119  }
120
121  public static boolean isValidCLBDatasetKey(String identifier) {
122    if (identifier == null || identifier.isEmpty()) {
123      return false;
124    }
125
126    // Use a regular expression to check if the identifier is a positive integer
127    return CLB_DATASET_KEY_PATTERN.matcher(identifier).matches();
128  }
129}