001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.model.common;
017
import java.io.IOException;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.validation.constraints.NotNull;

import io.swagger.v3.oas.annotations.media.Schema;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;

import static org.gbif.api.util.PreconditionUtils.checkArgument;
045
046/**
047 * Class representing a single Digital Object Identifier (DOI) breaking it down to a prefix and suffix.
048 * For the syntax of DOI names see the <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.2">DOI Handbook</a>.
049 * All parsing is case-insensitive and resulting components will all be uppercased.
050 */
051@Schema(
052  description = "A Digital Object Identifier (DOI).",
053  pattern = "(10[.][0-9]{2,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\\\S)+)",
054  implementation = String.class
055)
056@JsonSerialize(using = DOI.DoiSerializer.class)
057@JsonDeserialize(using = DOI.DoiDeserializer.class)
058public class DOI implements Serializable {
059
060  private static final Logger LOG = LoggerFactory.getLogger(DOI.class);
061
062  /**
063   * Encoding to create URLs.
064   */
065  private static final String CHAR_ENCODING = "UTF-8";
066
067  /**
068   * The DOI prefix registered with DataCite to be used by GBIF-issued production DOIs.
069   */
070  public static final String GBIF_PREFIX = "10.15468";
071
072  /**
073   * A DOI prefix provided by DataCite to be used in tests.
074   */
075  public static final String TEST_PREFIX = "10.21373";
076
077  private static final Pattern HTTP = Pattern.compile("^https?://(dx\\.)?doi\\.org/"
078    + "(urn:)?(doi:)?", Pattern.CASE_INSENSITIVE);
079  private static final Pattern PARSER = Pattern.compile("^(?:urn:)?(?:doi:)?"           // optional
080    + "(10(?:\\.[0-9]+)+)"
081    + "/(.+)$", Pattern.CASE_INSENSITIVE);
082
083  private static final String RESOLVER = "https://doi.org/";
084  private static final String SCHEME = "doi:";
085  private String prefix;
086  private String suffix;
087
088  /**
089   * Returns true only if the source can be parsed into a DOI.
090   */
091  public static boolean isParsable(String source) {
092    if (StringUtils.isNotEmpty(source)) {
093      try {
094        return PARSER.matcher(decodeUrl(source)).find();
095      } catch (IllegalArgumentException iaEx) {
096        LOG.debug("Can not decode URL from the following DOI: {}", source);
097      }
098    }
099    return false;
100  }
101
102  /**
103   * Do not use this constructor.
104   * Required by JAXB to marshall this object without introducing adapters.
105   */
106  public DOI() {
107  }
108
109  /**
110   * Parses a simple DOI string of various forms incl URN, URL or plain DOI names.
111   *
112   * @param doi the full simple DOI string
113   * @throws java.lang.IllegalArgumentException if invalid DOI string is passed
114   */
115  public DOI(String doi) {
116    Objects.requireNonNull(doi, "DOI required");
117    Matcher m = PARSER.matcher(decodeUrl(doi));
118    if (m.find()) {
119      this.prefix = m.group(1).toLowerCase();
120      this.suffix = m.group(2).toLowerCase();
121    } else {
122      throw new IllegalArgumentException(doi + " is not a valid DOI");
123    }
124  }
125
126  /**
127   * Parses a simple DOI string of various forms incl URN, URL or plain DOI names.
128   *
129   * @param prefix a simple DOI prefix starting with 10.
130   * @param suffix arbitrary suffix part of the DOI
131   * @throws java.lang.IllegalArgumentException if invalid DOI prefix is given
132   */
133  public DOI(String prefix, String suffix) {
134    this.prefix = Objects.requireNonNull(prefix, "DOI prefix required").toLowerCase();
135    checkArgument(prefix.startsWith("10."));
136    this.suffix = Objects.requireNonNull(suffix, "DOI suffix required").toLowerCase();
137  }
138
139  /**
140   * If the doi is encoded as a URL this method strips the resolver and decodes the URL encoded string entities.
141   *
142   * @param doi not null doi represented as a String
143   * @return the path part if the doi is a URL otherwise the doi is returned as is.
144   * @throws IllegalArgumentException
145   */
146  private static String decodeUrl(@NotNull String doi) {
147    Matcher m = HTTP.matcher(doi);
148    if (m.find()) {
149      // strip resolver incl potentially starting paths using a badly encoded urn:doi
150      // (the colon would need to be encoded in a proper URL)
151      doi = m.replaceFirst("");
152      // now decode the URL path, we cannot possibly have query parameters or anchors as the DOIs encoded as a URL
153      // will just be a path
154      try {
155        return URI.create(URLEncoder.encode(doi, CHAR_ENCODING)).getPath();
156      } catch (UnsupportedEncodingException e) {
157        throw new IllegalArgumentException("Unsupported DOI encoding", e);
158      }
159    }
160    return doi;
161  }
162
163  public String getPrefix() {
164    return prefix;
165  }
166
167  public void setPrefix(String prefix) {
168    this.prefix = prefix;
169  }
170
171  public String getSuffix() {
172    return suffix;
173  }
174
175  public void setSuffix(String suffix) {
176    this.suffix = suffix;
177  }
178
179  /**
180   * See <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.6">DOI Handbook, Visual presentation and other representation of DOI names</a>.
181   *
182   * @return the resolved DOI using https://doi.org/
183   * @throws IllegalStateException if the encoding of the DOI is not supported
184   */
185  public URI getUrl() {
186    try {
187      return URI.create(RESOLVER + prefix + '/' + URLEncoder.encode(suffix, CHAR_ENCODING));
188    } catch (UnsupportedEncodingException e) {
189      throw new IllegalStateException("Unsupported DOI encoding", e);
190    }
191  }
192
193  /**
194   * @return the DOI name prefixed with "doi:", as recommended by the DOI Handbook.
195   */
196  public String getDoiString() {
197    return SCHEME + getDoiName();
198  }
199
200  /**
201   * @return the pure DOI name without any initial scheme name starting with the prefix, i.e. 10.
202   */
203  public String getDoiName() {
204    return prefix + '/' + suffix;
205  }
206
207  @Override
208  public String toString() {
209    return getDoiName();
210  }
211
212  @Override
213  public int hashCode() {
214    return Objects.hash(prefix, suffix);
215  }
216
217  @Override
218  public boolean equals(Object obj) {
219    if (this == obj) {
220      return true;
221    }
222    if (obj == null || getClass() != obj.getClass()) {
223      return false;
224    }
225    final DOI other = (DOI) obj;
226    // prefix and suffix are always uppercased so we can do simple equals here
227    return Objects.equals(this.prefix, other.prefix) && Objects.equals(this.suffix, other.suffix);
228  }
229
230  public static class DoiSerializer extends JsonSerializer<DOI> {
231
232    @Override
233    public void serialize(DOI value, JsonGenerator gen, SerializerProvider serializers) throws IOException {
234      gen.writeString(value.toString());
235    }
236  }
237
238  public static class DoiDeserializer extends JsonDeserializer<DOI> {
239
240    @Override
241    public DOI deserialize(JsonParser p, DeserializationContext ctxt) throws IOException {
242      if (p != null && p.getTextLength() > 0) {
243        return new DOI(p.getText());
244      }
245      return null;
246    }
247  }
248}