001/*
002 * Copyright 2020 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.api.model.common;
017
import java.io.IOException;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.validation.constraints.NotNull;

import io.swagger.v3.oas.annotations.media.Schema;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;

import static org.gbif.api.util.PreconditionUtils.checkArgument;
045
046/**
047 * Class representing a single Digital Object Identifier (DOI) breaking it down to a prefix and suffix.
048 * For the syntax of DOI names see the <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.2">DOI Handbook</a>.
049 * All parsing is case-insensitive and resulting components will all be uppercased.
050 */
051@Schema(
052  description = "A Digital Object Identifier (DOI).",
053  pattern = "(10[.][0-9]{2,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\\\S)+)",
054  example = "10.15468/igasai",
055  implementation = String.class
056)
057@JsonSerialize(using = DOI.DoiSerializer.class)
058@JsonDeserialize(using = DOI.DoiDeserializer.class)
059public class DOI implements Serializable {
060
061  private static final Logger LOG = LoggerFactory.getLogger(DOI.class);
062
063  /**
064   * Encoding to create URLs.
065   */
066  private static final String CHAR_ENCODING = "UTF-8";
067
068  /**
069   * The DOI prefix registered with DataCite to be used by GBIF-issued production DOIs.
070   */
071  public static final String GBIF_PREFIX = "10.15468";
072
073  /**
074   * A DOI prefix provided by DataCite to be used in tests.
075   */
076  public static final String TEST_PREFIX = "10.21373";
077
078  private static final Pattern HTTP = Pattern.compile("^https?://(dx\\.)?doi\\.org/"
079    + "(urn:)?(doi:)?", Pattern.CASE_INSENSITIVE);
080  private static final Pattern PARSER = Pattern.compile("^(?:urn:)?(?:doi:)?"           // optional
081    + "(10(?:\\.[0-9]+)+)"
082    + "/(.+)$", Pattern.CASE_INSENSITIVE);
083
084  private static final String RESOLVER = "https://doi.org/";
085  private static final String SCHEME = "doi:";
086  private String prefix;
087  private String suffix;
088
089  /**
090   * Returns true only if the source can be parsed into a DOI.
091   */
092  public static boolean isParsable(String source) {
093    if (StringUtils.isNotEmpty(source)) {
094      try {
095        return PARSER.matcher(decodeUrl(source)).find();
096      } catch (IllegalArgumentException iaEx) {
097        LOG.debug("Can not decode URL from the following DOI: {}", source);
098      }
099    }
100    return false;
101  }
102
103  /**
104   * Do not use this constructor.
105   * Required by JAXB to marshall this object without introducing adapters.
106   */
107  public DOI() {
108  }
109
110  /**
111   * Parses a simple DOI string of various forms incl URN, URL or plain DOI names.
112   *
113   * @param doi the full simple DOI string
114   * @throws java.lang.IllegalArgumentException if invalid DOI string is passed
115   */
116  public DOI(String doi) {
117    Objects.requireNonNull(doi, "DOI required");
118    Matcher m = PARSER.matcher(decodeUrl(doi));
119    if (m.find()) {
120      this.prefix = m.group(1).toLowerCase();
121      this.suffix = m.group(2).toLowerCase();
122    } else {
123      throw new IllegalArgumentException(doi + " is not a valid DOI");
124    }
125  }
126
127  /**
128   * Parses a simple DOI string of various forms incl URN, URL or plain DOI names.
129   *
130   * @param prefix a simple DOI prefix starting with 10.
131   * @param suffix arbitrary suffix part of the DOI
132   * @throws java.lang.IllegalArgumentException if invalid DOI prefix is given
133   */
134  public DOI(String prefix, String suffix) {
135    this.prefix = Objects.requireNonNull(prefix, "DOI prefix required").toLowerCase();
136    checkArgument(prefix.startsWith("10."));
137    this.suffix = Objects.requireNonNull(suffix, "DOI suffix required").toLowerCase();
138  }
139
140  /**
141   * If the doi is encoded as a URL this method strips the resolver and decodes the URL encoded string entities.
142   *
143   * @param doi not null doi represented as a String
144   * @return the path part if the doi is a URL otherwise the doi is returned as is.
145   * @throws IllegalArgumentException
146   */
147  private static String decodeUrl(@NotNull String doi) {
148    Matcher m = HTTP.matcher(doi);
149    if (m.find()) {
150      // strip resolver incl potentially starting paths using a badly encoded urn:doi
151      // (the colon would need to be encoded in a proper URL)
152      doi = m.replaceFirst("");
153      // now decode the URL path, we cannot possibly have query parameters or anchors as the DOIs encoded as a URL
154      // will just be a path
155      try {
156        return URI.create(URLEncoder.encode(doi, CHAR_ENCODING)).getPath();
157      } catch (UnsupportedEncodingException e) {
158        throw new IllegalArgumentException("Unsupported DOI encoding", e);
159      }
160    }
161    return doi;
162  }
163
164  public String getPrefix() {
165    return prefix;
166  }
167
168  public void setPrefix(String prefix) {
169    this.prefix = prefix;
170  }
171
172  public String getSuffix() {
173    return suffix;
174  }
175
176  public void setSuffix(String suffix) {
177    this.suffix = suffix;
178  }
179
180  /**
181   * See <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.6">DOI Handbook, Visual presentation and other representation of DOI names</a>.
182   *
183   * @return the resolved DOI using https://doi.org/
184   * @throws IllegalStateException if the encoding of the DOI is not supported
185   */
186  public URI getUrl() {
187    try {
188      return URI.create(RESOLVER + prefix + '/' + URLEncoder.encode(suffix, CHAR_ENCODING));
189    } catch (UnsupportedEncodingException e) {
190      throw new IllegalStateException("Unsupported DOI encoding", e);
191    }
192  }
193
194  /**
195   * @return the DOI name prefixed with "doi:", as recommended by the DOI Handbook.
196   */
197  public String getDoiString() {
198    return SCHEME + getDoiName();
199  }
200
201  /**
202   * @return the pure DOI name without any initial scheme name starting with the prefix, i.e. 10.
203   */
204  public String getDoiName() {
205    return prefix + '/' + suffix;
206  }
207
208  @Override
209  public String toString() {
210    return getDoiName();
211  }
212
213  @Override
214  public int hashCode() {
215    return Objects.hash(prefix, suffix);
216  }
217
218  @Override
219  public boolean equals(Object obj) {
220    if (this == obj) {
221      return true;
222    }
223    if (obj == null || getClass() != obj.getClass()) {
224      return false;
225    }
226    final DOI other = (DOI) obj;
227    // prefix and suffix are always uppercased so we can do simple equals here
228    return Objects.equals(this.prefix, other.prefix) && Objects.equals(this.suffix, other.suffix);
229  }
230
231  public static class DoiSerializer extends JsonSerializer<DOI> {
232
233    @Override
234    public void serialize(DOI value, JsonGenerator gen, SerializerProvider serializers) throws IOException {
235      gen.writeString(value.toString());
236    }
237  }
238
239  public static class DoiDeserializer extends JsonDeserializer<DOI> {
240
241    @Override
242    public DOI deserialize(JsonParser p, DeserializationContext ctxt) throws IOException {
243      if (p != null && p.getTextLength() > 0) {
244        return new DOI(p.getText());
245      }
246      return null;
247    }
248  }
249}