001package org.gbif.api.model.common;
002
003import java.io.IOException;
004import java.net.URI;
005import java.util.Objects;
006import java.util.regex.Matcher;
007import java.util.regex.Pattern;
008import javax.validation.constraints.NotNull;
009
010import com.google.common.base.Preconditions;
011import com.google.common.base.Strings;
012import org.codehaus.jackson.JsonGenerator;
013import org.codehaus.jackson.JsonParser;
014import org.codehaus.jackson.map.DeserializationContext;
015import org.codehaus.jackson.map.JsonDeserializer;
016import org.codehaus.jackson.map.SerializerProvider;
017import org.codehaus.jackson.map.annotate.JsonDeserialize;
018import org.codehaus.jackson.map.annotate.JsonSerialize;
019import org.codehaus.jackson.map.ser.std.SerializerBase;
020import org.slf4j.Logger;
021import org.slf4j.LoggerFactory;
022
023/**
024 * Class representing a single Digital Object Identifier (DOI) breaking it down to a prefix and suffix.
025 * For the syntax of DOI names see the <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.2">DOI Handbook</a>.
026 * All parsing is case insensitive and resulting components will all be upper cased.
027 */
028@JsonSerialize(using = DOI.Serializer.class)
029@JsonDeserialize(using = DOI.Deserializer.class)
030public class DOI {
031
032  private static final Logger LOG = LoggerFactory.getLogger(DOI.class);
033
034  /**
035   * The DOI prefix registered with DataCite to be used by GBIF issued production DOIs.
036   */
037  public static final String GBIF_PREFIX = "10.15468";
038
039  /**
040   * A DOI prefix provided by DataCite to be used in tests.
041   */
042  public static final String TEST_PREFIX = "10.5072";
043
044  private static final Pattern HTTP = Pattern.compile("^https?://(dx\\.)?doi\\.org/"
045                                                      + "(urn:)?(doi:)?", Pattern.CASE_INSENSITIVE);
046  private static final Pattern PARSER = Pattern.compile("^(?:urn:)?(?:doi:)?"           // optional
047                                                        + "(10(?:\\.[0-9]+)+)"
048                                                        + "/(.+)$", Pattern.CASE_INSENSITIVE);
049
050  private static final String RESOLVER = "https://doi.org/";
051  private static final String SCHEME = "doi:";
052  private String prefix;
053  private String suffix;
054
055  /**
056   * Returns true only if the source can be parsed into a DOI.
057   */
058  public static boolean isParsable(String source) {
059    if (!Strings.isNullOrEmpty(source)) {
060      try {
061        return PARSER.matcher(decodeUrl(source)).find();
062      }
063      catch (IllegalArgumentException iaEx){
064        LOG.debug("Can not decode URL from the following DOI: {}", source);
065      }
066    }
067    return false;
068  }
069
070  /**
071   * Do not use this constructor.
072   * Required by JAXB to marshall this object without introducing adapters.
073   */
074  public DOI() {
075  }
076
077  /**
078   * Parses a simple DOI string of various forms incl URN, URL or plain DOI names.
079   * @param doi the full simple DOI string
080   * @throws java.lang.IllegalArgumentException if invalid DOI string is passed
081   */
082  public DOI(String doi) {
083    Preconditions.checkNotNull(doi, "DOI required");
084    Matcher m = PARSER.matcher(decodeUrl(doi));
085    if (m.find()) {
086      this.prefix = m.group(1).toLowerCase();
087      this.suffix = m.group(2).toLowerCase();
088    } else {
089      throw new IllegalArgumentException(doi + " is not a valid DOI");
090    }
091  }
092
093  /**
094   * Parses a simple DOI string of various forms incl URN, URL or plain DOI names.
095   * @param prefix a simple DOI prefix starting with 10.
096   * @param suffix arbitrary suffix part of the DOI
097   * @throws java.lang.IllegalArgumentException if invalid DOI prefix is given
098   */
099  public DOI(String prefix, String suffix) {
100    this.prefix = Preconditions.checkNotNull(prefix, "DOI prefix required").toLowerCase();
101    Preconditions.checkArgument(prefix.startsWith("10."));
102    this.suffix = Preconditions.checkNotNull(suffix, "DOI suffix required").toLowerCase();
103  }
104
105  /**
106   * If the doi is encoded as a URL this method strips the resolver and decodes the URL encoded string entities.
107   * @param doi not null doi represented as a String
108   * @return the path part if the doi is a URL otherwise the doi is returned as is.
109   * @throws IllegalArgumentException
110   */
111  private static String decodeUrl(@NotNull String doi) {
112    Matcher m = HTTP.matcher(doi);
113    if (m.find()) {
114      // strip resolver incl potentially starting paths using a badly encoded urn:doi
115      // (the colon would need to be encoded in a proper URL)
116      doi = m.replaceFirst("");
117      // now decode the URL path, we cannot possibly have query parameters or anchors as the DOIs encoded as a URL
118      // will just be a path
119      return URI.create(doi).getPath();
120    }
121    return doi;
122  }
123
124  public String getPrefix() {
125    return prefix;
126  }
127
128  public void setPrefix(String prefix) {
129    this.prefix = prefix;
130  }
131
132  public String getSuffix() {
133    return suffix;
134  }
135
136  public void setSuffix(String suffix) {
137    this.suffix = suffix;
138  }
139
140  /**
141   * See <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.6">DOI Hanbook, Visual presentation and other representation of DOI names</a>.
142   * @return the resolved DOI using http://dx.doi.org
143   */
144  public URI getUrl() {
145    return URI.create(RESOLVER + prefix + '/' + suffix);
146  }
147
148  /**
149   * @return the pure DOI name without any initial scheme name starting with the prefix, i.e. 10.
150   */
151  public String getDoiName() {
152    return prefix + '/' + suffix;
153  }
154
155  @Override
156  public String toString() {
157    return SCHEME + prefix + '/' + suffix;
158  }
159
160  @Override
161  public int hashCode() {
162    return Objects.hash(prefix, suffix);
163  }
164
165  @Override
166  public boolean equals(Object obj) {
167    if (this == obj) {
168      return true;
169    }
170    if (obj == null || getClass() != obj.getClass()) {
171      return false;
172    }
173    final DOI other = (DOI) obj;
174    // prefix and suffix are always upper cased so we can do simple equals here
175    return Objects.equals(this.prefix, other.prefix) && Objects.equals(this.suffix, other.suffix);
176  }
177
178
179
180  /**
181   * Serializes a DOI as doi name with a doi: scheme.
182   * For example doi:10.1038/nature.2014.16460
183   */
184  public static class Serializer extends SerializerBase<DOI> {
185
186    public Serializer() {
187      super(DOI.class);
188    }
189
190    @Override
191    public void serialize(DOI value, JsonGenerator jgen, SerializerProvider provider) throws IOException {
192      jgen.writeString(value.toString());
193    }
194  }
195
196  /**
197   * Deserializes a DOI from various string based formats.
198   * See DOI constructor for details.
199   */
200  public static class Deserializer extends JsonDeserializer<DOI> {
201
202    @Override
203    public DOI deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
204      if (jp != null && jp.getTextLength() > 0) {
205        return new DOI(jp.getText());
206      }
207      return null;
208    }
209  }
210
211}