001/* 002 * Copyright 2020 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.api.model.common; 017 018import java.io.IOException; 019import java.io.Serializable; 020import java.io.UnsupportedEncodingException; 021import java.net.URI; 022import java.net.URLEncoder; 023import java.util.Objects; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import javax.validation.constraints.NotNull; 028 029import io.swagger.v3.oas.annotations.media.Schema; 030 031import org.apache.commons.lang3.StringUtils; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import com.fasterxml.jackson.core.JsonGenerator; 036import com.fasterxml.jackson.core.JsonParser; 037import com.fasterxml.jackson.databind.DeserializationContext; 038import com.fasterxml.jackson.databind.JsonDeserializer; 039import com.fasterxml.jackson.databind.JsonSerializer; 040import com.fasterxml.jackson.databind.SerializerProvider; 041import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 042import com.fasterxml.jackson.databind.annotation.JsonSerialize; 043 044import static org.gbif.api.util.PreconditionUtils.checkArgument; 045 046/** 047 * Class representing a single Digital Object Identifier (DOI) breaking it down to a prefix and suffix. 048 * For the syntax of DOI names see the <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.2">DOI Handbook</a>. 049 * All parsing is case-insensitive and resulting components will all be uppercased. 050 */ 051@Schema( 052 description = "A Digital Object Identifier (DOI).", 053 pattern = "(10[.][0-9]{2,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\\\S)+)", 054 implementation = String.class 055) 056@JsonSerialize(using = DOI.DoiSerializer.class) 057@JsonDeserialize(using = DOI.DoiDeserializer.class) 058public class DOI implements Serializable { 059 060 private static final Logger LOG = LoggerFactory.getLogger(DOI.class); 061 062 /** 063 * Encoding to create URLs. 064 */ 065 private static final String CHAR_ENCODING = "UTF-8"; 066 067 /** 068 * The DOI prefix registered with DataCite to be used by GBIF-issued production DOIs. 069 */ 070 public static final String GBIF_PREFIX = "10.15468"; 071 072 /** 073 * A DOI prefix provided by DataCite to be used in tests. 074 */ 075 public static final String TEST_PREFIX = "10.21373"; 076 077 private static final Pattern HTTP = Pattern.compile("^https?://(dx\\.)?doi\\.org/" 078 + "(urn:)?(doi:)?", Pattern.CASE_INSENSITIVE); 079 private static final Pattern PARSER = Pattern.compile("^(?:urn:)?(?:doi:)?" // optional 080 + "(10(?:\\.[0-9]+)+)" 081 + "/(.+)$", Pattern.CASE_INSENSITIVE); 082 083 private static final String RESOLVER = "https://doi.org/"; 084 private static final String SCHEME = "doi:"; 085 private String prefix; 086 private String suffix; 087 088 /** 089 * Returns true only if the source can be parsed into a DOI. 090 */ 091 public static boolean isParsable(String source) { 092 if (StringUtils.isNotEmpty(source)) { 093 try { 094 return PARSER.matcher(decodeUrl(source)).find(); 095 } catch (IllegalArgumentException iaEx) { 096 LOG.debug("Can not decode URL from the following DOI: {}", source); 097 } 098 } 099 return false; 100 } 101 102 /** 103 * Do not use this constructor. 104 * Required by JAXB to marshall this object without introducing adapters. 105 */ 106 public DOI() { 107 } 108 109 /** 110 * Parses a simple DOI string of various forms incl URN, URL or plain DOI names. 111 * 112 * @param doi the full simple DOI string 113 * @throws java.lang.IllegalArgumentException if invalid DOI string is passed 114 */ 115 public DOI(String doi) { 116 Objects.requireNonNull(doi, "DOI required"); 117 Matcher m = PARSER.matcher(decodeUrl(doi)); 118 if (m.find()) { 119 this.prefix = m.group(1).toLowerCase(); 120 this.suffix = m.group(2).toLowerCase(); 121 } else { 122 throw new IllegalArgumentException(doi + " is not a valid DOI"); 123 } 124 } 125 126 /** 127 * Parses a simple DOI string of various forms incl URN, URL or plain DOI names. 128 * 129 * @param prefix a simple DOI prefix starting with 10. 130 * @param suffix arbitrary suffix part of the DOI 131 * @throws java.lang.IllegalArgumentException if invalid DOI prefix is given 132 */ 133 public DOI(String prefix, String suffix) { 134 this.prefix = Objects.requireNonNull(prefix, "DOI prefix required").toLowerCase(); 135 checkArgument(prefix.startsWith("10.")); 136 this.suffix = Objects.requireNonNull(suffix, "DOI suffix required").toLowerCase(); 137 } 138 139 /** 140 * If the doi is encoded as a URL this method strips the resolver and decodes the URL encoded string entities. 141 * 142 * @param doi not null doi represented as a String 143 * @return the path part if the doi is a URL otherwise the doi is returned as is. 144 * @throws IllegalArgumentException 145 */ 146 private static String decodeUrl(@NotNull String doi) { 147 Matcher m = HTTP.matcher(doi); 148 if (m.find()) { 149 // strip resolver incl potentially starting paths using a badly encoded urn:doi 150 // (the colon would need to be encoded in a proper URL) 151 doi = m.replaceFirst(""); 152 // now decode the URL path, we cannot possibly have query parameters or anchors as the DOIs encoded as a URL 153 // will just be a path 154 try { 155 return URI.create(URLEncoder.encode(doi, CHAR_ENCODING)).getPath(); 156 } catch (UnsupportedEncodingException e) { 157 throw new IllegalArgumentException("Unsupported DOI encoding", e); 158 } 159 } 160 return doi; 161 } 162 163 public String getPrefix() { 164 return prefix; 165 } 166 167 public void setPrefix(String prefix) { 168 this.prefix = prefix; 169 } 170 171 public String getSuffix() { 172 return suffix; 173 } 174 175 public void setSuffix(String suffix) { 176 this.suffix = suffix; 177 } 178 179 /** 180 * See <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.6">DOI Handbook, Visual presentation and other representation of DOI names</a>. 181 * 182 * @return the resolved DOI using https://doi.org/ 183 * @throws IllegalStateException if the encoding of the DOI is not supported 184 */ 185 public URI getUrl() { 186 try { 187 return URI.create(RESOLVER + prefix + '/' + URLEncoder.encode(suffix, CHAR_ENCODING)); 188 } catch (UnsupportedEncodingException e) { 189 throw new IllegalStateException("Unsupported DOI encoding", e); 190 } 191 } 192 193 /** 194 * @return the DOI name prefixed with "doi:", as recommended by the DOI Handbook. 195 */ 196 public String getDoiString() { 197 return SCHEME + getDoiName(); 198 } 199 200 /** 201 * @return the pure DOI name without any initial scheme name starting with the prefix, i.e. 10. 202 */ 203 public String getDoiName() { 204 return prefix + '/' + suffix; 205 } 206 207 @Override 208 public String toString() { 209 return getDoiName(); 210 } 211 212 @Override 213 public int hashCode() { 214 return Objects.hash(prefix, suffix); 215 } 216 217 @Override 218 public boolean equals(Object obj) { 219 if (this == obj) { 220 return true; 221 } 222 if (obj == null || getClass() != obj.getClass()) { 223 return false; 224 } 225 final DOI other = (DOI) obj; 226 // prefix and suffix are always uppercased so we can do simple equals here 227 return Objects.equals(this.prefix, other.prefix) && Objects.equals(this.suffix, other.suffix); 228 } 229 230 public static class DoiSerializer extends JsonSerializer<DOI> { 231 232 @Override 233 public void serialize(DOI value, JsonGenerator gen, SerializerProvider serializers) throws IOException { 234 gen.writeString(value.toString()); 235 } 236 } 237 238 public static class DoiDeserializer extends JsonDeserializer<DOI> { 239 240 @Override 241 public DOI deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { 242 if (p != null && p.getTextLength() > 0) { 243 return new DOI(p.getText()); 244 } 245 return null; 246 } 247 } 248}