001/* 002 * Copyright 2020 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.api.model.common; 017 018import java.io.IOException; 019import java.io.Serializable; 020import java.io.UnsupportedEncodingException; 021import java.net.URI; 022import java.net.URLEncoder; 023import java.util.Objects; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import javax.validation.constraints.NotNull; 028 029import io.swagger.v3.oas.annotations.media.Schema; 030 031import org.apache.commons.lang3.StringUtils; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import com.fasterxml.jackson.core.JsonGenerator; 036import com.fasterxml.jackson.core.JsonParser; 037import com.fasterxml.jackson.databind.DeserializationContext; 038import com.fasterxml.jackson.databind.JsonDeserializer; 039import com.fasterxml.jackson.databind.JsonSerializer; 040import com.fasterxml.jackson.databind.SerializerProvider; 041import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 042import com.fasterxml.jackson.databind.annotation.JsonSerialize; 043 044import static org.gbif.api.util.PreconditionUtils.checkArgument; 045 046/** 047 * Class representing a single Digital Object Identifier (DOI) breaking it down to a prefix and suffix. 048 * For the syntax of DOI names see the <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.2">DOI Handbook</a>. 049 * All parsing is case-insensitive and resulting components will all be uppercased. 050 */ 051@Schema( 052 description = "A Digital Object Identifier (DOI).", 053 pattern = "(10[.][0-9]{2,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\\\S)+)", 054 example = "10.15468/igasai", 055 implementation = String.class 056) 057@JsonSerialize(using = DOI.DoiSerializer.class) 058@JsonDeserialize(using = DOI.DoiDeserializer.class) 059public class DOI implements Serializable { 060 061 private static final Logger LOG = LoggerFactory.getLogger(DOI.class); 062 063 /** 064 * Encoding to create URLs. 065 */ 066 private static final String CHAR_ENCODING = "UTF-8"; 067 068 /** 069 * The DOI prefix registered with DataCite to be used by GBIF-issued production DOIs. 070 */ 071 public static final String GBIF_PREFIX = "10.15468"; 072 073 /** 074 * A DOI prefix provided by DataCite to be used in tests. 075 */ 076 public static final String TEST_PREFIX = "10.21373"; 077 078 private static final Pattern HTTP = Pattern.compile("^https?://(dx\\.)?doi\\.org/" 079 + "(urn:)?(doi:)?", Pattern.CASE_INSENSITIVE); 080 private static final Pattern PARSER = Pattern.compile("^(?:urn:)?(?:doi:)?" // optional 081 + "(10(?:\\.[0-9]+)+)" 082 + "/(.+)$", Pattern.CASE_INSENSITIVE); 083 084 private static final String RESOLVER = "https://doi.org/"; 085 private static final String SCHEME = "doi:"; 086 private String prefix; 087 private String suffix; 088 089 /** 090 * Returns true only if the source can be parsed into a DOI. 091 */ 092 public static boolean isParsable(String source) { 093 if (StringUtils.isNotEmpty(source)) { 094 try { 095 return PARSER.matcher(decodeUrl(source)).find(); 096 } catch (IllegalArgumentException iaEx) { 097 LOG.debug("Can not decode URL from the following DOI: {}", source); 098 } 099 } 100 return false; 101 } 102 103 /** 104 * Do not use this constructor. 105 * Required by JAXB to marshall this object without introducing adapters. 106 */ 107 public DOI() { 108 } 109 110 /** 111 * Parses a simple DOI string of various forms incl URN, URL or plain DOI names. 112 * 113 * @param doi the full simple DOI string 114 * @throws java.lang.IllegalArgumentException if invalid DOI string is passed 115 */ 116 public DOI(String doi) { 117 Objects.requireNonNull(doi, "DOI required"); 118 Matcher m = PARSER.matcher(decodeUrl(doi)); 119 if (m.find()) { 120 this.prefix = m.group(1).toLowerCase(); 121 this.suffix = m.group(2).toLowerCase(); 122 } else { 123 throw new IllegalArgumentException(doi + " is not a valid DOI"); 124 } 125 } 126 127 /** 128 * Parses a simple DOI string of various forms incl URN, URL or plain DOI names. 129 * 130 * @param prefix a simple DOI prefix starting with 10. 131 * @param suffix arbitrary suffix part of the DOI 132 * @throws java.lang.IllegalArgumentException if invalid DOI prefix is given 133 */ 134 public DOI(String prefix, String suffix) { 135 this.prefix = Objects.requireNonNull(prefix, "DOI prefix required").toLowerCase(); 136 checkArgument(prefix.startsWith("10.")); 137 this.suffix = Objects.requireNonNull(suffix, "DOI suffix required").toLowerCase(); 138 } 139 140 /** 141 * If the doi is encoded as a URL this method strips the resolver and decodes the URL encoded string entities. 142 * 143 * @param doi not null doi represented as a String 144 * @return the path part if the doi is a URL otherwise the doi is returned as is. 145 * @throws IllegalArgumentException 146 */ 147 private static String decodeUrl(@NotNull String doi) { 148 Matcher m = HTTP.matcher(doi); 149 if (m.find()) { 150 // strip resolver incl potentially starting paths using a badly encoded urn:doi 151 // (the colon would need to be encoded in a proper URL) 152 doi = m.replaceFirst(""); 153 // now decode the URL path, we cannot possibly have query parameters or anchors as the DOIs encoded as a URL 154 // will just be a path 155 try { 156 return URI.create(URLEncoder.encode(doi, CHAR_ENCODING)).getPath(); 157 } catch (UnsupportedEncodingException e) { 158 throw new IllegalArgumentException("Unsupported DOI encoding", e); 159 } 160 } 161 return doi; 162 } 163 164 public String getPrefix() { 165 return prefix; 166 } 167 168 public void setPrefix(String prefix) { 169 this.prefix = prefix; 170 } 171 172 public String getSuffix() { 173 return suffix; 174 } 175 176 public void setSuffix(String suffix) { 177 this.suffix = suffix; 178 } 179 180 /** 181 * See <a href="http://www.doi.org/doi_handbook/2_Numbering.html#2.6">DOI Handbook, Visual presentation and other representation of DOI names</a>. 182 * 183 * @return the resolved DOI using https://doi.org/ 184 * @throws IllegalStateException if the encoding of the DOI is not supported 185 */ 186 public URI getUrl() { 187 try { 188 return URI.create(RESOLVER + prefix + '/' + URLEncoder.encode(suffix, CHAR_ENCODING)); 189 } catch (UnsupportedEncodingException e) { 190 throw new IllegalStateException("Unsupported DOI encoding", e); 191 } 192 } 193 194 /** 195 * @return the DOI name prefixed with "doi:", as recommended by the DOI Handbook. 196 */ 197 public String getDoiString() { 198 return SCHEME + getDoiName(); 199 } 200 201 /** 202 * @return the pure DOI name without any initial scheme name starting with the prefix, i.e. 10. 203 */ 204 public String getDoiName() { 205 return prefix + '/' + suffix; 206 } 207 208 @Override 209 public String toString() { 210 return getDoiName(); 211 } 212 213 @Override 214 public int hashCode() { 215 return Objects.hash(prefix, suffix); 216 } 217 218 @Override 219 public boolean equals(Object obj) { 220 if (this == obj) { 221 return true; 222 } 223 if (obj == null || getClass() != obj.getClass()) { 224 return false; 225 } 226 final DOI other = (DOI) obj; 227 // prefix and suffix are always uppercased so we can do simple equals here 228 return Objects.equals(this.prefix, other.prefix) && Objects.equals(this.suffix, other.suffix); 229 } 230 231 public static class DoiSerializer extends JsonSerializer<DOI> { 232 233 @Override 234 public void serialize(DOI value, JsonGenerator gen, SerializerProvider serializers) throws IOException { 235 gen.writeString(value.toString()); 236 } 237 } 238 239 public static class DoiDeserializer extends JsonDeserializer<DOI> { 240 241 @Override 242 public DOI deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { 243 if (p != null && p.getTextLength() > 0) { 244 return new DOI(p.getText()); 245 } 246 return null; 247 } 248 } 249}