/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.util;

import org.gbif.api.model.registry.Citation;
import org.gbif.api.model.registry.CitationContact;
import org.gbif.api.model.registry.Contact;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Endpoint;
import org.gbif.api.model.registry.Organization;
import org.gbif.api.vocabulary.ContactType;
import org.gbif.api.vocabulary.EndpointType;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import lombok.Builder;
import lombok.Data;

/**
 * Helper class that generates a Citation String from {@link Dataset} and {@link Organization}
 * objects. Documentation : /docs/citations.md
 */
public final class CitationGenerator {

  private static final ZoneId UTC = ZoneId.of("UTC");
  private static final ContactType MANDATORY_CONTACT_TYPE = ContactType.ORIGINATOR;
  private static final EnumSet<ContactType> AUTHOR_CONTACT_TYPE =
      EnumSet.of(ContactType.ORIGINATOR, ContactType.METADATA_AUTHOR);
  private static final EnumSet<ContactType> CAMTRAP_CITATION_CONTACT_TYPE =
      EnumSet.of(
          ContactType.POINT_OF_CONTACT,
          ContactType.PRINCIPAL_INVESTIGATOR,
          ContactType.CONTENT_PROVIDER);
  private static final Predicate<Contact> IS_NAME_PROVIDED_FCT =
      ctc -> StringUtils.isNotBlank(ctc.getLastName());
  private static final Predicate<CitationContact> IS_CONTACT_NAME_PROVIDED =
      ctc -> StringUtils.isNotBlank(ctc.getLastName());
  private static final Predicate<Contact> IS_ELIGIBLE_CONTACT_TYPE =
      ctc -> AUTHOR_CONTACT_TYPE.contains(ctc.getType());
  private static final Predicate<Contact> IS_ELIGIBLE_CAMTRAP_CONTACT_TYPE =
      ctc -> CAMTRAP_CITATION_CONTACT_TYPE.contains(ctc.getType());

  /** Utility class */
  private CitationGenerator() {}

  /**
   * Generate a citation for a {@link Dataset}, using the title of the given {@link Organization}
   * as the publisher.
   *
   * @param dataset dataset to cite
   * @param org publishing organization, must not be null
   * @return generated citation text together with the contacts used as authors
   * @throws NullPointerException if {@code dataset}, {@code org} or the organization title is null
   */
  @SuppressWarnings("unused")
  public static CitationData generateCitation(Dataset dataset, Organization org) {
    Objects.requireNonNull(org, "Organization shall be provided");
    return generateCitation(dataset, org.getTitle());
  }

  /**
   * Generate a citation for a {@link Dataset} using the publisher's provided citation.
   *
   * <p>The dataset DOI is appended when the provided text does not already mention one and the
   * dataset actually has a DOI.
   *
   * @param dataset dataset
   * @return generated citation as {@link String}
   * @throws NullPointerException if the dataset, its citation or the citation text is null
   */
  @SuppressWarnings("unused")
  public static String generatePublisherProvidedCitation(Dataset dataset) {
    Objects.requireNonNull(dataset, "Dataset shall be provided");
    Objects.requireNonNull(dataset.getCitation(), "Dataset.citation shall be provided");
    String originalCitationText = dataset.getCitation().getText();
    Objects.requireNonNull(originalCitationText, "Dataset.citation.text shall be provided");

    StringJoiner joiner = new StringJoiner(" ");
    joiner.add(originalCitationText);

    // Locale.ROOT: with the default locale "DOI:" lower-cases to "doı:" under a Turkish locale
    // and the DOI would not be detected.
    String lowerCasedText = originalCitationText.toLowerCase(Locale.ROOT);
    boolean doiAlreadyCited =
        lowerCasedText.contains("doi.org") || lowerCasedText.contains("doi:");

    // Append the DOI only if the text does not cite it yet; a dataset without DOI is tolerated,
    // consistent with generateCitation.
    if (!doiAlreadyCited && dataset.getDoi() != null) {
      joiner.add(decodeDoiUrl(dataset));
    }

    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");

    return joiner.toString();
  }

  /**
   * Generate a citation for a {@link Dataset} and the title of its publishing organization. TODO
   * add support for i18n
   *
   * <p>When no author name can be derived from the dataset contacts, the organization title is
   * used in place of the authors (and is then not repeated as publisher).
   *
   * @param dataset dataset to cite
   * @param organizationTitle title of the publishing organization
   * @return generated citation (not flagged as provided by source) and the contacts used as
   *     authors
   * @throws NullPointerException if {@code dataset} or {@code organizationTitle} is null
   */
  public static CitationData generateCitation(Dataset dataset, String organizationTitle) {

    Objects.requireNonNull(dataset, "Dataset shall be provided");
    Objects.requireNonNull(organizationTitle, "Organization title shall be provided");

    Citation citation = new Citation();

    List<CitationContact> contacts = getAuthors(dataset);

    StringJoiner joiner = new StringJoiner(" ");
    List<String> authorsName = generateAuthorsName(contacts);
    String authors = String.join(", ", authorsName);

    boolean authorsNameAvailable = StringUtils.isNotBlank(authors);
    authors = authorsNameAvailable ? authors : organizationTitle;

    // only add a dot if we are not going to add it with the year
    authors += dataset.getPubDate() == null ? "." : "";
    joiner.add(authors);

    if (dataset.getPubDate() != null) {
      joiner.add("(" + dataset.getPubDate().toInstant().atZone(UTC).getYear() + ").");
    }

    // add title
    joiner.add(StringUtils.trim(dataset.getTitle()) + ".");

    // add version
    if (dataset.getVersion() != null) {
      joiner.add("Version " + dataset.getVersion() + ".");
    }

    // add publisher except if it was used instead of the authors
    if (authorsNameAvailable) {
      joiner.add(StringUtils.trim(organizationTitle) + ".");
    }

    if (dataset.getType() != null) {
      // Locale.ROOT keeps the enum name conversion independent of the JVM default locale
      joiner.add(
          StringUtils.capitalize(
              dataset.getType().name().replace('_', ' ').toLowerCase(Locale.ROOT)));
    }
    joiner.add("dataset");

    // add DOI as the identifier.
    if (dataset.getDoi() != null) {
      joiner.add(decodeDoiUrl(dataset));
    }

    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");

    citation.setText(joiner.toString());
    citation.setCitationProvidedBySource(false);

    return CitationData.builder().citation(citation).contacts(contacts).build();
  }

  /**
   * Extracts the authors of a dataset. Datasets exposing a {@link EndpointType#CAMTRAP_DP}
   * endpoint use the Camtrap contact types, all other datasets use the regular author types.
   *
   * @param dataset dataset, must not be null
   * @return ordered list of authors or empty list, never null
   * @throws NullPointerException if {@code dataset} is null
   */
  public static List<CitationContact> getAuthors(Dataset dataset) {
    Objects.requireNonNull(dataset, "Dataset shall be provided");

    // a missing endpoint list (or a null entry in it) simply means "not Camtrap"
    List<Endpoint> endpoints = dataset.getEndpoints();
    boolean isCamtrap =
        endpoints != null
            && endpoints.stream()
                .filter(Objects::nonNull)
                .map(Endpoint::getType)
                .anyMatch(type -> type == EndpointType.CAMTRAP_DP);

    return isCamtrap
        ? getAuthorsForCamtrap(dataset.getContacts())
        : getAuthors(dataset.getContacts());
  }

  /**
   * Extracts an ordered list of unique authors from the contacts of a Camtrap dataset. A {@link
   * Contact} is kept when its last name is not blank and its {@link ContactType} is contained in
   * {@link #CAMTRAP_CITATION_CONTACT_TYPE}.
   *
   * @param contacts list of contacts available
   * @return ordered list of authors or empty list, never null
   */
  public static List<CitationContact> getAuthorsForCamtrap(List<Contact> contacts) {
    if (contacts == null || contacts.isEmpty()) {
      return Collections.emptyList();
    }

    return getUniqueAuthors(contacts, IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CAMTRAP_CONTACT_TYPE));
  }

  /**
   * Extracts an ordered list of unique authors from a list of contacts. A {@link Contact} is
   * identified as an author when their {@link ContactType} is contained in {@link
   * #AUTHOR_CONTACT_TYPE}. But, we shall at least have one contact of type
   * MANDATORY_CONTACT_TYPE, otherwise an empty list is returned.
   *
   * @param contacts list of contacts available
   * @return ordered list of authors or empty list, never null
   */
  public static List<CitationContact> getAuthors(List<Contact> contacts) {
    if (contacts == null || contacts.isEmpty()) {
      return Collections.emptyList();
    }

    List<CitationContact> uniqueContacts =
        getUniqueAuthors(contacts, IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CONTACT_TYPE));

    // make sure we have at least one instance of {@link #MANDATORY_CONTACT_TYPE}
    boolean hasMandatoryContact =
        uniqueContacts.stream().anyMatch(ctc -> ctc.getRoles().contains(MANDATORY_CONTACT_TYPE));

    return hasMandatoryContact ? uniqueContacts : Collections.emptyList();
  }

  /**
   * Given a list of authors, generates a {@link List} of {@link String} representing the authors
   * name. A contact whose last name is blank will not be included.
   *
   * @param authors ordered list of authors
   * @return list of author names (if it can be generated) or empty list, never null
   */
  public static List<String> generateAuthorsName(List<CitationContact> authors) {
    if (authors == null || authors.isEmpty()) {
      return Collections.emptyList();
    }

    return authors.stream()
        .filter(IS_CONTACT_NAME_PROVIDED)
        .map(CitationContact::getAbbreviatedName)
        .collect(Collectors.toList());
  }

  /**
   * This method is used to get the list of "unique" authors. Currently, uniqueness is based on
   * lastName + firstNames. The order of the provided list will be preserved; roles and user ids of
   * later duplicates are merged into the first occurrence of the contact (after applying the
   * filter).
   *
   * @param authors a list of contacts representing possible authors
   * @param filter {@link Predicate} used to pre-filter contacts
   * @return list of contacts
   */
  private static List<CitationContact> getUniqueAuthors(
      List<Contact> authors, Predicate<Contact> filter) {
    List<CitationContact> uniqueContact = new ArrayList<>();
    if (authors == null) {
      return uniqueContact;
    }

    for (Contact ctc : authors) {
      if (ctc == null || !filter.test(ctc)) {
        continue;
      }

      Optional<CitationContact> existing = findInAuthorList(ctc, uniqueContact);
      if (existing.isPresent()) {
        // same person listed again: merge the additional role and user ids
        CitationContact author = existing.get();
        if (ctc.getType() != null) {
          author.getRoles().add(ctc.getType());
        }
        if (ctc.getUserId() != null) {
          author.getUserId().addAll(ctc.getUserId());
        }
      } else {
        HashSet<ContactType> contactTypes = new HashSet<>();
        if (ctc.getType() != null) {
          contactTypes.add(ctc.getType());
        }
        HashSet<String> userIds = new HashSet<>();
        if (ctc.getUserId() != null) {
          userIds.addAll(ctc.getUserId());
        }
        uniqueContact.add(
            new CitationContact(
                ctc.getKey(),
                getAuthorName(ctc),
                ctc.getFirstName(),
                ctc.getLastName(),
                contactTypes,
                userIds));
      }
    }
    return uniqueContact;
  }

  /**
   * Looks up a {@link Contact} in the list of "unique" contacts. Currently, uniqueness is based on
   * the case-insensitive comparison of lastName and firstNames.
   *
   * @param ctc contact to look for
   * @param uniqueContacts list of contacts
   * @return the first matching contact, or an empty optional if there is none
   */
  private static Optional<CitationContact> findInAuthorList(
      Contact ctc, List<CitationContact> uniqueContacts) {
    return uniqueContacts.stream()
        .filter(
            author ->
                StringUtils.equalsIgnoreCase(ctc.getLastName(), author.getLastName())
                    && StringUtils.equalsIgnoreCase(ctc.getFirstName(), author.getFirstName()))
        .findFirst();
  }

  /**
   * Given a {@link Contact}, generates a String for that contact for citation purpose: the last
   * name followed by the upper-cased initial of each first name, or the last name alone when no
   * first name is available. The organization will be used (if present) in case the contact has
   * no last name.
   *
   * @param creator contact object
   * @return name, or an empty string when neither a last name nor an organization is available
   */
  public static String getAuthorName(Contact creator) {
    StringBuilder sb = new StringBuilder();
    String lastName = StringUtils.trimToNull(creator.getLastName());
    String firstNames = StringUtils.trimToNull(creator.getFirstName());
    String organization = StringUtils.trimToNull(creator.getOrganization());

    if (lastName != null && firstNames != null) {
      sb.append(lastName);
      sb.append(" ");
      // add first initial of each first name, capitalized
      sb.append(
          Arrays.stream(firstNames.split("\\s+"))
              .filter(StringUtils::isNotBlank)
              .map(CitationGenerator::upperCaseInitial)
              .collect(Collectors.joining(" ")));
    } else if (lastName != null) {
      sb.append(lastName);
    } else if (organization != null) {
      sb.append(organization);
    }
    return sb.toString();
  }

  /** Returns the first code point of a non-empty name, upper-cased independently of locale. */
  private static String upperCaseInitial(String name) {
    // offsetByCodePoints keeps a surrogate pair together instead of cutting it in half
    return name.substring(0, name.offsetByCodePoints(0, 1)).toUpperCase(Locale.ROOT);
  }

  /** Decodes the URL form of the dataset DOI; the caller guarantees the DOI is not null. */
  private static String decodeDoiUrl(Dataset dataset) {
    try {
      return URLDecoder.decode(
          dataset.getDoi().getUrl().toString(), StandardCharsets.UTF_8.name());
    } catch (UnsupportedEncodingException e) {
      throw new IllegalArgumentException("Couldn't decode DOI URL", e);
    }
  }

  /** Result of a citation generation: the citation and the contacts used as its authors. */
  @Data
  @Builder
  public static class CitationData {

    private final Citation citation;
    private final List<CitationContact> contacts;
  }
}