/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.api.util;

import org.gbif.api.model.registry.Citation;
import org.gbif.api.model.registry.CitationContact;
import org.gbif.api.model.registry.Contact;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Organization;
import org.gbif.api.vocabulary.ContactType;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import lombok.Builder;
import lombok.Data;

/**
 * Helper class that generates a Citation String from {@link Dataset} and {@link Organization}
 * objects. Documentation : /docs/citations.md
 */
public final class CitationGenerator {

  private static final ZoneId UTC = ZoneId.of("UTC");
  private static final ContactType MANDATORY_CONTACT_TYPE = ContactType.ORIGINATOR;
  private static final EnumSet<ContactType> AUTHOR_CONTACT_TYPE =
      EnumSet.of(ContactType.ORIGINATOR, ContactType.METADATA_AUTHOR);
  private static final Predicate<Contact> IS_NAME_PROVIDED_FCT =
      ctc -> StringUtils.isNotBlank(ctc.getLastName());
  private static final Predicate<CitationContact> IS_CONTACT_NAME_PROVIDED =
      ctc -> StringUtils.isNotBlank(ctc.getLastName());
  private static final Predicate<Contact> IS_ELIGIBLE_CONTACT_TYPE =
      ctc -> AUTHOR_CONTACT_TYPE.contains(ctc.getType());

  /** Utility class */
  private CitationGenerator() {}

  /**
   * Generate a citation for a {@link Dataset} and its publishing {@link Organization}. Delegates
   * to {@link #generateCitation(Dataset, String)} using the organization's title.
   *
   * @param dataset dataset to cite, must not be null
   * @param org publishing organization, must not be null
   * @return the generated citation together with the contacts used as authors
   * @throws NullPointerException if the dataset, the organization or its title is null
   */
  @SuppressWarnings("unused")
  public static CitationData generateCitation(Dataset dataset, Organization org) {
    Objects.requireNonNull(org, "Organization shall be provided");
    return generateCitation(dataset, org.getTitle());
  }

  /**
   * Generate a citation for a {@link Dataset} using the publisher's provided citation.
   *
   * <p>The dataset DOI URL is appended when the provided text does not already mention a DOI
   * ("doi.org" or "doi:", case-insensitive) and the dataset actually has a DOI.
   *
   * @param dataset dataset
   * @return generated citation as {@link String}
   * @throws NullPointerException if the dataset, its citation or the citation text is null
   */
  @SuppressWarnings("unused")
  public static String generatePublisherProvidedCitation(Dataset dataset) {
    Objects.requireNonNull(dataset, "Dataset shall be provided");
    Objects.requireNonNull(dataset.getCitation(), "Dataset.citation shall be provided");
    String originalCitationText = dataset.getCitation().getText();
    Objects.requireNonNull(originalCitationText, "Dataset.citation.text shall be provided");

    StringJoiner joiner = new StringJoiner(" ");
    joiner.add(originalCitationText);

    // Locale.ROOT keeps the match stable whatever the JVM default locale is
    // (e.g. "DOI:" lower-cases to "doı:" under a Turkish locale).
    String lowerCaseText = originalCitationText.toLowerCase(Locale.ROOT);
    boolean doiAlreadyCited = lowerCaseText.contains("doi.org") || lowerCaseText.contains("doi:");

    // Append the DOI only if the text does not carry it yet and the dataset has one.
    if (!doiAlreadyCited && dataset.getDoi() != null) {
      joiner.add(decodeDoiUrl(dataset));
    }

    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");

    return joiner.toString();
  }

  /**
   * Generate a citation for a {@link Dataset} and its publishing organization. TODO add support
   * for i18n
   *
   * <p>The text is assembled, space separated, as: authors (or the organization title when no
   * author name is available), publication year, title, version, publisher (only when authors
   * were used), dataset type followed by "dataset", DOI URL and the access date (UTC).
   *
   * @param dataset dataset to cite, must not be null
   * @param organizationTitle title of the publishing organization, must not be null
   * @return the generated {@link Citation} together with the contacts used as authors
   * @throws NullPointerException if the dataset or the organization title is null
   */
  public static CitationData generateCitation(Dataset dataset, String organizationTitle) {

    Objects.requireNonNull(dataset, "Dataset shall be provided");
    Objects.requireNonNull(organizationTitle, "Organization title shall be provided");

    Citation citation = new Citation();

    List<CitationContact> contacts = getAuthors(dataset.getContacts());

    StringJoiner joiner = new StringJoiner(" ");
    List<String> authorsName = generateAuthorsName(contacts);
    String authors = String.join(", ", authorsName);

    boolean authorsNameAvailable = StringUtils.isNotBlank(authors);
    authors = authorsNameAvailable ? authors : organizationTitle;

    // only add a dot if we are not going to add it with the year
    authors += dataset.getPubDate() == null ? "." : "";
    joiner.add(authors);

    if (dataset.getPubDate() != null) {
      joiner.add("(" + dataset.getPubDate().toInstant().atZone(UTC).getYear() + ").");
    }

    // add title
    joiner.add(StringUtils.trim(dataset.getTitle()) + ".");

    // add version
    if (dataset.getVersion() != null) {
      joiner.add("Version " + dataset.getVersion() + ".");
    }

    // add publisher except if it was used instead of the authors
    if (authorsNameAvailable) {
      joiner.add(StringUtils.trim(organizationTitle) + ".");
    }

    if (dataset.getType() != null) {
      // Locale.ROOT: enum names are ASCII and must not be lower-cased with locale-specific rules
      String typeLabel = dataset.getType().name().replace('_', ' ').toLowerCase(Locale.ROOT);
      joiner.add(StringUtils.capitalize(typeLabel));
    }
    joiner.add("dataset");

    // add DOI as the identifier.
    if (dataset.getDoi() != null) {
      joiner.add(decodeDoiUrl(dataset));
    }

    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");

    citation.setText(joiner.toString());
    citation.setCitationProvidedBySource(false);

    return CitationData.builder().citation(citation).contacts(contacts).build();
  }

  /**
   * Returns the URL-decoded (UTF-8) string form of the dataset's DOI URL.
   *
   * @param dataset dataset whose DOI is not null
   * @return decoded DOI URL
   * @throws IllegalArgumentException if the UTF-8 encoding is reported as unsupported
   */
  private static String decodeDoiUrl(Dataset dataset) {
    try {
      return URLDecoder.decode(
          dataset.getDoi().getUrl().toString(), StandardCharsets.UTF_8.name());
    } catch (UnsupportedEncodingException e) {
      throw new IllegalArgumentException("Couldn't decode DOI URL", e);
    }
  }

  /**
   * Extracts an ordered list of unique authors from a list of contacts. A {@link Contact} is
   * identified as an author when it has a non-blank last name and its {@link ContactType} is
   * contained in {@link #AUTHOR_CONTACT_TYPE}. But, we shall at least have one contact of type
   * MANDATORY_CONTACT_TYPE, otherwise no author is returned at all.
   *
   * @param contacts list of contacts available
   * @return ordered list of authors or empty list, never null
   */
  public static List<CitationContact> getAuthors(List<Contact> contacts) {
    if (contacts == null || contacts.isEmpty()) {
      return Collections.emptyList();
    }

    List<CitationContact> uniqueContacts =
        getUniqueAuthors(contacts, IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CONTACT_TYPE));

    // make sure we have at least one instance of MANDATORY_CONTACT_TYPE
    boolean hasMandatoryContact =
        uniqueContacts.stream().anyMatch(ctc -> ctc.getRoles().contains(MANDATORY_CONTACT_TYPE));

    if (hasMandatoryContact) {
      return uniqueContacts;
    }
    return Collections.emptyList();
  }

  /**
   * Given a list of authors, generates a {@link List} of {@link String} representing the authors
   * name. A contact without a (non-blank) last name will not be included.
   *
   * @param authors ordered list of authors
   * @return list of author names (if it can be generated) or empty list, never null
   */
  public static List<String> generateAuthorsName(List<CitationContact> authors) {
    if (authors == null || authors.isEmpty()) {
      return Collections.emptyList();
    }

    return authors.stream()
        .filter(IS_CONTACT_NAME_PROVIDED)
        .map(CitationContact::getAbbreviatedName)
        .collect(Collectors.toList());
  }

  /**
   * This method is used to get the list of "unique" authors. Currently, uniqueness is based on
   * lastName + firstNames (case-insensitive). The order of the provided list will be preserved:
   * an author keeps the position of its first matching contact, and the types and user ids of
   * later contacts with the same name are merged into that first entry.
   *
   * @param authors a list of contacts representing possible authors
   * @param filter {@link Predicate} used to pre-filter contacts
   * @return list of contacts, never null
   */
  private static List<CitationContact> getUniqueAuthors(
      List<Contact> authors, Predicate<Contact> filter) {
    List<CitationContact> uniqueContact = new ArrayList<>();
    if (authors == null) {
      return uniqueContact;
    }

    for (Contact ctc : authors) {
      if (!filter.test(ctc)) {
        continue;
      }

      Optional<CitationContact> existing = findInAuthorList(ctc, uniqueContact);
      if (existing.isPresent()) {
        // same person seen again: merge role and user ids into the first occurrence
        CitationContact author = existing.get();
        if (ctc.getType() != null) {
          author.getRoles().add(ctc.getType());
        }
        if (ctc.getUserId() != null) {
          author.getUserId().addAll(ctc.getUserId());
        }
      } else {
        HashSet<ContactType> contactTypes = new HashSet<>();
        if (ctc.getType() != null) {
          contactTypes.add(ctc.getType());
        }
        HashSet<String> userIds = new HashSet<>();
        if (ctc.getUserId() != null && !ctc.getUserId().isEmpty()) {
          userIds.addAll(ctc.getUserId());
        }
        uniqueContact.add(
            new CitationContact(
                ctc.getKey(),
                getAuthorName(ctc),
                ctc.getFirstName(),
                ctc.getLastName(),
                contactTypes,
                userIds));
      }
    }
    return uniqueContact;
  }

  /**
   * Looks up a specific {@link Contact} in the list of "unique" contacts. Currently, uniqueness
   * is based on the case-insensitive comparison of lastName and firstNames.
   *
   * @param ctc contact to look for
   * @param uniqueContacts list of contacts
   * @return the first matching contact wrapped in an optional, empty if there is none
   */
  private static Optional<CitationContact> findInAuthorList(
      Contact ctc, List<CitationContact> uniqueContacts) {
    return uniqueContacts.stream()
        .filter(
            author ->
                StringUtils.equalsIgnoreCase(ctc.getLastName(), author.getLastName())
                    && StringUtils.equalsIgnoreCase(ctc.getFirstName(), author.getFirstName()))
        .findFirst();
  }

  /**
   * Given a {@link Contact}, generates a String for that contact for citation purpose: the last
   * name followed by the upper-cased initial of each first name when both are present, the last
   * name alone when there is no first name, otherwise the organization (if present).
   *
   * @param creator contact object
   * @return name, or an empty string when neither a last name nor an organization is available
   */
  public static String getAuthorName(Contact creator) {
    StringBuilder sb = new StringBuilder();
    String lastName = StringUtils.trimToNull(creator.getLastName());
    String firstNames = StringUtils.trimToNull(creator.getFirstName());
    String organization = StringUtils.trimToNull(creator.getOrganization());

    if (lastName != null && firstNames != null) {
      sb.append(lastName);
      sb.append(" ");
      // add first initial of each first name, capitalized
      String[] names = firstNames.split("\\s+");

      sb.append(
          Arrays.stream(names)
              .filter(str -> !StringUtils.isBlank(str))
              .map(str -> StringUtils.upperCase(String.valueOf(str.charAt(0))))
              .collect(Collectors.joining(" ")));
    } else if (lastName != null) {
      sb.append(lastName);
    } else if (organization != null) {
      sb.append(organization);
    }
    return sb.toString();
  }

  /** Result of a citation generation: the citation and the contacts used as its authors. */
  @Data
  @Builder
  public static class CitationData {

    private final Citation citation;
    private final List<CitationContact> contacts;
  }
}