001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.api.util; 015 016import org.gbif.api.model.registry.Citation; 017import org.gbif.api.model.registry.CitationContact; 018import org.gbif.api.model.registry.Contact; 019import org.gbif.api.model.registry.Dataset; 020import org.gbif.api.model.registry.Organization; 021import org.gbif.api.vocabulary.ContactType; 022 023import java.io.UnsupportedEncodingException; 024import java.net.URLDecoder; 025import java.nio.charset.StandardCharsets; 026import java.time.LocalDate; 027import java.time.ZoneId; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Collections; 031import java.util.EnumSet; 032import java.util.HashSet; 033import java.util.List; 034import java.util.Objects; 035import java.util.Optional; 036import java.util.StringJoiner; 037import java.util.function.Predicate; 038import java.util.stream.Collectors; 039 040import org.apache.commons.lang3.StringUtils; 041 042import lombok.Builder; 043import lombok.Data; 044import org.gbif.api.vocabulary.EndpointType; 045 046/** 047 * Helper class tha generates a Citation String from {@link Dataset} and {@link Organization} 048 * objects. Documentation : /docs/citations.md 049 */ 050public final class CitationGenerator { 051 052 private static final ZoneId UTC = ZoneId.of("UTC"); 053 private static final ContactType MANDATORY_CONTACT_TYPE = ContactType.ORIGINATOR; 054 private static final EnumSet<ContactType> AUTHOR_CONTACT_TYPE = 055 EnumSet.of(ContactType.ORIGINATOR, ContactType.METADATA_AUTHOR); 056 private static final Predicate<Contact> IS_NAME_PROVIDED_FCT = 057 ctc -> StringUtils.isNotBlank(ctc.getLastName()); 058 private static final Predicate<CitationContact> IS_CONTACT_NAME_PROVIDED = 059 ctc -> StringUtils.isNotBlank(ctc.getLastName()); 060 private static final Predicate<Contact> IS_ELIGIBLE_CONTACT_TYPE = 061 ctc -> AUTHOR_CONTACT_TYPE.contains(ctc.getType()); 062 063 /** Utility class */ 064 private CitationGenerator() {} 065 066 @SuppressWarnings("unused") 067 public static CitationData generateCitation(Dataset dataset, Organization org) { 068 Objects.requireNonNull(org, "Organization shall be provided"); 069 return generateCitation(dataset, org.getTitle()); 070 } 071 072 /** 073 * Generate a citation for a {@link Dataset} using the publisher's provided citation. 074 * @param dataset dataset 075 * @return generated citation as {@link String} 076 */ 077 @SuppressWarnings("unused") 078 public static String generatePublisherProvidedCitation(Dataset dataset) { 079 Objects.requireNonNull(dataset, "Dataset shall be provided"); 080 Objects.requireNonNull(dataset.getCitation(), "Dataset.citation shall be provided"); 081 String originalCitationText = dataset.getCitation().getText(); 082 Objects.requireNonNull(originalCitationText, "Dataset.citation.text shall be provided"); 083 084 StringJoiner joiner = new StringJoiner(" "); 085 086 joiner.add(originalCitationText); 087 088 // Check DOI exists, and append it if it doesn't. 089 if (!originalCitationText.toLowerCase().contains("doi.org") 090 && !originalCitationText.toLowerCase().contains("doi:")) { 091 try { 092 joiner.add(URLDecoder.decode(dataset.getDoi().getUrl().toString(), "UTF-8")); 093 } catch (UnsupportedEncodingException e) { 094 throw new IllegalArgumentException("Couldn't decode DOI URL", e); 095 } 096 } 097 098 joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + "."); 099 100 return joiner.toString(); 101 } 102 103 /** 104 * Generate a citation for a {@link Dataset} and its {@link Organization}. TODO add support for 105 * i18n 106 * @return generated citation as {@link String} 107 */ 108 public static CitationData generateCitation(Dataset dataset, String organizationTitle) { 109 110 Objects.requireNonNull(dataset, "Dataset shall be provided"); 111 Objects.requireNonNull(organizationTitle, "Organization title shall be provided"); 112 113 Citation citation = new Citation(); 114 115 List<CitationContact> contacts = getAuthors(dataset.getContacts()); 116 117 StringJoiner joiner = new StringJoiner(" "); 118 List<String> authorsName = generateAuthorsName(contacts); 119 String authors = String.join(", ", authorsName); 120 121 boolean authorsNameAvailable = StringUtils.isNotBlank(authors); 122 authors = authorsNameAvailable ? authors : organizationTitle; 123 124 // only add a dot if we are not going to add it with the year 125 authors += dataset.getPubDate() == null ? "." : ""; 126 joiner.add(authors); 127 128 if (dataset.getPubDate() != null) { 129 joiner.add("(" + dataset.getPubDate().toInstant().atZone(UTC).getYear() + ")."); 130 } 131 132 // add title 133 joiner.add(StringUtils.trim(dataset.getTitle()) + "."); 134 135 // add version 136 if (dataset.getVersion() != null) { 137 joiner.add("Version " + dataset.getVersion() + "."); 138 } 139 140 // add publisher except if it was used instead of the authors 141 if (authorsNameAvailable) { 142 joiner.add(StringUtils.trim(organizationTitle) + "."); 143 } 144 145 if (dataset.getType() != null) { 146 joiner.add(StringUtils.capitalize(dataset.getType().name().replace('_', ' ').toLowerCase())); 147 } 148 joiner.add("dataset"); 149 150 // add DOI as the identifier. 151 if (dataset.getDoi() != null) { 152 try { 153 joiner.add( 154 URLDecoder.decode(dataset.getDoi().getUrl().toString(), StandardCharsets.UTF_8.name())); 155 } catch (UnsupportedEncodingException e) { 156 throw new IllegalArgumentException("Couldn't decode DOI URL", e); 157 } 158 } 159 160 joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + "."); 161 162 citation.setText(joiner.toString()); 163 citation.setCitationProvidedBySource(false); 164 165 return CitationData.builder().citation(citation).contacts(contacts).build(); 166 } 167 168 /** 169 * Extracts an ordered list of unique authors from a list of contacts. A {@link Contact} is 170 * identified as an author when his {@link ContactType} is contained in {@link 171 * #AUTHOR_CONTACT_TYPE}. But, we shall at least have one contact of type MANDATORY_CONTACT_TYPE. 172 * 173 * @param contacts list of contacts available 174 * @return ordered list of authors or empty list, never null 175 */ 176 public static List<CitationContact> getAuthors(List<Contact> contacts) { 177 if (contacts == null || contacts.isEmpty()) { 178 return Collections.emptyList(); 179 } 180 181 List<CitationContact> uniqueContacts = 182 getUniqueAuthors( 183 contacts, ctc -> IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CONTACT_TYPE).test(ctc)); 184 185 // make sure we have at least one instance of {@link #MANDATORY_CONTACT_TYPE} 186 Optional<CitationContact> firstOriginator = 187 uniqueContacts.stream() 188 .filter(ctc -> ctc.getRoles().contains(MANDATORY_CONTACT_TYPE)) 189 .findFirst(); 190 191 if (firstOriginator.isPresent()) { 192 return uniqueContacts; 193 } 194 return Collections.emptyList(); 195 } 196 197 /** 198 * Given a list of authors, generates a {@link List} of {@link String} representing the authors 199 * name. If a contact doesn't have a first AND last name it will not be included. 200 * 201 * @param authors ordered list of authors 202 * @return list of author names (if it can be generated) or empty list, never null 203 */ 204 public static List<String> generateAuthorsName(List<CitationContact> authors) { 205 if (authors == null || authors.isEmpty()) { 206 return Collections.emptyList(); 207 } 208 209 return authors.stream() 210 .filter(IS_CONTACT_NAME_PROVIDED) 211 .map(CitationContact::getAbbreviatedName) 212 .collect(Collectors.toList()); 213 } 214 215 /** 216 * This method is used to get the list of "unique" authors. Currently, uniqueness is based on 217 * lastName + firstNames. The order of the provided list will be preserved which also means the 218 * first {@link ContactType} found for a contact is the one that will be used for this contact 219 * (after applying the filter). 220 * 221 * @param authors a list of contacts representing possible authors 222 * @param filter {@link Predicate} used to pre-filter contacts 223 * @return list of contacts 224 */ 225 private static List<CitationContact> getUniqueAuthors( 226 List<Contact> authors, Predicate<Contact> filter) { 227 List<CitationContact> uniqueContact = new ArrayList<>(); 228 if (authors != null) { 229 authors.forEach( 230 ctc -> { 231 if (filter.test(ctc)) { 232 Optional<CitationContact> author = findInAuthorList(ctc, uniqueContact); 233 if (!author.isPresent()) { 234 HashSet<ContactType> contactTypes = new HashSet<>(); 235 if (ctc.getType() != null) { 236 contactTypes.add(ctc.getType()); 237 } 238 HashSet<String> userIds = new HashSet<>(); 239 if (ctc.getUserId() != null && !ctc.getUserId().isEmpty()) { 240 userIds.addAll(ctc.getUserId()); 241 } 242 uniqueContact.add( 243 new CitationContact( 244 ctc.getKey(), 245 getAuthorName(ctc), 246 ctc.getFirstName(), 247 ctc.getLastName(), 248 contactTypes, 249 userIds)); 250 } else { 251 author.ifPresent( 252 a -> { 253 a.getRoles().add(ctc.getType()); 254 if (ctc.getUserId() != null) { 255 a.getUserId().addAll(ctc.getUserId()); 256 } 257 }); 258 } 259 } 260 }); 261 } 262 return uniqueContact; 263 } 264 265 /** 266 * Check if a specific {@link Contact} is NOT already in the list of "unique" contact. Currently, 267 * uniqueness is based on the comparisons of lastName and firstNames. 268 * 269 * @param ctc contact to check 270 * @param uniqueContacts list of contacts 271 * @return contact wrapped in optional 272 */ 273 private static Optional<CitationContact> findInAuthorList( 274 Contact ctc, List<CitationContact> uniqueContacts) { 275 return uniqueContacts.stream() 276 .filter( 277 author -> 278 StringUtils.equalsIgnoreCase(ctc.getLastName(), author.getLastName()) 279 && StringUtils.equalsIgnoreCase(ctc.getFirstName(), author.getFirstName())) 280 .findFirst(); 281 } 282 283 /** 284 * Given a {@link Contact}, generates a String for that contact for citation purpose. The 285 * organization will be used (if present) in case we don't have both lastName and firstNames of 286 * the contact. 287 * 288 * @param creator contact object 289 * @return name 290 */ 291 public static String getAuthorName(Contact creator) { 292 StringBuilder sb = new StringBuilder(); 293 String lastName = StringUtils.trimToNull(creator.getLastName()); 294 String firstNames = StringUtils.trimToNull(creator.getFirstName()); 295 String organization = StringUtils.trimToNull(creator.getOrganization()); 296 297 if (lastName != null && firstNames != null) { 298 sb.append(lastName); 299 sb.append(" "); 300 // add first initial of each first name, capitalized 301 String[] names = firstNames.split("\\s+"); 302 303 sb.append( 304 Arrays.stream(names) 305 .filter(str -> !StringUtils.isBlank(str)) 306 .map(str -> StringUtils.upperCase(String.valueOf(str.charAt(0)))) 307 .collect(Collectors.joining(" "))); 308 } else if (lastName != null) { 309 sb.append(lastName); 310 } else if (organization != null) { 311 sb.append(organization); 312 } 313 return sb.toString(); 314 } 315 316 @Data 317 @Builder 318 public static class CitationData { 319 320 private final Citation citation; 321 private final List<CitationContact> contacts; 322 } 323}