001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.api.util;
015
import org.gbif.api.model.registry.Citation;
import org.gbif.api.model.registry.CitationContact;
import org.gbif.api.model.registry.Contact;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Endpoint;
import org.gbif.api.model.registry.Organization;
import org.gbif.api.vocabulary.ContactType;
import org.gbif.api.vocabulary.EndpointType;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import lombok.Builder;
import lombok.Data;
046
047/**
048 * Helper class tha generates a Citation String from {@link Dataset} and {@link Organization}
049 * objects. Documentation : /docs/citations.md
050 */
051public final class CitationGenerator {
052
053  private static final ZoneId UTC = ZoneId.of("UTC");
054  private static final ContactType MANDATORY_CONTACT_TYPE = ContactType.ORIGINATOR;
055  private static final EnumSet<ContactType> AUTHOR_CONTACT_TYPE =
056      EnumSet.of(ContactType.ORIGINATOR, ContactType.METADATA_AUTHOR);
057  private static final EnumSet<ContactType> CAMTRAP_CITATION_CONTACT_TYPE =
058      EnumSet.of(ContactType.POINT_OF_CONTACT, ContactType.PRINCIPAL_INVESTIGATOR, ContactType.CONTENT_PROVIDER);
059  private static final Predicate<Contact> IS_NAME_PROVIDED_FCT =
060      ctc -> StringUtils.isNotBlank(ctc.getLastName());
061  private static final Predicate<CitationContact> IS_CONTACT_NAME_PROVIDED =
062      ctc -> StringUtils.isNotBlank(ctc.getLastName());
063  private static final Predicate<Contact> IS_ELIGIBLE_CONTACT_TYPE =
064      ctc -> AUTHOR_CONTACT_TYPE.contains(ctc.getType());
065  private static final Predicate<Contact> IS_ELIGIBLE_CAMTRAP_CONTACT_TYPE =
066      ctc -> CAMTRAP_CITATION_CONTACT_TYPE.contains(ctc.getType());
067
068  /** Utility class */
069  private CitationGenerator() {}
070
071  @SuppressWarnings("unused")
072  public static CitationData generateCitation(Dataset dataset, Organization org) {
073    Objects.requireNonNull(org, "Organization shall be provided");
074    return generateCitation(dataset, org.getTitle());
075  }
076
077  /**
078   * Generate a citation for a {@link Dataset} using the publisher's provided citation.
079   * @param dataset dataset
080   * @return generated citation as {@link String}
081   */
082  @SuppressWarnings("unused")
083  public static String generatePublisherProvidedCitation(Dataset dataset) {
084    Objects.requireNonNull(dataset, "Dataset shall be provided");
085    Objects.requireNonNull(dataset.getCitation(), "Dataset.citation shall be provided");
086    String originalCitationText = dataset.getCitation().getText();
087    Objects.requireNonNull(originalCitationText, "Dataset.citation.text shall be provided");
088
089    StringJoiner joiner = new StringJoiner(" ");
090
091    joiner.add(originalCitationText);
092
093    // Check DOI exists, and append it if it doesn't.
094    if (!originalCitationText.toLowerCase().contains("doi.org")
095        && !originalCitationText.toLowerCase().contains("doi:")) {
096      try {
097        joiner.add(URLDecoder.decode(dataset.getDoi().getUrl().toString(), "UTF-8"));
098      } catch (UnsupportedEncodingException e) {
099        throw new IllegalArgumentException("Couldn't decode DOI URL", e);
100      }
101    }
102
103    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");
104
105    return joiner.toString();
106  }
107
108  /**
109   * Generate a citation for a {@link Dataset} and its {@link Organization}. TODO add support for
110   * i18n
111   * @return generated citation as {@link String}
112   */
113  public static CitationData generateCitation(Dataset dataset, String organizationTitle) {
114
115    Objects.requireNonNull(dataset, "Dataset shall be provided");
116    Objects.requireNonNull(organizationTitle, "Organization title shall be provided");
117
118    Citation citation = new Citation();
119
120    List<CitationContact> contacts = getAuthors(dataset);
121
122    StringJoiner joiner = new StringJoiner(" ");
123    List<String> authorsName = generateAuthorsName(contacts);
124    String authors = String.join(", ", authorsName);
125
126    boolean authorsNameAvailable = StringUtils.isNotBlank(authors);
127    authors = authorsNameAvailable ? authors : organizationTitle;
128
129    // only add a dot if we are not going to add it with the year
130    authors += dataset.getPubDate() == null ? "." : "";
131    joiner.add(authors);
132
133    if (dataset.getPubDate() != null) {
134      joiner.add("(" + dataset.getPubDate().toInstant().atZone(UTC).getYear() + ").");
135    }
136
137    // add title
138    joiner.add(StringUtils.trim(dataset.getTitle()) + ".");
139
140    // add version
141    if (dataset.getVersion() != null) {
142      joiner.add("Version " + dataset.getVersion() + ".");
143    }
144
145    // add publisher except if it was used instead of the authors
146    if (authorsNameAvailable) {
147      joiner.add(StringUtils.trim(organizationTitle) + ".");
148    }
149
150    if (dataset.getType() != null) {
151      joiner.add(StringUtils.capitalize(dataset.getType().name().replace('_', ' ').toLowerCase()));
152    }
153    joiner.add("dataset");
154
155    // add DOI as the identifier.
156    if (dataset.getDoi() != null) {
157      try {
158        joiner.add(
159            URLDecoder.decode(dataset.getDoi().getUrl().toString(), StandardCharsets.UTF_8.name()));
160      } catch (UnsupportedEncodingException e) {
161        throw new IllegalArgumentException("Couldn't decode DOI URL", e);
162      }
163    }
164
165    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");
166
167    citation.setText(joiner.toString());
168    citation.setCitationProvidedBySource(false);
169
170    return CitationData.builder().citation(citation).contacts(contacts).build();
171  }
172
173  public static List<CitationContact> getAuthors(Dataset dataset) {
174    boolean isCamtrap = dataset.getEndpoints().stream()
175        .map(Endpoint::getType)
176        .anyMatch(e -> e == EndpointType.CAMTRAP_DP);
177
178    return isCamtrap ? getAuthorsForCamtrap(dataset.getContacts()) : getAuthors(dataset.getContacts());
179  }
180
181  public static List<CitationContact> getAuthorsForCamtrap(List<Contact> contacts) {
182    if (contacts == null || contacts.isEmpty()) {
183      return Collections.emptyList();
184    }
185
186    return getUniqueAuthors(
187        contacts, ctc -> IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CAMTRAP_CONTACT_TYPE).test(ctc));
188  }
189
190  /**
191   * Extracts an ordered list of unique authors from a list of contacts. A {@link Contact} is
192   * identified as an author when his {@link ContactType} is contained in {@link
193   * #AUTHOR_CONTACT_TYPE}. But, we shall at least have one contact of type MANDATORY_CONTACT_TYPE.
194   *
195   * @param contacts list of contacts available
196   * @return ordered list of authors or empty list, never null
197   */
198  public static List<CitationContact> getAuthors(List<Contact> contacts) {
199    if (contacts == null || contacts.isEmpty()) {
200      return Collections.emptyList();
201    }
202
203    List<CitationContact> uniqueContacts =
204        getUniqueAuthors(
205            contacts, ctc -> IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CONTACT_TYPE).test(ctc));
206
207    // make sure we have at least one instance of {@link #MANDATORY_CONTACT_TYPE}
208    Optional<CitationContact> firstOriginator =
209        uniqueContacts.stream()
210            .filter(ctc -> ctc.getRoles().contains(MANDATORY_CONTACT_TYPE))
211            .findFirst();
212
213    if (firstOriginator.isPresent()) {
214      return uniqueContacts;
215    }
216    return Collections.emptyList();
217  }
218
219  /**
220   * Given a list of authors, generates a {@link List} of {@link String} representing the authors
221   * name. If a contact doesn't have a first AND last name it will not be included.
222   *
223   * @param authors ordered list of authors
224   * @return list of author names (if it can be generated) or empty list, never null
225   */
226  public static List<String> generateAuthorsName(List<CitationContact> authors) {
227    if (authors == null || authors.isEmpty()) {
228      return Collections.emptyList();
229    }
230
231    return authors.stream()
232        .filter(IS_CONTACT_NAME_PROVIDED)
233        .map(CitationContact::getAbbreviatedName)
234        .collect(Collectors.toList());
235  }
236
237  /**
238   * This method is used to get the list of "unique" authors. Currently, uniqueness is based on
239   * lastName + firstNames. The order of the provided list will be preserved which also means the
240   * first {@link ContactType} found for a contact is the one that will be used for this contact
241   * (after applying the filter).
242   *
243   * @param authors a list of contacts representing possible authors
244   * @param filter {@link Predicate} used to pre-filter contacts
245   * @return list of contacts
246   */
247  private static List<CitationContact> getUniqueAuthors(
248      List<Contact> authors, Predicate<Contact> filter) {
249    List<CitationContact> uniqueContact = new ArrayList<>();
250    if (authors != null) {
251      authors.forEach(
252          ctc -> {
253            if (filter.test(ctc)) {
254              Optional<CitationContact> author = findInAuthorList(ctc, uniqueContact);
255              if (!author.isPresent()) {
256                HashSet<ContactType> contactTypes = new HashSet<>();
257                if (ctc.getType() != null) {
258                  contactTypes.add(ctc.getType());
259                }
260                HashSet<String> userIds = new HashSet<>();
261                if (ctc.getUserId() != null && !ctc.getUserId().isEmpty()) {
262                  userIds.addAll(ctc.getUserId());
263                }
264                uniqueContact.add(
265                    new CitationContact(
266                        ctc.getKey(),
267                        getAuthorName(ctc),
268                        ctc.getFirstName(),
269                        ctc.getLastName(),
270                        contactTypes,
271                        userIds));
272              } else {
273                author.ifPresent(
274                    a -> {
275                      a.getRoles().add(ctc.getType());
276                      if (ctc.getUserId() != null) {
277                        a.getUserId().addAll(ctc.getUserId());
278                      }
279                    });
280              }
281            }
282          });
283    }
284    return uniqueContact;
285  }
286
287  /**
288   * Check if a specific {@link Contact} is NOT already in the list of "unique" contact. Currently,
289   * uniqueness is based on the comparisons of lastName and firstNames.
290   *
291   * @param ctc contact to check
292   * @param uniqueContacts list of contacts
293   * @return contact wrapped in optional
294   */
295  private static Optional<CitationContact> findInAuthorList(
296      Contact ctc, List<CitationContact> uniqueContacts) {
297    return uniqueContacts.stream()
298        .filter(
299            author ->
300                StringUtils.equalsIgnoreCase(ctc.getLastName(), author.getLastName())
301                    && StringUtils.equalsIgnoreCase(ctc.getFirstName(), author.getFirstName()))
302        .findFirst();
303  }
304
305  /**
306   * Given a {@link Contact}, generates a String for that contact for citation purpose. The
307   * organization will be used (if present) in case we don't have both lastName and firstNames of
308   * the contact.
309   *
310   * @param creator contact object
311   * @return name
312   */
313  public static String getAuthorName(Contact creator) {
314    StringBuilder sb = new StringBuilder();
315    String lastName = StringUtils.trimToNull(creator.getLastName());
316    String firstNames = StringUtils.trimToNull(creator.getFirstName());
317    String organization = StringUtils.trimToNull(creator.getOrganization());
318
319    if (lastName != null && firstNames != null) {
320      sb.append(lastName);
321      sb.append(" ");
322      // add first initial of each first name, capitalized
323      String[] names = firstNames.split("\\s+");
324
325      sb.append(
326          Arrays.stream(names)
327              .filter(str -> !StringUtils.isBlank(str))
328              .map(str -> StringUtils.upperCase(String.valueOf(str.charAt(0))))
329              .collect(Collectors.joining(" ")));
330    } else if (lastName != null) {
331      sb.append(lastName);
332    } else if (organization != null) {
333      sb.append(organization);
334    }
335    return sb.toString();
336  }
337
338  @Data
339  @Builder
340  public static class CitationData {
341
342    private final Citation citation;
343    private final List<CitationContact> contacts;
344  }
345}