001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.api.util;
015
016import org.gbif.api.model.registry.Citation;
017import org.gbif.api.model.registry.CitationContact;
018import org.gbif.api.model.registry.Contact;
019import org.gbif.api.model.registry.Dataset;
020import org.gbif.api.model.registry.Organization;
021import org.gbif.api.vocabulary.ContactType;
022
023import java.io.UnsupportedEncodingException;
024import java.net.URLDecoder;
025import java.nio.charset.StandardCharsets;
026import java.time.LocalDate;
027import java.time.ZoneId;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collections;
031import java.util.EnumSet;
032import java.util.HashSet;
033import java.util.List;
034import java.util.Objects;
035import java.util.Optional;
036import java.util.StringJoiner;
037import java.util.function.Predicate;
038import java.util.stream.Collectors;
039
040import org.apache.commons.lang3.StringUtils;
041
042import lombok.Builder;
043import lombok.Data;
044
045/**
046 * Helper class tha generates a Citation String from {@link Dataset} and {@link Organization}
047 * objects. Documentation : /docs/citations.md
048 */
049public final class CitationGenerator {
050
051  private static final ZoneId UTC = ZoneId.of("UTC");
052  private static final ContactType MANDATORY_CONTACT_TYPE = ContactType.ORIGINATOR;
053  private static final EnumSet<ContactType> AUTHOR_CONTACT_TYPE =
054      EnumSet.of(ContactType.ORIGINATOR, ContactType.METADATA_AUTHOR);
055  private static final Predicate<Contact> IS_NAME_PROVIDED_FCT =
056      ctc -> StringUtils.isNotBlank(ctc.getLastName());
057  private static final Predicate<CitationContact> IS_CONTACT_NAME_PROVIDED =
058      ctc -> StringUtils.isNotBlank(ctc.getLastName());
059  private static final Predicate<Contact> IS_ELIGIBLE_CONTACT_TYPE =
060      ctc -> AUTHOR_CONTACT_TYPE.contains(ctc.getType());
061
062  /** Utility class */
063  private CitationGenerator() {}
064
065  @SuppressWarnings("unused")
066  public static CitationData generateCitation(Dataset dataset, Organization org) {
067    Objects.requireNonNull(org, "Organization shall be provided");
068    return generateCitation(dataset, org.getTitle());
069  }
070
071  /**
072   * Generate a citation for a {@link Dataset} using the publisher's provided citation.
073   * @param dataset dataset
074   * @return generated citation as {@link String}
075   */
076  @SuppressWarnings("unused")
077  public static String generatePublisherProvidedCitation(Dataset dataset) {
078    Objects.requireNonNull(dataset, "Dataset shall be provided");
079    Objects.requireNonNull(dataset.getCitation(), "Dataset.citation shall be provided");
080    String originalCitationText = dataset.getCitation().getText();
081    Objects.requireNonNull(originalCitationText, "Dataset.citation.text shall be provided");
082
083    StringJoiner joiner = new StringJoiner(" ");
084
085    joiner.add(originalCitationText);
086
087    // Check DOI exists, and append it if it doesn't.
088    if (!originalCitationText.toLowerCase().contains("doi.org")
089        && !originalCitationText.toLowerCase().contains("doi:")) {
090      try {
091        joiner.add(URLDecoder.decode(dataset.getDoi().getUrl().toString(), "UTF-8"));
092      } catch (UnsupportedEncodingException e) {
093        throw new IllegalArgumentException("Couldn't decode DOI URL", e);
094      }
095    }
096
097    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");
098
099    return joiner.toString();
100  }
101
102  /**
103   * Generate a citation for a {@link Dataset} and its {@link Organization}. TODO add support for
104   * i18n
105   * @return generated citation as {@link String}
106   */
107  public static CitationData generateCitation(Dataset dataset, String organizationTitle) {
108
109    Objects.requireNonNull(dataset, "Dataset shall be provided");
110    Objects.requireNonNull(organizationTitle, "Organization title shall be provided");
111
112    Citation citation = new Citation();
113
114    List<CitationContact> contacts = getAuthors(dataset.getContacts());
115
116    StringJoiner joiner = new StringJoiner(" ");
117    List<String> authorsName = generateAuthorsName(contacts);
118    String authors = String.join(", ", authorsName);
119
120    boolean authorsNameAvailable = StringUtils.isNotBlank(authors);
121    authors = authorsNameAvailable ? authors : organizationTitle;
122
123    // only add a dot if we are not going to add it with the year
124    authors += dataset.getPubDate() == null ? "." : "";
125    joiner.add(authors);
126
127    if (dataset.getPubDate() != null) {
128      joiner.add("(" + dataset.getPubDate().toInstant().atZone(UTC).getYear() + ").");
129    }
130
131    // add title
132    joiner.add(StringUtils.trim(dataset.getTitle()) + ".");
133
134    // add version
135    if (dataset.getVersion() != null) {
136      joiner.add("Version " + dataset.getVersion() + ".");
137    }
138
139    // add publisher except if it was used instead of the authors
140    if (authorsNameAvailable) {
141      joiner.add(StringUtils.trim(organizationTitle) + ".");
142    }
143
144    if (dataset.getType() != null) {
145      joiner.add(StringUtils.capitalize(dataset.getType().name().replace('_', ' ').toLowerCase()));
146    }
147    joiner.add("dataset");
148
149    // add DOI as the identifier.
150    if (dataset.getDoi() != null) {
151      try {
152        joiner.add(
153            URLDecoder.decode(dataset.getDoi().getUrl().toString(), StandardCharsets.UTF_8.name()));
154      } catch (UnsupportedEncodingException e) {
155        throw new IllegalArgumentException("Couldn't decode DOI URL", e);
156      }
157    }
158
159    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");
160
161    citation.setText(joiner.toString());
162    citation.setCitationProvidedBySource(false);
163
164    return CitationData.builder().citation(citation).contacts(contacts).build();
165  }
166
167  /**
168   * Extracts an ordered list of unique authors from a list of contacts. A {@link Contact} is
169   * identified as an author when his {@link ContactType} is contained in {@link
170   * #AUTHOR_CONTACT_TYPE}. But, we shall at least have one contact of type MANDATORY_CONTACT_TYPE.
171   *
172   * @param contacts list of contacts available
173   * @return ordered list of authors or empty list, never null
174   */
175  public static List<CitationContact> getAuthors(List<Contact> contacts) {
176    if (contacts == null || contacts.isEmpty()) {
177      return Collections.emptyList();
178    }
179
180    List<CitationContact> uniqueContacts =
181        getUniqueAuthors(
182            contacts, ctc -> IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CONTACT_TYPE).test(ctc));
183
184    // make sure we have at least one instance of {@link #MANDATORY_CONTACT_TYPE}
185    Optional<CitationContact> firstOriginator =
186        uniqueContacts.stream()
187            .filter(ctc -> ctc.getRoles().contains(MANDATORY_CONTACT_TYPE))
188            .findFirst();
189
190    if (firstOriginator.isPresent()) {
191      return uniqueContacts;
192    }
193    return Collections.emptyList();
194  }
195
196  /**
197   * Given a list of authors, generates a {@link List} of {@link String} representing the authors
198   * name. If a contact doesn't have a first AND last name it will not be included.
199   *
200   * @param authors ordered list of authors
201   * @return list of author names (if it can be generated) or empty list, never null
202   */
203  public static List<String> generateAuthorsName(List<CitationContact> authors) {
204    if (authors == null || authors.isEmpty()) {
205      return Collections.emptyList();
206    }
207
208    return authors.stream()
209        .filter(IS_CONTACT_NAME_PROVIDED)
210        .map(CitationContact::getAbbreviatedName)
211        .collect(Collectors.toList());
212  }
213
214  /**
215   * This method is used to get the list of "unique" authors. Currently, uniqueness is based on
216   * lastName + firstNames. The order of the provided list will be preserved which also means the
217   * first {@link ContactType} found for a contact is the one that will be used for this contact
218   * (after applying the filter).
219   *
220   * @param authors a list of contacts representing possible authors
221   * @param filter {@link Predicate} used to pre-filter contacts
222   * @return list of contacts
223   */
224  private static List<CitationContact> getUniqueAuthors(
225      List<Contact> authors, Predicate<Contact> filter) {
226    List<CitationContact> uniqueContact = new ArrayList<>();
227    if (authors != null) {
228      authors.forEach(
229          ctc -> {
230            if (filter.test(ctc)) {
231              Optional<CitationContact> author = findInAuthorList(ctc, uniqueContact);
232              if (!author.isPresent()) {
233                HashSet<ContactType> contactTypes = new HashSet<>();
234                if (ctc.getType() != null) {
235                  contactTypes.add(ctc.getType());
236                }
237                HashSet<String> userIds = new HashSet<>();
238                if (ctc.getUserId() != null && !ctc.getUserId().isEmpty()) {
239                  userIds.addAll(ctc.getUserId());
240                }
241                uniqueContact.add(
242                    new CitationContact(
243                        ctc.getKey(),
244                        getAuthorName(ctc),
245                        ctc.getFirstName(),
246                        ctc.getLastName(),
247                        contactTypes,
248                        userIds));
249              } else {
250                author.ifPresent(
251                    a -> {
252                      a.getRoles().add(ctc.getType());
253                      if (ctc.getUserId() != null) {
254                        a.getUserId().addAll(ctc.getUserId());
255                      }
256                    });
257              }
258            }
259          });
260    }
261    return uniqueContact;
262  }
263
264  /**
265   * Check if a specific {@link Contact} is NOT already in the list of "unique" contact. Currently,
266   * uniqueness is based on the comparisons of lastName and firstNames.
267   *
268   * @param ctc contact to check
269   * @param uniqueContacts list of contacts
270   * @return contact wrapped in optional
271   */
272  private static Optional<CitationContact> findInAuthorList(
273      Contact ctc, List<CitationContact> uniqueContacts) {
274    return uniqueContacts.stream()
275        .filter(
276            author ->
277                StringUtils.equalsIgnoreCase(ctc.getLastName(), author.getLastName())
278                    && StringUtils.equalsIgnoreCase(ctc.getFirstName(), author.getFirstName()))
279        .findFirst();
280  }
281
282  /**
283   * Given a {@link Contact}, generates a String for that contact for citation purpose. The
284   * organization will be used (if present) in case we don't have both lastName and firstNames of
285   * the contact.
286   *
287   * @param creator contact object
288   * @return name
289   */
290  public static String getAuthorName(Contact creator) {
291    StringBuilder sb = new StringBuilder();
292    String lastName = StringUtils.trimToNull(creator.getLastName());
293    String firstNames = StringUtils.trimToNull(creator.getFirstName());
294    String organization = StringUtils.trimToNull(creator.getOrganization());
295
296    if (lastName != null && firstNames != null) {
297      sb.append(lastName);
298      sb.append(" ");
299      // add first initial of each first name, capitalized
300      String[] names = firstNames.split("\\s+");
301
302      sb.append(
303          Arrays.stream(names)
304              .filter(str -> !StringUtils.isBlank(str))
305              .map(str -> StringUtils.upperCase(String.valueOf(str.charAt(0))))
306              .collect(Collectors.joining(" ")));
307    } else if (lastName != null) {
308      sb.append(lastName);
309    } else if (organization != null) {
310      sb.append(organization);
311    }
312    return sb.toString();
313  }
314
315  @Data
316  @Builder
317  public static class CitationData {
318
319    private final Citation citation;
320    private final List<CitationContact> contacts;
321  }
322}