001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.api.util;
015
import org.gbif.api.model.registry.Citation;
import org.gbif.api.model.registry.CitationContact;
import org.gbif.api.model.registry.Contact;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Organization;
import org.gbif.api.vocabulary.ContactType;
import org.gbif.api.vocabulary.EndpointType;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import lombok.Builder;
import lombok.Data;
045
046/**
047 * Helper class tha generates a Citation String from {@link Dataset} and {@link Organization}
048 * objects. Documentation : /docs/citations.md
049 */
050public final class CitationGenerator {
051
052  private static final ZoneId UTC = ZoneId.of("UTC");
053  private static final ContactType MANDATORY_CONTACT_TYPE = ContactType.ORIGINATOR;
054  private static final EnumSet<ContactType> AUTHOR_CONTACT_TYPE =
055      EnumSet.of(ContactType.ORIGINATOR, ContactType.METADATA_AUTHOR);
056  private static final Predicate<Contact> IS_NAME_PROVIDED_FCT =
057      ctc -> StringUtils.isNotBlank(ctc.getLastName());
058  private static final Predicate<CitationContact> IS_CONTACT_NAME_PROVIDED =
059      ctc -> StringUtils.isNotBlank(ctc.getLastName());
060  private static final Predicate<Contact> IS_ELIGIBLE_CONTACT_TYPE =
061      ctc -> AUTHOR_CONTACT_TYPE.contains(ctc.getType());
062
063  /** Utility class */
064  private CitationGenerator() {}
065
066  @SuppressWarnings("unused")
067  public static CitationData generateCitation(Dataset dataset, Organization org) {
068    Objects.requireNonNull(org, "Organization shall be provided");
069    return generateCitation(dataset, org.getTitle());
070  }
071
072  /**
073   * Generate a citation for a {@link Dataset} using the publisher's provided citation.
074   * @param dataset dataset
075   * @return generated citation as {@link String}
076   */
077  @SuppressWarnings("unused")
078  public static String generatePublisherProvidedCitation(Dataset dataset) {
079    Objects.requireNonNull(dataset, "Dataset shall be provided");
080    Objects.requireNonNull(dataset.getCitation(), "Dataset.citation shall be provided");
081    String originalCitationText = dataset.getCitation().getText();
082    Objects.requireNonNull(originalCitationText, "Dataset.citation.text shall be provided");
083
084    StringJoiner joiner = new StringJoiner(" ");
085
086    joiner.add(originalCitationText);
087
088    // Check DOI exists, and append it if it doesn't.
089    if (!originalCitationText.toLowerCase().contains("doi.org")
090        && !originalCitationText.toLowerCase().contains("doi:")) {
091      try {
092        joiner.add(URLDecoder.decode(dataset.getDoi().getUrl().toString(), "UTF-8"));
093      } catch (UnsupportedEncodingException e) {
094        throw new IllegalArgumentException("Couldn't decode DOI URL", e);
095      }
096    }
097
098    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");
099
100    return joiner.toString();
101  }
102
103  /**
104   * Generate a citation for a {@link Dataset} and its {@link Organization}. TODO add support for
105   * i18n
106   * @return generated citation as {@link String}
107   */
108  public static CitationData generateCitation(Dataset dataset, String organizationTitle) {
109
110    Objects.requireNonNull(dataset, "Dataset shall be provided");
111    Objects.requireNonNull(organizationTitle, "Organization title shall be provided");
112
113    Citation citation = new Citation();
114
115    List<CitationContact> contacts = getAuthors(dataset.getContacts());
116
117    StringJoiner joiner = new StringJoiner(" ");
118    List<String> authorsName = generateAuthorsName(contacts);
119    String authors = String.join(", ", authorsName);
120
121    boolean authorsNameAvailable = StringUtils.isNotBlank(authors);
122    authors = authorsNameAvailable ? authors : organizationTitle;
123
124    // only add a dot if we are not going to add it with the year
125    authors += dataset.getPubDate() == null ? "." : "";
126    joiner.add(authors);
127
128    if (dataset.getPubDate() != null) {
129      joiner.add("(" + dataset.getPubDate().toInstant().atZone(UTC).getYear() + ").");
130    }
131
132    // add title
133    joiner.add(StringUtils.trim(dataset.getTitle()) + ".");
134
135    // add version
136    if (dataset.getVersion() != null) {
137      joiner.add("Version " + dataset.getVersion() + ".");
138    }
139
140    // add publisher except if it was used instead of the authors
141    if (authorsNameAvailable) {
142      joiner.add(StringUtils.trim(organizationTitle) + ".");
143    }
144
145    if (dataset.getType() != null) {
146      joiner.add(StringUtils.capitalize(dataset.getType().name().replace('_', ' ').toLowerCase()));
147    }
148    joiner.add("dataset");
149
150    // add DOI as the identifier.
151    if (dataset.getDoi() != null) {
152      try {
153        joiner.add(
154            URLDecoder.decode(dataset.getDoi().getUrl().toString(), StandardCharsets.UTF_8.name()));
155      } catch (UnsupportedEncodingException e) {
156        throw new IllegalArgumentException("Couldn't decode DOI URL", e);
157      }
158    }
159
160    joiner.add("accessed via GBIF.org on " + LocalDate.now(UTC) + ".");
161
162    citation.setText(joiner.toString());
163    citation.setCitationProvidedBySource(false);
164
165    return CitationData.builder().citation(citation).contacts(contacts).build();
166  }
167
168  /**
169   * Extracts an ordered list of unique authors from a list of contacts. A {@link Contact} is
170   * identified as an author when his {@link ContactType} is contained in {@link
171   * #AUTHOR_CONTACT_TYPE}. But, we shall at least have one contact of type MANDATORY_CONTACT_TYPE.
172   *
173   * @param contacts list of contacts available
174   * @return ordered list of authors or empty list, never null
175   */
176  public static List<CitationContact> getAuthors(List<Contact> contacts) {
177    if (contacts == null || contacts.isEmpty()) {
178      return Collections.emptyList();
179    }
180
181    List<CitationContact> uniqueContacts =
182        getUniqueAuthors(
183            contacts, ctc -> IS_NAME_PROVIDED_FCT.and(IS_ELIGIBLE_CONTACT_TYPE).test(ctc));
184
185    // make sure we have at least one instance of {@link #MANDATORY_CONTACT_TYPE}
186    Optional<CitationContact> firstOriginator =
187        uniqueContacts.stream()
188            .filter(ctc -> ctc.getRoles().contains(MANDATORY_CONTACT_TYPE))
189            .findFirst();
190
191    if (firstOriginator.isPresent()) {
192      return uniqueContacts;
193    }
194    return Collections.emptyList();
195  }
196
197  /**
198   * Given a list of authors, generates a {@link List} of {@link String} representing the authors
199   * name. If a contact doesn't have a first AND last name it will not be included.
200   *
201   * @param authors ordered list of authors
202   * @return list of author names (if it can be generated) or empty list, never null
203   */
204  public static List<String> generateAuthorsName(List<CitationContact> authors) {
205    if (authors == null || authors.isEmpty()) {
206      return Collections.emptyList();
207    }
208
209    return authors.stream()
210        .filter(IS_CONTACT_NAME_PROVIDED)
211        .map(CitationContact::getAbbreviatedName)
212        .collect(Collectors.toList());
213  }
214
215  /**
216   * This method is used to get the list of "unique" authors. Currently, uniqueness is based on
217   * lastName + firstNames. The order of the provided list will be preserved which also means the
218   * first {@link ContactType} found for a contact is the one that will be used for this contact
219   * (after applying the filter).
220   *
221   * @param authors a list of contacts representing possible authors
222   * @param filter {@link Predicate} used to pre-filter contacts
223   * @return list of contacts
224   */
225  private static List<CitationContact> getUniqueAuthors(
226      List<Contact> authors, Predicate<Contact> filter) {
227    List<CitationContact> uniqueContact = new ArrayList<>();
228    if (authors != null) {
229      authors.forEach(
230          ctc -> {
231            if (filter.test(ctc)) {
232              Optional<CitationContact> author = findInAuthorList(ctc, uniqueContact);
233              if (!author.isPresent()) {
234                HashSet<ContactType> contactTypes = new HashSet<>();
235                if (ctc.getType() != null) {
236                  contactTypes.add(ctc.getType());
237                }
238                HashSet<String> userIds = new HashSet<>();
239                if (ctc.getUserId() != null && !ctc.getUserId().isEmpty()) {
240                  userIds.addAll(ctc.getUserId());
241                }
242                uniqueContact.add(
243                    new CitationContact(
244                        ctc.getKey(),
245                        getAuthorName(ctc),
246                        ctc.getFirstName(),
247                        ctc.getLastName(),
248                        contactTypes,
249                        userIds));
250              } else {
251                author.ifPresent(
252                    a -> {
253                      a.getRoles().add(ctc.getType());
254                      if (ctc.getUserId() != null) {
255                        a.getUserId().addAll(ctc.getUserId());
256                      }
257                    });
258              }
259            }
260          });
261    }
262    return uniqueContact;
263  }
264
265  /**
266   * Check if a specific {@link Contact} is NOT already in the list of "unique" contact. Currently,
267   * uniqueness is based on the comparisons of lastName and firstNames.
268   *
269   * @param ctc contact to check
270   * @param uniqueContacts list of contacts
271   * @return contact wrapped in optional
272   */
273  private static Optional<CitationContact> findInAuthorList(
274      Contact ctc, List<CitationContact> uniqueContacts) {
275    return uniqueContacts.stream()
276        .filter(
277            author ->
278                StringUtils.equalsIgnoreCase(ctc.getLastName(), author.getLastName())
279                    && StringUtils.equalsIgnoreCase(ctc.getFirstName(), author.getFirstName()))
280        .findFirst();
281  }
282
283  /**
284   * Given a {@link Contact}, generates a String for that contact for citation purpose. The
285   * organization will be used (if present) in case we don't have both lastName and firstNames of
286   * the contact.
287   *
288   * @param creator contact object
289   * @return name
290   */
291  public static String getAuthorName(Contact creator) {
292    StringBuilder sb = new StringBuilder();
293    String lastName = StringUtils.trimToNull(creator.getLastName());
294    String firstNames = StringUtils.trimToNull(creator.getFirstName());
295    String organization = StringUtils.trimToNull(creator.getOrganization());
296
297    if (lastName != null && firstNames != null) {
298      sb.append(lastName);
299      sb.append(" ");
300      // add first initial of each first name, capitalized
301      String[] names = firstNames.split("\\s+");
302
303      sb.append(
304          Arrays.stream(names)
305              .filter(str -> !StringUtils.isBlank(str))
306              .map(str -> StringUtils.upperCase(String.valueOf(str.charAt(0))))
307              .collect(Collectors.joining(" ")));
308    } else if (lastName != null) {
309      sb.append(lastName);
310    } else if (organization != null) {
311      sb.append(organization);
312    }
313    return sb.toString();
314  }
315
316  @Data
317  @Builder
318  public static class CitationData {
319
320    private final Citation citation;
321    private final List<CitationContact> contacts;
322  }
323}