001package org.gbif.common.parsers.date;
002
003import com.google.common.base.Strings;
004import com.google.common.collect.Range;
005import lombok.extern.slf4j.Slf4j;
006import org.apache.commons.lang3.StringUtils;
007import org.gbif.api.vocabulary.OccurrenceIssue;
008import org.gbif.common.parsers.core.OccurrenceParseResult;
009import org.gbif.common.parsers.core.ParseResult;
010
011import java.io.Serializable;
012import java.time.LocalDate;
013import java.time.temporal.ChronoField;
014import java.time.temporal.TemporalAccessor;
015import java.util.Collections;
016import java.util.EnumSet;
017import java.util.List;
018import java.util.Optional;
019import java.util.Set;
020
021import static org.gbif.common.parsers.core.ParseResult.CONFIDENCE.DEFINITE;
022import static org.gbif.common.parsers.core.ParseResult.CONFIDENCE.PROBABLE;
023
024/**
025 * A date parser accepting multiple dates, and returning a common interpretation of them.
026 */
027@Slf4j
028public class MultiinputTemporalParser implements Serializable {
029
030  private static final long serialVersionUID = -8845127337324812802L;
031
032  private static final LocalDate MIN_LOCAL_DATE = LocalDate.of(1500, 1, 1);
033
034  private final TemporalParser temporalParser;
035
036  private MultiinputTemporalParser(List<DateComponentOrdering> orderings) {
037    if (orderings != null && !orderings.isEmpty()) {
038      DateComponentOrdering[] array = orderings.toArray(new DateComponentOrdering[0]);
039      temporalParser = CustomizedTextDateParser.getInstance(array);
040    } else {
041      temporalParser = DateParsers.defaultTemporalParser();
042    }
043  }
044
045  public static MultiinputTemporalParser create(List<DateComponentOrdering> orderings) {
046    return new MultiinputTemporalParser(orderings);
047  }
048
049  public static MultiinputTemporalParser create() {
050    return create(Collections.emptyList());
051  }
052
053  public OccurrenceParseResult<TemporalAccessor> parseRecordedDate(
054      String year, String month, String day, String dateString) {
055    return parseRecordedDate(year, month, day, dateString, null);
056  }
057
058  /**
059   * Three dates are provided:
060   *
061   * <ul>
062   *   <li>year, month and day
063   *   <li>dateString
064   *   <li>year and dayOfYear
065   * </ul>
066   *
067   * <p>Produces a single date at the best resolution possible, ignoring missing values.
068   *
069   * <p>Years are verified to be before this year and after 1500.
070   *
071   * @return interpretation result, never null
072   */
073  public OccurrenceParseResult<TemporalAccessor> parseRecordedDate(
074      String year, String month, String day, String dateString, String dayOfYear) {
075
076    ParseResult.CONFIDENCE[] confidence = new ParseResult.CONFIDENCE[1];
077    confidence[0] = DEFINITE;
078
079    Set<OccurrenceIssue> issues = EnumSet.noneOf(OccurrenceIssue.class);
080
081    // Clean up bad values from number-only fields.
082    year = cleanNumberString(year, confidence, issues);
083    month = cleanNumberString(month, confidence, issues);
084    day = cleanNumberString(day, confidence, issues);
085    dayOfYear = cleanNumberString(dayOfYear, confidence, issues);
086
087    boolean ymdProvided =
088        StringUtils.isNotBlank(year)
089            || StringUtils.isNotBlank(month)
090            || StringUtils.isNotBlank(day);
091    boolean dateStringProvided = StringUtils.isNotBlank(dateString);
092    boolean yDoyProvided = StringUtils.isNotBlank(year) && StringUtils.isNotBlank(dayOfYear);
093
094    // If we have only a year and dayOfYear, don't parse the year alone (without the month and day)
095    if (yDoyProvided && (StringUtils.isBlank(month) && StringUtils.isBlank(day))) {
096      ymdProvided = false;
097    }
098
099    boolean twoOrMoreProvided = (ymdProvided ? 1 : 0) + (dateStringProvided ? 1 : 0) + (yDoyProvided ? 1 : 0) >= 2;
100
101    if (!ymdProvided && !dateStringProvided && !yDoyProvided) {
102      log.trace("Date {}|{}|{}|{}|{} is all null", year, month, day, dateString, dayOfYear);
103      return OccurrenceParseResult.fail();
104    }
105
106    TemporalAccessor parsedTemporalAccessor;
107
108    // Parse all three possible dates
109    ParseResult<TemporalAccessor> parsedYMDResult =
110        ymdProvided ? temporalParser.parse(year, month, day) : ParseResult.fail();
111    ParseResult<TemporalAccessor> parsedDateResult =
112        dateStringProvided ? temporalParser.parse(dateString) : ParseResult.fail();
113    ParseResult<TemporalAccessor> parsedYearDoyResult =
114        yDoyProvided ? temporalParser.parse(year, dayOfYear) : ParseResult.fail();
115    TemporalAccessor parsedYmdTa = parsedYMDResult.getPayload();
116    TemporalAccessor parsedDateTa = parsedDateResult.getPayload();
117    TemporalAccessor parsedYearDoyTa = parsedYearDoyResult.getPayload();
118
119    int ymdResolution = -1, dateStringResolution = -1;
120    if (ymdProvided && parsedYMDResult.isSuccessful()) {
121      ymdResolution = TemporalAccessorUtils.resolution(parsedYmdTa);
122    }
123    if (dateStringProvided && parsedDateResult.isSuccessful()) {
124      dateStringResolution = TemporalAccessorUtils.resolution(parsedDateTa);
125    }
126
127    // Add issues if we failed to parse any dates that were present
128    if (ymdProvided && !parsedYMDResult.isSuccessful()) {
129      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
130    }
131    if (dateStringProvided && !parsedDateResult.isSuccessful()) {
132      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
133    }
134    if (yDoyProvided && !parsedYearDoyResult.isSuccessful()) {
135      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
136    }
137
138    log.trace("Date {}|{}|{}|{}|{} parsed to {}—{}—{}", year, month, day, dateString, dayOfYear, parsedYMDResult, parsedDateResult, parsedYearDoyResult);
139
140    // If a dateString is provided with something else, handle the case where it doesn't match.
141    boolean ambiguityResolved = false;
142    if (ymdProvided
143        && dateStringProvided
144        && !TemporalAccessorUtils.sameOrContained(parsedYmdTa, parsedDateTa)
145        && parsedDateResult.getAlternativePayloads() != null) {
146
147      // eventDate could be ambiguous (5/4/2014), but disambiguated by year-month-day.
148      Optional<TemporalAccessor> resolved =
149          TemporalAccessorUtils.resolveAmbiguousDates(
150              parsedYmdTa, parsedDateResult.getAlternativePayloads());
151      if (resolved.isPresent()) {
152        parsedDateTa = resolved.get();
153        ambiguityResolved = true;
154        log.trace("Date {}|{}|{}|{}|{} ambiguous₁ y-m-d resolved {}", year, month, day, dateString, dayOfYear, parsedDateTa);
155      }
156      // still a conflict
157      if (!ambiguityResolved) {
158        if (parsedYmdTa == null || parsedDateTa == null) {
159          log.debug("Date {}|{}|{}|{}|{} ambiguous₁ invalid", year, month, day, dateString, dayOfYear);
160          issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
161        } else {
162          log.debug("Date {}|{}|{}|{}|{} ambiguous₁ mismatch", year, month, day, dateString, dayOfYear);
163          issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
164        }
165      }
166    } else if (ymdProvided
167        && yDoyProvided
168        && !TemporalAccessorUtils.sameOrContained(parsedYearDoyTa, parsedDateTa)
169        && parsedDateResult.getAlternativePayloads() != null) {
170
171      // eventDate could be ambiguous (5/4/2014), but disambiguated by year-month-day.
172      Optional<TemporalAccessor> resolved =
173          TemporalAccessorUtils.resolveAmbiguousDates(
174              parsedYearDoyTa, parsedDateResult.getAlternativePayloads());
175      if (resolved.isPresent()) {
176        parsedDateTa = resolved.get();
177        ambiguityResolved = true;
178        log.trace("Date {}|{}|{}|{}|{} ambiguous₂ y-doy resolved {}", year, month, day, dateString, dayOfYear, parsedDateTa);
179      }
180      // still a conflict
181      if (!ambiguityResolved) {
182        if (parsedYmdTa == null || parsedYearDoyTa == null) {
183          log.debug("Date {}|{}|{}|{}|{} ambiguous₂ invalid", year, month, day, dateString, dayOfYear);
184          issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
185        } else {
186          log.debug("Date {}|{}|{}|{}|{} ambiguous₂ mismatch", year, month, day, dateString, dayOfYear);
187          issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
188        }
189      }
190    }
191
192    // Add an issue if there is any conflict between the dates
193    if (TemporalAccessorUtils.sameOrContainedOrNull(parsedYmdTa, parsedDateTa)
194        && TemporalAccessorUtils.sameOrContainedOrNull(parsedYmdTa, parsedYearDoyTa)
195        && TemporalAccessorUtils.sameOrContainedOrNull(parsedDateTa, parsedYearDoyTa)) {
196
197      confidence[0] = ParseResult.CONFIDENCE.lowerOf(confidence[0],
198        parsedDateTa != null
199          ? parsedDateResult.getConfidence()
200          : (parsedYmdTa != null
201          ? parsedYMDResult.getConfidence()
202          : parsedYearDoyResult.getConfidence()));
203    } else {
204      log.debug("Date {}|{}|{}|{}|{} mismatch (conflict)", year, month, day, dateString, dayOfYear);
205      issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
206      confidence[0] = PROBABLE;
207    }
208
209    // Add an issue if the resolution af ymd / date / yDoy is different
210    if (ymdResolution > 0 && dateStringResolution > 0) {
211      if (ymdResolution != dateStringResolution) {
212        log.debug("Date {}|{}|{}|{}|{} mismatch (resolution)", year, month, day, dateString, dayOfYear);
213        issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
214      }
215    }
216
217    // Best we can get from the three parts.
218    // Note 2000-01-01 and 2000-01 and 2000 will return 2000-01-01.
219    Optional<TemporalAccessor> nonConflictingTa =
220        TemporalAccessorUtils.nonConflictingDateParts(parsedYmdTa, parsedDateTa, parsedYearDoyTa);
221
222    if (nonConflictingTa.isPresent()) {
223      parsedTemporalAccessor = nonConflictingTa.get();
224      // if one of the parses failed we can not set the confidence to DEFINITE
225      confidence[0] =
226          ((ymdProvided && parsedYmdTa == null)
227                  || (dateStringProvided && parsedDateTa == null)
228                  || (yDoyProvided && parsedYearDoyTa == null))
229              ? PROBABLE
230              : confidence[0];
231    } else {
232      log.debug("Date {}|{}|{}|{}|{} mismatch (conflicting)", year, month, day, dateString, dayOfYear);
233      if (twoOrMoreProvided) {
234        issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
235      }
236      return OccurrenceParseResult.fail(issues);
237    }
238
239    if (!isValidDate(parsedTemporalAccessor)) {
240      if (parsedTemporalAccessor == null) {
241        log.debug("Date {}|{}|{}|{}|{} mismatch (invalid)", year, month, day, dateString, dayOfYear);
242        issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
243      } else {
244        log.debug("Date {}|{}|{}|{}|{} mismatch (unlikely)", year, month, day, dateString, dayOfYear);
245        issues.add(OccurrenceIssue.RECORDED_DATE_UNLIKELY);
246      }
247
248      return OccurrenceParseResult.fail(issues);
249    }
250
251    return OccurrenceParseResult.success(confidence[0], parsedTemporalAccessor, issues);
252  }
253
254  public OccurrenceParseResult<TemporalAccessor> parseRecordedDate(String dateString) {
255    return parseRecordedDate(null, null, null, dateString, null);
256  }
257
258  /** @return TemporalAccessor that represents a LocalDate or LocalDateTime */
259  public OccurrenceParseResult<TemporalAccessor> parseLocalDate(
260    String dateString, Range<LocalDate> likelyRange, OccurrenceIssue unlikelyIssue) {
261    return parseLocalDate(dateString, likelyRange, unlikelyIssue, null);
262  }
263
264  /** @return TemporalAccessor that represents a LocalDate or LocalDateTime */
265  public OccurrenceParseResult<TemporalAccessor> parseLocalDate(
266      String dateString, Range<LocalDate> likelyRange, OccurrenceIssue unlikelyIssue, OccurrenceIssue failIssue) {
267    if (!Strings.isNullOrEmpty(dateString)) {
268      OccurrenceParseResult<TemporalAccessor> result =
269          new OccurrenceParseResult<>(temporalParser.parse(dateString));
270      // check year makes sense
271      if (result.isSuccessful() && !isValidDate(result.getPayload(), likelyRange)) {
272        log.debug("Unlikely date parsed, ignore [{}].", dateString);
273        Optional.ofNullable(unlikelyIssue).ifPresent(result::addIssue);
274      } else if (!result.isSuccessful()) {
275        Optional.ofNullable(failIssue).ifPresent(result::addIssue);
276      }
277      return result;
278    }
279    return OccurrenceParseResult.fail();
280  }
281
282  /**
283   * Check if a date express as TemporalAccessor falls between the predefined range. Lower bound
284   * defined by {@link #MIN_LOCAL_DATE} and upper bound by current date + 1 day
285   *
286   * @return valid or not according to the predefined range.
287   */
288  protected static boolean isValidDate(TemporalAccessor temporalAccessor) {
289    LocalDate upperBound = LocalDate.now().plusDays(1);
290    return isValidDate(temporalAccessor, Range.closed(MIN_LOCAL_DATE, upperBound));
291  }
292
293  /** Check if a date express as TemporalAccessor falls between the provided range. */
294  protected static boolean isValidDate(
295      TemporalAccessor temporalAccessor, Range<LocalDate> likelyRange) {
296
297    if (temporalAccessor == null) {
298      return false;
299    }
300
301    // if partial dates should be considered valid
302    int year;
303    int month = 1;
304    int day = 1;
305    if (temporalAccessor.isSupported(ChronoField.YEAR)) {
306      year = temporalAccessor.get(ChronoField.YEAR);
307    } else {
308      return false;
309    }
310
311    if (temporalAccessor.isSupported(ChronoField.MONTH_OF_YEAR)) {
312      month = temporalAccessor.get(ChronoField.MONTH_OF_YEAR);
313    }
314
315    if (temporalAccessor.isSupported(ChronoField.DAY_OF_MONTH)) {
316      day = temporalAccessor.get(ChronoField.DAY_OF_MONTH);
317    }
318
319    return likelyRange.contains(LocalDate.of(year, month, day));
320  }
321
322  /**
323   * Removes a zero value if present.  If it was present, reduces the confidence
324   * and adds an issue.
325   */
326  private static String cleanNumberString(String number, ParseResult.CONFIDENCE[] confidence, Set<OccurrenceIssue> issues) {
327    if (StringUtils.trimToNull(number) == null) {
328      return null;
329    }
330
331    number = number.trim();
332    try {
333      if (Integer.parseInt(number) == 0) {
334        confidence[0] = PROBABLE;
335        issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
336        return null;
337      }
338    } catch (NumberFormatException e) {
339      confidence[0] = PROBABLE;
340      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
341    }
342
343    return StringUtils.trimToNull(number);
344  }
345}