package org.gbif.common.parsers.date;

import com.google.common.base.Strings;
import com.google.common.collect.Range;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.common.parsers.core.OccurrenceParseResult;
import org.gbif.common.parsers.core.ParseResult;

import java.io.Serializable;
import java.time.LocalDate;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import static org.gbif.common.parsers.core.ParseResult.CONFIDENCE.DEFINITE;
import static org.gbif.common.parsers.core.ParseResult.CONFIDENCE.PROBABLE;

/**
 * A date parser accepting multiple dates, and returning a common interpretation of them.
 *
 * <p>Up to three representations of the same date are parsed independently (year/month/day,
 * a free-text date string, and year/dayOfYear) and then reconciled into a single
 * {@link TemporalAccessor}, recording {@link OccurrenceIssue}s for anything that is invalid,
 * inconsistent or unlikely.
 */
@Slf4j
public class MultiinputTemporalParser implements Serializable {

  private static final long serialVersionUID = -8845127337324812802L;

  /** Earliest date accepted by {@link #isValidDate(TemporalAccessor)}. */
  private static final LocalDate MIN_LOCAL_DATE = LocalDate.of(1500, 1, 1);

  private final TemporalParser temporalParser;

  /**
   * @param orderings date component orderings used to build a customized text date parser; when
   *     null or empty the default temporal parser is used instead
   */
  private MultiinputTemporalParser(List<DateComponentOrdering> orderings) {
    if (orderings != null && !orderings.isEmpty()) {
      DateComponentOrdering[] array = orderings.toArray(new DateComponentOrdering[0]);
      temporalParser = CustomizedTextDateParser.getInstance(array);
    } else {
      temporalParser = DateParsers.defaultTemporalParser();
    }
  }

  /**
   * Creates a parser using the given date component orderings.
   *
   * @param orderings orderings to apply; null or empty selects the default temporal parser
   * @return a new parser instance
   */
  public static MultiinputTemporalParser create(List<DateComponentOrdering> orderings) {
    return new MultiinputTemporalParser(orderings);
  }

  /** Creates a parser backed by the default temporal parser. */
  public static MultiinputTemporalParser create() {
    return create(Collections.emptyList());
  }

  /**
   * Same as {@link #parseRecordedDate(String, String, String, String, String)} without a
   * dayOfYear value.
   */
  public OccurrenceParseResult<TemporalAccessor> parseRecordedDate(
      String year, String month, String day, String dateString) {
    return parseRecordedDate(year, month, day, dateString, null);
  }

  /**
   * Three dates are provided:
   *
   * <ul>
   *   <li>year, month and day
   *   <li>dateString
   *   <li>year and dayOfYear
   * </ul>
   *
   * <p>Produces a single date at the best resolution possible, ignoring missing values.
   *
   * <p>The resulting date is verified to be no earlier than 1500-01-01 and no later than
   * tomorrow (see {@link #isValidDate(TemporalAccessor)}); otherwise the parse fails with
   * {@link OccurrenceIssue#RECORDED_DATE_UNLIKELY}.
   *
   * @param year year as a number string, may be null or blank
   * @param month month as a number string, may be null or blank
   * @param day day of month as a number string, may be null or blank
   * @param dateString free-text date, may be null or blank
   * @param dayOfYear day of year as a number string, may be null or blank; only used together
   *     with a year
   * @return interpretation result, never null
   */
  public OccurrenceParseResult<TemporalAccessor> parseRecordedDate(
      String year, String month, String day, String dateString, String dayOfYear) {

    // Single-element array so that cleanNumberString can lower the confidence in place.
    ParseResult.CONFIDENCE[] confidence = new ParseResult.CONFIDENCE[1];
    confidence[0] = DEFINITE;

    Set<OccurrenceIssue> issues = EnumSet.noneOf(OccurrenceIssue.class);

    // Clean up bad values from number-only fields.
    year = cleanNumberString(year, confidence, issues);
    month = cleanNumberString(month, confidence, issues);
    day = cleanNumberString(day, confidence, issues);
    dayOfYear = cleanNumberString(dayOfYear, confidence, issues);

    boolean ymdProvided =
        StringUtils.isNotBlank(year)
            || StringUtils.isNotBlank(month)
            || StringUtils.isNotBlank(day);
    boolean dateStringProvided = StringUtils.isNotBlank(dateString);
    boolean yDoyProvided = StringUtils.isNotBlank(year) && StringUtils.isNotBlank(dayOfYear);

    // If we have only a year and dayOfYear, don't parse the year alone (without the month and day)
    if (yDoyProvided && (StringUtils.isBlank(month) && StringUtils.isBlank(day))) {
      ymdProvided = false;
    }

    boolean twoOrMoreProvided =
        (ymdProvided ? 1 : 0) + (dateStringProvided ? 1 : 0) + (yDoyProvided ? 1 : 0) >= 2;

    if (!ymdProvided && !dateStringProvided && !yDoyProvided) {
      log.trace("Date {}|{}|{}|{}|{} is all null", year, month, day, dateString, dayOfYear);
      return OccurrenceParseResult.fail();
    }

    // Parse all three possible dates
    ParseResult<TemporalAccessor> parsedYMDResult =
        ymdProvided ? temporalParser.parse(year, month, day) : ParseResult.fail();
    ParseResult<TemporalAccessor> parsedDateResult =
        dateStringProvided ? temporalParser.parse(dateString) : ParseResult.fail();
    ParseResult<TemporalAccessor> parsedYearDoyResult =
        yDoyProvided ? temporalParser.parse(year, dayOfYear) : ParseResult.fail();
    TemporalAccessor parsedYmdTa = parsedYMDResult.getPayload();
    TemporalAccessor parsedDateTa = parsedDateResult.getPayload();
    TemporalAccessor parsedYearDoyTa = parsedYearDoyResult.getPayload();

    // -1 marks "not available"; only successfully parsed inputs get a real resolution.
    int ymdResolution = -1;
    int dateStringResolution = -1;
    if (ymdProvided && parsedYMDResult.isSuccessful()) {
      ymdResolution = TemporalAccessorUtils.resolution(parsedYmdTa);
    }
    if (dateStringProvided && parsedDateResult.isSuccessful()) {
      dateStringResolution = TemporalAccessorUtils.resolution(parsedDateTa);
    }

    // Add issues if we failed to parse any dates that were present
    if (ymdProvided && !parsedYMDResult.isSuccessful()) {
      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
    }
    if (dateStringProvided && !parsedDateResult.isSuccessful()) {
      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
    }
    if (yDoyProvided && !parsedYearDoyResult.isSuccessful()) {
      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
    }

    log.trace("Date {}|{}|{}|{}|{} parsed to {}—{}—{}", year, month, day, dateString, dayOfYear, parsedYMDResult, parsedDateResult, parsedYearDoyResult);

    // If a dateString is provided with something else, handle the case where it doesn't match.
    if (ymdProvided
        && dateStringProvided
        && !TemporalAccessorUtils.sameOrContained(parsedYmdTa, parsedDateTa)
        && parsedDateResult.getAlternativePayloads() != null) {

      // eventDate could be ambiguous (5/4/2014), but disambiguated by year-month-day.
      Optional<TemporalAccessor> resolved =
          TemporalAccessorUtils.resolveAmbiguousDates(
              parsedYmdTa, parsedDateResult.getAlternativePayloads());
      if (resolved.isPresent()) {
        parsedDateTa = resolved.get();
        log.trace("Date {}|{}|{}|{}|{} ambiguous₁ y-m-d resolved {}", year, month, day, dateString, dayOfYear, parsedDateTa);
      } else if (parsedYmdTa == null || parsedDateTa == null) {
        // still a conflict, and one side could not be parsed at all
        log.debug("Date {}|{}|{}|{}|{} ambiguous₁ invalid", year, month, day, dateString, dayOfYear);
        issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
      } else {
        // still a conflict between two parsed dates
        log.debug("Date {}|{}|{}|{}|{} ambiguous₁ mismatch", year, month, day, dateString, dayOfYear);
        issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
      }
    } else if (dateStringProvided
        && yDoyProvided
        && !TemporalAccessorUtils.sameOrContained(parsedYearDoyTa, parsedDateTa)
        && parsedDateResult.getAlternativePayloads() != null) {
      // This branch compares the dateString with year-dayOfYear, so it is gated on those two
      // inputs. (ymdProvided is forced to false when only year and dayOfYear are given, so gating
      // on it would make this branch unreachable for exactly the records it is meant for.)

      // eventDate could be ambiguous (5/4/2014), but disambiguated by year-dayOfYear.
      Optional<TemporalAccessor> resolved =
          TemporalAccessorUtils.resolveAmbiguousDates(
              parsedYearDoyTa, parsedDateResult.getAlternativePayloads());
      if (resolved.isPresent()) {
        parsedDateTa = resolved.get();
        log.trace("Date {}|{}|{}|{}|{} ambiguous₂ y-doy resolved {}", year, month, day, dateString, dayOfYear, parsedDateTa);
      } else if (parsedDateTa == null || parsedYearDoyTa == null) {
        // still a conflict, and one side could not be parsed at all
        log.debug("Date {}|{}|{}|{}|{} ambiguous₂ invalid", year, month, day, dateString, dayOfYear);
        issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
      } else {
        // still a conflict between two parsed dates
        log.debug("Date {}|{}|{}|{}|{} ambiguous₂ mismatch", year, month, day, dateString, dayOfYear);
        issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
      }
    }

    // Add an issue if there is any conflict between the dates
    if (TemporalAccessorUtils.sameOrContainedOrNull(parsedYmdTa, parsedDateTa)
        && TemporalAccessorUtils.sameOrContainedOrNull(parsedYmdTa, parsedYearDoyTa)
        && TemporalAccessorUtils.sameOrContainedOrNull(parsedDateTa, parsedYearDoyTa)) {

      // No conflict: take the confidence of the first available parse, in the order
      // dateString, year-month-day, year-dayOfYear, but never raise the current confidence.
      confidence[0] = ParseResult.CONFIDENCE.lowerOf(confidence[0],
          parsedDateTa != null
              ? parsedDateResult.getConfidence()
              : (parsedYmdTa != null
                  ? parsedYMDResult.getConfidence()
                  : parsedYearDoyResult.getConfidence()));
    } else {
      log.debug("Date {}|{}|{}|{}|{} mismatch (conflict)", year, month, day, dateString, dayOfYear);
      issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
      confidence[0] = PROBABLE;
    }

    // Add an issue if the resolution of ymd / dateString is different
    if (ymdResolution > 0 && dateStringResolution > 0) {
      if (ymdResolution != dateStringResolution) {
        log.debug("Date {}|{}|{}|{}|{} mismatch (resolution)", year, month, day, dateString, dayOfYear);
        issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
      }
    }

    // Best we can get from the three parts.
    // Note 2000-01-01 and 2000-01 and 2000 will return 2000-01-01.
    Optional<TemporalAccessor> nonConflictingTa =
        TemporalAccessorUtils.nonConflictingDateParts(parsedYmdTa, parsedDateTa, parsedYearDoyTa);

    if (!nonConflictingTa.isPresent()) {
      log.debug("Date {}|{}|{}|{}|{} mismatch (conflicting)", year, month, day, dateString, dayOfYear);
      if (twoOrMoreProvided) {
        issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
      }
      return OccurrenceParseResult.fail(issues);
    }

    TemporalAccessor parsedTemporalAccessor = nonConflictingTa.get();

    // if one of the parses failed we can not set the confidence to DEFINITE
    if ((ymdProvided && parsedYmdTa == null)
        || (dateStringProvided && parsedDateTa == null)
        || (yDoyProvided && parsedYearDoyTa == null)) {
      confidence[0] = PROBABLE;
    }

    // parsedTemporalAccessor comes out of a present Optional and is therefore never null, so a
    // failed validity check means the date has no year or lies outside the accepted range.
    if (!isValidDate(parsedTemporalAccessor)) {
      log.debug("Date {}|{}|{}|{}|{} mismatch (unlikely)", year, month, day, dateString, dayOfYear);
      issues.add(OccurrenceIssue.RECORDED_DATE_UNLIKELY);
      return OccurrenceParseResult.fail(issues);
    }

    return OccurrenceParseResult.success(confidence[0], parsedTemporalAccessor, issues);
  }

  /**
   * Same as {@link #parseRecordedDate(String, String, String, String, String)} with only a
   * dateString.
   */
  public OccurrenceParseResult<TemporalAccessor> parseRecordedDate(String dateString) {
    return parseRecordedDate(null, null, null, dateString, null);
  }

  /**
   * Same as {@link #parseLocalDate(String, Range, OccurrenceIssue, OccurrenceIssue)} without an
   * issue for failed parses.
   *
   * @return TemporalAccessor that represents a LocalDate or LocalDateTime
   */
  public OccurrenceParseResult<TemporalAccessor> parseLocalDate(
      String dateString, Range<LocalDate> likelyRange, OccurrenceIssue unlikelyIssue) {
    return parseLocalDate(dateString, likelyRange, unlikelyIssue, null);
  }

  /**
   * Parses a single date string and flags it when it falls outside the likely range.
   *
   * @param dateString text to parse; a null or empty value yields a failed result without issues
   * @param likelyRange range the parsed date is expected to fall in
   * @param unlikelyIssue issue added when the date parses but is outside {@code likelyRange};
   *     ignored when null
   * @param failIssue issue added when the date cannot be parsed; ignored when null
   * @return TemporalAccessor that represents a LocalDate or LocalDateTime
   */
  public OccurrenceParseResult<TemporalAccessor> parseLocalDate(
      String dateString, Range<LocalDate> likelyRange, OccurrenceIssue unlikelyIssue, OccurrenceIssue failIssue) {
    if (Strings.isNullOrEmpty(dateString)) {
      return OccurrenceParseResult.fail();
    }

    OccurrenceParseResult<TemporalAccessor> result =
        new OccurrenceParseResult<>(temporalParser.parse(dateString));
    if (!result.isSuccessful()) {
      if (failIssue != null) {
        result.addIssue(failIssue);
      }
    } else if (!isValidDate(result.getPayload(), likelyRange)) {
      // check year makes sense; the result is still returned, only flagged
      log.debug("Unlikely date parsed, ignore [{}].", dateString);
      if (unlikelyIssue != null) {
        result.addIssue(unlikelyIssue);
      }
    }
    return result;
  }

  /**
   * Check if a date expressed as TemporalAccessor falls within the predefined range. Lower bound
   * defined by {@link #MIN_LOCAL_DATE} and upper bound by current date + 1 day (both inclusive).
   *
   * @return valid or not according to the predefined range.
   */
  protected static boolean isValidDate(TemporalAccessor temporalAccessor) {
    LocalDate upperBound = LocalDate.now().plusDays(1);
    return isValidDate(temporalAccessor, Range.closed(MIN_LOCAL_DATE, upperBound));
  }

  /**
   * Check if a date expressed as TemporalAccessor falls within the provided range.
   *
   * <p>Partial dates are accepted: a missing month or day defaults to 1. A null accessor, or one
   * without a year, is never valid.
   */
  protected static boolean isValidDate(
      TemporalAccessor temporalAccessor, Range<LocalDate> likelyRange) {

    if (temporalAccessor == null || !temporalAccessor.isSupported(ChronoField.YEAR)) {
      return false;
    }

    int year = temporalAccessor.get(ChronoField.YEAR);
    int month =
        temporalAccessor.isSupported(ChronoField.MONTH_OF_YEAR)
            ? temporalAccessor.get(ChronoField.MONTH_OF_YEAR)
            : 1;
    int day =
        temporalAccessor.isSupported(ChronoField.DAY_OF_MONTH)
            ? temporalAccessor.get(ChronoField.DAY_OF_MONTH)
            : 1;

    return likelyRange.contains(LocalDate.of(year, month, day));
  }

  /**
   * Normalizes a number-only field.
   *
   * <ul>
   *   <li>null or blank input returns null, without any issue;
   *   <li>a zero value is removed (null is returned), the confidence is reduced and an issue is
   *       added;
   *   <li>a value that is not an integer is returned trimmed, but the confidence is still reduced
   *       and an issue is still added;
   *   <li>any other value is returned trimmed.
   * </ul>
   *
   * @param number raw field value
   * @param confidence single-element holder, set to PROBABLE when the value is suspicious
   * @param issues issue set, receives RECORDED_DATE_INVALID when the value is suspicious
   */
  private static String cleanNumberString(String number, ParseResult.CONFIDENCE[] confidence, Set<OccurrenceIssue> issues) {
    String trimmed = StringUtils.trimToNull(number);
    if (trimmed == null) {
      return null;
    }

    try {
      if (Integer.parseInt(trimmed) == 0) {
        confidence[0] = PROBABLE;
        issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
        return null;
      }
    } catch (NumberFormatException e) {
      // Not an integer: keep the value for the downstream parser, but flag it.
      confidence[0] = PROBABLE;
      issues.add(OccurrenceIssue.RECORDED_DATE_INVALID);
    }

    return trimmed;
  }
}