001package org.gbif.common.parsers.utils;
002
003import java.util.regex.Pattern;
004import lombok.AccessLevel;
005import lombok.NoArgsConstructor;
006
/**
 * Utility methods which split a raw period string into its two raw parts, for example
 * "2010-01-01/2010-12-31" into {"2010-01-01", "2010-12-31"}.
 */
public class DelimiterUtils {
  // Delimiter between the two parts of a period
  private static final String CHAR_PERIOD = "/";
  private static final Pattern RGX_PERIOD = Pattern.compile(CHAR_PERIOD);

  // Building blocks of the abbreviated range patterns; one- and two-digit values are accepted
  private static final String YEAR = "([0-9]{4})";
  private static final String MONTH = "(1[0-2]|0[1-9]|[1-9])";
  private static final String DAY = "(3[01]|0[1-9]|[12][0-9]|[1-9])";

  // 1990-01-02/15, and non-ISO: 1990-1-2/15, but does not support 19000101/12
  private static final Pattern RGX_ISO_YMD_RANGE =
      Pattern.compile("^" + YEAR + "-" + MONTH + "-" + DAY + "/" + DAY);
  // 1990-01/12, and non-ISO: 1990-1/2
  private static final Pattern RGX_ISO_YM_RANGE =
      Pattern.compile("^" + YEAR + "-" + MONTH + "/" + MONTH);
  // 1990-01-02/09-15, and non-ISO: 1990-1-2/9-15
  private static final Pattern RGX_ISO_YMD_MD_RANGE =
      Pattern.compile("^" + YEAR + "-" + MONTH + "-" + DAY + "/" + MONTH + "-" + DAY);

  /** Utility class, not meant to be instantiated. */
  private DelimiterUtils() {}

  /**
   * Attempts to split the rawPeriod into two raw dates.
   *
   * <p>Abbreviated ranges recognised by {@link #splitISODateRange(String)} are expanded first.
   * Otherwise the value is split on the '/' symbol, but only when the symbol occurs at most once
   * and the value is longer than seven characters, so that a value such as "1999/2" (which looks
   * like a year and a month) is kept whole. A value which is not split, or which has nothing
   * after the delimiter, is returned in both positions.
   *
   * @param rawPeriod raw string period dates, may be null
   * @return always a two element array; two empty strings when rawPeriod is null or empty
   */
  public static String[] splitPeriod(String rawPeriod) {
    if (rawPeriod == null || rawPeriod.isEmpty()) {
      return new String[] {"", ""};
    }

    // todo can be improved by adding more date patterns
    String[] isoRange = splitISODateRange(rawPeriod);
    if (isoRange != null) {
      return isoRange;
    }

    // First and last position of the delimiter are equal when it occurs at most once
    boolean singleDelimiter =
        rawPeriod.indexOf(CHAR_PERIOD) == rawPeriod.lastIndexOf(CHAR_PERIOD);
    if (!singleDelimiter || rawPeriod.length() <= 7) {
      return new String[] {rawPeriod, rawPeriod};
    }

    String[] parts = RGX_PERIOD.split(rawPeriod);
    // Trailing empty strings are dropped by split, so pad to keep the array length constant
    return parts.length < 2 ? new String[] {parts[0], parts[0]} : parts;
  }

  /**
   * Expands abbreviated day/month ranges where the end date omits the leading components it
   * shares with the start date.
   *
   * <p>Examples:
   *
   * <ul>
   *   <li>1991-1/3 : 1991-1, 1991-3
   *   <li>1991-1-2/5 : 1991-1-2, 1991-1-5
   *   <li>1991-1-2/9-15 : 1991-1-2, 1991-9-15
   * </ul>
   *
   * @param src raw period value, may be null
   * @return two element array with the start and the expanded end, or null when src is null or
   *     does not match any of the supported range patterns
   */
  public static String[] splitISODateRange(String src) {
    if (src == null) {
      return null;
    }

    // The end date replaces only the last component of the start date: Y-M-D/D or Y-M/M
    boolean lastComponentRange =
        RGX_ISO_YMD_RANGE.matcher(src).matches() || RGX_ISO_YM_RANGE.matcher(src).matches();
    if (!lastComponentRange && !RGX_ISO_YMD_MD_RANGE.matcher(src).matches()) {
      return null;
    }

    // Every supported pattern contains exactly one delimiter with text on both sides
    int delimiter = src.indexOf(CHAR_PERIOD);
    String from = src.substring(0, delimiter);
    String to = src.substring(delimiter + 1);

    // Shared prefix: everything before the last '-' (Y-M or Y), or only the year for Y-M-D/M-D
    int prefixEnd = lastComponentRange ? from.lastIndexOf('-') : from.indexOf('-');
    return new String[] {from, from.substring(0, prefixEnd) + "-" + to};
  }
}