package org.gbif.common.parsers.utils;

import java.util.regex.Pattern;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;

/**
 * Utility methods that split a raw date-period string on the '/' delimiter into a start part and
 * an end part, for example {@code "1990-01-02/1991-03-04"} into {@code {"1990-01-02",
 * "1991-03-04"}}, including abbreviated ranges such as {@code "1990-01-02/15"}.
 */
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class DelimiterUtils {
  // Date values
  private static final String CHAR_PERIOD = "/";
  private static final Pattern RGX_PERIOD = Pattern.compile(CHAR_PERIOD);

  // Shared regex fragments; zero-padded and unpadded month/day values are both accepted
  private static final String YEAR = "([0-9]{4})";
  private static final String MONTH = "(1[0-2]|0[1-9]|[1-9])";
  private static final String DAY = "(3[01]|0[1-9]|[12][0-9]|[1-9])";

  // The patterns below are only used through Matcher.matches(), which requires the whole input to
  // match, so no explicit anchors are needed.

  // 1990-01-02/15, and non-ISO: 1990-1-2/15, but does not support 19000101/12
  private static final Pattern ISO_YMD_RANGE =
      Pattern.compile(YEAR + "-" + MONTH + "-" + DAY + "/" + DAY);
  // 1990-01/12, and non-ISO: 1990-1/2
  private static final Pattern ISO_YM_RANGE = Pattern.compile(YEAR + "-" + MONTH + "/" + MONTH);
  // 1990-01-02/09-15, and non-ISO: 1990-1-2/9-15
  private static final Pattern ISO_YMD_MD_RANGE =
      Pattern.compile(YEAR + "-" + MONTH + "-" + DAY + "/" + MONTH + "-" + DAY);

  /**
   * Attempts to split the raw period into a start part and an end part on the '/' delimiter.
   *
   * <p>Abbreviated ranges recognised by {@link #splitISODateRange(String)} are expanded first.
   * Otherwise the value is split only when it contains at most one '/' and is longer than seven
   * characters; in every other case the whole value is returned as both elements.
   *
   * @param rawPeriod raw string period dates, may be {@code null}
   * @return always a two-element array; {@code {"", ""}} for {@code null} or empty input
   */
  public static String[] splitPeriod(String rawPeriod) {
    if (rawPeriod == null || rawPeriod.isEmpty()) {
      return new String[] {"", ""};
    }
    // todo can be improved by adding more date patterns
    String[] isoRange = splitISODateRange(rawPeriod);
    if (isoRange != null) {
      return isoRange;
    }
    // Split only when the delimiter occurs at most once (first and last positions coincide) and
    // the value is longer than seven characters, so that a value such as "1999/2", which looks
    // like a year and a month, is not treated as a period.
    boolean canSplit =
        rawPeriod.lastIndexOf(CHAR_PERIOD) == rawPeriod.indexOf(CHAR_PERIOD)
            && rawPeriod.length() > 7;
    if (!canSplit) {
      return new String[] {rawPeriod, rawPeriod};
    }
    String[] parts = RGX_PERIOD.split(rawPeriod);
    // A missing delimiter or a trailing delimiter yields a single part; duplicate it so that the
    // result always has two elements.
    return parts.length < 2 ? new String[] {parts[0], parts[0]} : parts;
  }

  /**
   * Expands an abbreviated date range whose end part omits leading components of the start date.
   *
   * <p>Examples:
   *
   * <ul>
   *   <li>{@code 1991-1/3} gives {@code 1991-1} and {@code 1991-3}
   *   <li>{@code 1991-1-2/5} gives {@code 1991-1-2} and {@code 1991-1-5}
   *   <li>{@code 1990-01-02/09-15} gives {@code 1990-01-02} and {@code 1990-09-15}
   * </ul>
   *
   * @param src raw range string, may be {@code null}
   * @return a two-element array {@code {from, to}}, or {@code null} when {@code src} is {@code
   *     null} or is not one of the supported abbreviated ranges
   */
  public static String[] splitISODateRange(String src) {
    if (src == null) {
      return null;
    }
    // The end part replaces only the last component of the start date (day or month)
    boolean lastComponentRange =
        ISO_YMD_RANGE.matcher(src).matches() || ISO_YM_RANGE.matcher(src).matches();
    // The end part carries month and day, so only the year is inherited from the start date
    boolean monthDayRange = !lastComponentRange && ISO_YMD_MD_RANGE.matcher(src).matches();
    if (!lastComponentRange && !monthDayRange) {
      return null;
    }

    // Every supported pattern contains exactly one '/' with text on both sides
    int delimiter = src.indexOf(CHAR_PERIOD);
    String from = src.substring(0, delimiter);
    String end = src.substring(delimiter + 1);

    int inheritedLength = lastComponentRange ? from.lastIndexOf('-') : from.indexOf('-');
    String to = from.substring(0, inheritedLength) + "-" + end;
    return new String[] {from, to};
  }
}