001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers.geospatial;
015
016import org.gbif.api.vocabulary.OccurrenceIssue;
017import org.gbif.common.parsers.NumberParser;
018import org.gbif.common.parsers.core.OccurrenceParseResult;
019import org.gbif.common.parsers.core.ParseResult;
020
021import java.util.EnumSet;
022import java.util.Set;
023import java.util.regex.Matcher;
024import java.util.regex.Pattern;
025
026import org.apache.commons.lang3.StringUtils;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030/**
031 * Utilities for assisting in the parsing of latitude and longitude strings into Decimals.
032 */
033public class CoordinateParseUtils {
034  private final static String DMS = "\\s*(\\d{1,3})\\s*(?:°|d|º| |g|o)"  // The degrees
035                                  + "\\s*([0-6]?\\d)\\s*(?:'|m| |´|’|′)" // The minutes
036                                  + "\\s*(?:"                            // Non-capturing group
037                                  + "([0-6]?\\d(?:[,.]\\d+)?)"           // Seconds and optional decimal
038                                  + "\\s*(?:\"|''|s|´´|″)?"
039                                  + ")?\\s*";
040  private final static String DM = "\\s*(\\d{1,3})\\s*(?:°|d|º| |g|o)" // The degrees
041                                 + "\\s*(?:"                           // Non-capturing group
042                                 + "([0-6]?\\d(?:[,.]\\d+)?)"          // Minutes and optional decimal
043                                 + "\\s*(?:'|m| |´|’|′)?"
044                                 + ")?\\s*";
045  private final static String D = "\\s*(\\d{1,3}(?:[,.]\\d+)?)\\s*(?:°|d|º| |g|o|)\\s*"; // The degrees and optional decimal
046  private final static Pattern DMS_SINGLE = Pattern.compile("^" + DMS + "$", Pattern.CASE_INSENSITIVE);
047  private final static Pattern DM_SINGLE = Pattern.compile("^" + DM + "$", Pattern.CASE_INSENSITIVE);
048  private final static Pattern D_SINGLE = Pattern.compile("^" + D + "$", Pattern.CASE_INSENSITIVE);
049  private final static Pattern DMS_COORD = Pattern.compile("^" + DMS + "([NSEOW])" + "[ ,;/]?" + DMS + "([NSEOW])$", Pattern.CASE_INSENSITIVE);
050  //private final static Pattern DM_COORD = Pattern.compile("^" + DM + "([NSEOW])" + "[ ,;/]?" + DM + "([NSEOW])$", Pattern.CASE_INSENSITIVE);
051  //private final static Pattern D_COORD = Pattern.compile("^" + D + "([NSEOW])" + "[ ,;/]?" + D + "([NSEOW])$", Pattern.CASE_INSENSITIVE);
052  private final static String POSITIVE = "NEO";
053  private CoordinateParseUtils() {
054    throw new UnsupportedOperationException("Can't initialize class");
055  }
056
057  private static final Logger LOG = LoggerFactory.getLogger(CoordinateParseUtils.class);
058
059  /**
060   * This parses string representations of latitude and longitude values. It tries its best to interpret the values and
061   * indicates any problems in its result as {@link org.gbif.api.vocabulary.OccurrenceIssue}.
062   * When the {@link ParseResult.STATUS} is FAIL the payload will be null and one or more issues should be set
063   * in {@link org.gbif.common.parsers.core.OccurrenceParseResult#getIssues()}.
064   *
065   * Coordinate precision will be 6 decimals at most, any more precise values will be rounded.
066   *
067   * Supported standard formats are the following, with dots or optionally a comma as the decimal marker, and variations
068   * on the units also accepted e.g. °, d, º, g, o.
069   * <ul>
070   *   <li>43.63871944444445</li>
071   *   <li>N43°38'19.39"</li>
072   *   <li>43°38'19.39"N</li>
073   *   <li>43°38.3232'N</li>
074   *   <li>43d 38m 19.39s N</li>
075   *   <li>43 38 19.39</li>
076   *   <li>433819N</li>
077   * </ul>
078   *
079   * @param latitude  The decimal latitude
080   * @param longitude The decimal longitude
081   *
082   * @return The parse result
083   */
084  public static OccurrenceParseResult<LatLng> parseLatLng(final String latitude, final String longitude) {
085    if (StringUtils.isEmpty(latitude) || StringUtils.isEmpty(longitude)) {
086      return OccurrenceParseResult.fail();
087    }
088    Double lat = NumberParser.parseDouble(latitude);
089    Double lng = NumberParser.parseDouble(longitude);
090    if (lat == null || lng == null) {
091      // try degree minute seconds
092      try {
093        lat = parseDMS(latitude, true);
094        lng = parseDMS(longitude, false);
095      } catch (IllegalArgumentException e) {
096        return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
097      }
098    }
099
100    return validateAndRound(lat, lng);
101  }
102
103  private static boolean inRange(double lat, double lon) {
104    if (Double.compare(lat, 90) <= 0 && Double.compare(lat, -90) >= 0 && Double.compare(lon, 180) <= 0 && Double.compare(lon, -180) >= 0) {
105      return true;
106    }
107    return false;
108  }
109
110  private static boolean isLat(String direction) {
111    if ("NS".contains(direction.toUpperCase())) {
112      return true;
113    }
114    return false;
115  }
116
117  private static int coordSign(String direction) {
118    return POSITIVE.contains(direction.toUpperCase()) ? 1 : -1;
119  }
120
121  // 02° 49' 52" N      131° 47' 03" E
122  public static OccurrenceParseResult<LatLng> parseVerbatimCoordinates(final String coordinates) {
123    if (StringUtils.isEmpty(coordinates)) {
124      return OccurrenceParseResult.fail();
125    }
126    Matcher m = DMS_COORD.matcher(coordinates);
127    if (m.find()) {
128      final String dir1 = m.group(4);
129      final String dir2 = m.group(8);
130      // first parse coords regardless whether they are lat or lon
131      double c1 = coordFromMatcher(m, 1,2,3, dir1);
132      double c2 = coordFromMatcher(m, 5,6,7, dir2);
133      // now see what order the coords are in:
134      if (isLat(dir1) && !isLat(dir2)) {
135        return validateAndRound(c1, c2);
136
137      } else if (!isLat(dir1) && isLat(dir2)) {
138        return validateAndRound(c2, c1);
139
140      } else {
141        return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
142      }
143
144    } else if(coordinates.length() > 4) {
145      // try to split and then use lat/lon parsing
146      for (final char delim : ",;/ ".toCharArray()) {
147        int cnt = StringUtils.countMatches(coordinates, String.valueOf(delim));
148        if (cnt == 1) {
149          String[] latlon = StringUtils.split(coordinates, delim);
150          if (latlon.length == 2) {
151            return parseLatLng(latlon[0], latlon[1]);
152          }
153        }
154      }
155    }
156    return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
157  }
158
159  private static OccurrenceParseResult<LatLng> validateAndRound(double lat, double lon) {
160    // collecting issues for result
161    Set<OccurrenceIssue> issues = EnumSet.noneOf(OccurrenceIssue.class);
162
163    // round to 6 decimals
164    final double latOrig = lat;
165    final double lngOrig = lon;
166    lat = roundTo6decimals(lat);
167    lon = roundTo6decimals(lon);
168    if (Double.compare(lat, latOrig) != 0 || Double.compare(lon, lngOrig) != 0) {
169      issues.add(OccurrenceIssue.COORDINATE_ROUNDED);
170    }
171
172    // 0,0 is too suspicious
173    if (Double.compare(lat, 0) == 0 && Double.compare(lon, 0) == 0) {
174      issues.add(OccurrenceIssue.ZERO_COORDINATE);
175      return OccurrenceParseResult.success(ParseResult.CONFIDENCE.POSSIBLE, new LatLng(0, 0), issues);
176    }
177
178    // if everything falls in range
179    if (inRange(lat, lon)) {
180      return OccurrenceParseResult.success(ParseResult.CONFIDENCE.DEFINITE, new LatLng(lat, lon), issues);
181    }
182
183    // if lat is out of range, but in range of the lng, assume swapped coordinates.
184    // note that should we desire to trust the following records, we would need to clear the flag for the records to
185    // appear in search results and maps etc. however, this is logic decision, that goes above the capabilities of this method
186    if (Double.compare(lat, 90) > 0 || Double.compare(lat, -90) < 0) {
187      // try and swap
188      if (inRange(lon, lat)) {
189        issues.add(OccurrenceIssue.PRESUMED_SWAPPED_COORDINATE);
190        return OccurrenceParseResult.success(ParseResult.CONFIDENCE.PROBABLE, new LatLng(lon, lat), issues);
191      }
192    }
193
194    // then something is out of range
195    issues.add(OccurrenceIssue.COORDINATE_OUT_OF_RANGE);
196    return OccurrenceParseResult.fail(issues);
197
198  }
199
200  /**
201   * Parses a single DMS coordinate
202   * @param coord
203   * @param lat
204   * @return the converted decimal up to 6 decimals accuracy
205   */
206  protected static double parseDMS(String coord, boolean lat) {
207    final String DIRS = lat ? "NS" : "EOW";
208    coord = coord.trim().toUpperCase();
209
210    if (coord.length() > 3) {
211      // preparse the direction and remove it from the string to avoid a very complex regex
212      char dir = 'n';
213      if (DIRS.contains(String.valueOf(coord.charAt(0)))) {
214        dir = coord.charAt(0);
215        coord = coord.substring(1);
216      } else if (DIRS.contains(String.valueOf(coord.charAt(coord.length()-1)))) {
217        dir = coord.charAt(coord.length()-1);
218        coord = coord.substring(0, coord.length()-1);
219      }
220      // without the direction chuck it at the regex
221      Matcher m = DMS_SINGLE.matcher(coord);
222      if (m.find()) {
223        return coordFromMatcher(m, 1,2,3, String.valueOf(dir));
224      } else {
225        m = DM_SINGLE.matcher(coord);
226        if (m.find()) {
227          return coordFromMatcher(m, 1, 2, String.valueOf(dir));
228        } else {
229          m = D_SINGLE.matcher(coord);
230          if (m.find()) {
231            return coordFromMatcher(m, 1, String.valueOf(dir));
232          }
233        }
234      }
235    }
236    throw new IllegalArgumentException();
237  }
238
239  private static double coordFromMatcher(Matcher m, int idx1, int idx2, int idx3, String sign) {
240    return roundTo6decimals(coordSign(sign) *
241      dmsToDecimal( NumberParser.parseDouble(m.group(idx1)), NumberParser.parseDouble(m.group(idx2)), NumberParser.parseDouble(m.group(idx3)) ));
242  }
243
244  private static double coordFromMatcher(Matcher m, int idx1, int idx2, String sign) {
245    return roundTo6decimals(coordSign(sign) *
246      dmsToDecimal( NumberParser.parseDouble(m.group(idx1)), NumberParser.parseDouble(m.group(idx2)), 0.0));
247  }
248
249  private static double coordFromMatcher(Matcher m, int idx1, String sign) {
250    return roundTo6decimals(coordSign(sign) *
251      dmsToDecimal( NumberParser.parseDouble(m.group(idx1)), 0.0, 0.0));
252  }
253
254  private static double dmsToDecimal(double degree, Double minutes, Double seconds) {
255    minutes = minutes == null ? 0 : minutes;
256    seconds = seconds == null ? 0 : seconds;
257    return degree + (minutes / 60) + (seconds / 3600);
258  }
259
260  // round to 6 decimals (~1m precision) since no way we're getting anything legitimately more precise
261  private static Double roundTo6decimals(Double x) {
262    return x == null ? null : Math.round(x * Math.pow(10, 6)) / Math.pow(10, 6);
263  }
264}