/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.common.parsers.geospatial;

import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.common.parsers.NumberParser;
import org.gbif.common.parsers.core.OccurrenceParseResult;
import org.gbif.common.parsers.core.ParseResult;

import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utilities for assisting in the parsing of latitude and longitude strings into Decimals.
 *
 * <p>All methods are static; the class cannot be instantiated.
 */
public class CoordinateParseUtils {

  /** Degrees, minutes and optional (possibly fractional) seconds, each followed by a unit marker. */
  private static final String DMS = "\\s*(\\d{1,3})\\s*(?:°|d|º| |g|o)" // The degrees
                                    + "\\s*([0-6]?\\d)\\s*(?:'|m| |´|’|′)" // The minutes
                                    + "\\s*(?:" // Non-capturing group
                                    + "([0-6]?\\d(?:[,.]\\d+)?)" // Seconds and optional decimal
                                    + "\\s*(?:\"|''|s|´´|″)?"
                                    + ")?\\s*";

  /** Degrees followed by optional (possibly fractional) minutes. */
  private static final String DM = "\\s*(\\d{1,3})\\s*(?:°|d|º| |g|o)" // The degrees
                                   + "\\s*(?:" // Non-capturing group
                                   + "([0-6]?\\d(?:[,.]\\d+)?)" // Minutes and optional decimal
                                   + "\\s*(?:'|m| |´|’|′)?"
                                   + ")?\\s*";

  /** Degrees only, with an optional decimal part and an optional unit marker. */
  private static final String D = "\\s*(\\d{1,3}(?:[,.]\\d+)?)\\s*(?:°|d|º| |g|o|)\\s*"; // The degrees and optional decimal

  private static final Pattern DMS_SINGLE = Pattern.compile("^" + DMS + "$", Pattern.CASE_INSENSITIVE);
  private static final Pattern DM_SINGLE = Pattern.compile("^" + DM + "$", Pattern.CASE_INSENSITIVE);
  private static final Pattern D_SINGLE = Pattern.compile("^" + D + "$", Pattern.CASE_INSENSITIVE);

  /** Two DMS values, each followed by a direction letter, optionally separated by one delimiter. */
  private static final Pattern DMS_COORD = Pattern.compile("^" + DMS + "([NSEOW])" + "[ ,;/]?" + DMS + "([NSEOW])$", Pattern.CASE_INSENSITIVE);

  // Pair patterns for the DM and D forms are currently disabled:
  //private final static Pattern DM_COORD = Pattern.compile("^" + DM + "([NSEOW])" + "[ ,;/]?" + DM + "([NSEOW])$", Pattern.CASE_INSENSITIVE);
  //private final static Pattern D_COORD = Pattern.compile("^" + D + "([NSEOW])" + "[ ,;/]?" + D + "([NSEOW])$", Pattern.CASE_INSENSITIVE);

  /** Direction letters that yield a positive sign; every other direction letter yields a negative sign. */
  private static final String POSITIVE = "NEO";

  /** Scale factor used to round to 6 decimals. */
  private static final double SIX_DECIMALS = Math.pow(10, 6);

  // Not referenced by the code in this file; kept so the declared logger stays available.
  private static final Logger LOG = LoggerFactory.getLogger(CoordinateParseUtils.class);

  private CoordinateParseUtils() {
    throw new UnsupportedOperationException("Can't initialize class");
  }

  /**
   * This parses string representations of latitude and longitude values. It tries its best to interpret the values and
   * indicates any problems in its result as {@link org.gbif.api.vocabulary.OccurrenceIssue}.
   * When the {@link ParseResult.STATUS} is FAIL the payload will be null and one or more issues should be set
   * in {@link org.gbif.common.parsers.core.OccurrenceParseResult#getIssues()}.
   *
   * Coordinate precision will be 6 decimals at most, any more precise values will be rounded.
   *
   * Supported standard formats are the following, with dots or optionally a comma as the decimal marker, and variations
   * on the units also accepted e.g. °, d, º, g, o.
   * <ul>
   *   <li>43.63871944444445</li>
   *   <li>N43°38'19.39"</li>
   *   <li>43°38'19.39"N</li>
   *   <li>43°38.3232'N</li>
   *   <li>43d 38m 19.39s N</li>
   *   <li>43 38 19.39</li>
   *   <li>433819N</li>
   * </ul>
   * NOTE(review): the compact form {@code 433819N} is listed above, but the DMS and DM patterns in this class require
   * a unit marker or space after the degrees and the D pattern accepts at most three integer digits, so it does not
   * appear to be matched here. Confirm whether it is handled by the numeric parser or should be removed from the list.
   *
   * <p>Both values are first handed to {@code NumberParser.parseDouble}. If either comes back {@code null}, both
   * values are re-parsed with {@link #parseDMS(String, boolean)}.
   *
   * @param latitude The decimal latitude
   * @param longitude The decimal longitude
   *
   * @return The parse result; a failure without issues if either argument is null or empty, a failure with
   *         {@code COORDINATE_INVALID} if the values cannot be parsed
   */
  public static OccurrenceParseResult<LatLng> parseLatLng(final String latitude, final String longitude) {
    if (StringUtils.isEmpty(latitude) || StringUtils.isEmpty(longitude)) {
      return OccurrenceParseResult.fail();
    }
    Double lat = NumberParser.parseDouble(latitude);
    Double lng = NumberParser.parseDouble(longitude);
    if (lat == null || lng == null) {
      // try degree minute seconds
      try {
        lat = parseDMS(latitude, true);
        lng = parseDMS(longitude, false);
      } catch (IllegalArgumentException e) {
        // an unparsable value is reported as an issue on the result rather than as an exception
        return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
      }
    }

    return validateAndRound(lat, lng);
  }

  /**
   * @return true if lat is within [-90, 90] and lon is within [-180, 180]; false otherwise (including for NaN)
   */
  private static boolean inRange(double lat, double lon) {
    return lat >= -90 && lat <= 90 && lon >= -180 && lon <= 180;
  }

  /**
   * @param direction a direction letter, in any case
   * @return true if the direction is N or S
   */
  private static boolean isLat(String direction) {
    return "NS".contains(direction.toUpperCase());
  }

  /**
   * @param direction a direction letter, in any case
   * @return 1 if the direction is one of the {@link #POSITIVE} letters, -1 otherwise
   */
  private static int coordSign(String direction) {
    return POSITIVE.contains(direction.toUpperCase()) ? 1 : -1;
  }

  /**
   * Parses a single string holding both coordinates, e.g. {@code 02° 49' 52" N 131° 47' 03" E}.
   *
   * <p>A pair of DMS values with direction letters is tried first; the direction letters decide which value is the
   * latitude. Otherwise, for strings longer than 4 characters, the string is split on the first of the delimiters
   * {@code , ; /} or space that occurs exactly once and the two parts are passed to
   * {@link #parseLatLng(String, String)} as latitude and longitude.
   *
   * @param coordinates the verbatim coordinate string
   *
   * @return The parse result; a failure without issues if the argument is null or empty, a failure with
   *         {@code COORDINATE_INVALID} if it cannot be interpreted
   */
  public static OccurrenceParseResult<LatLng> parseVerbatimCoordinates(final String coordinates) {
    if (StringUtils.isEmpty(coordinates)) {
      return OccurrenceParseResult.fail();
    }
    Matcher m = DMS_COORD.matcher(coordinates);
    if (m.find()) {
      final String dir1 = m.group(4);
      final String dir2 = m.group(8);
      // first parse coords regardless whether they are lat or lon
      double c1;
      double c2;
      try {
        c1 = coordFromMatcher(m, 1, 2, 3, dir1);
        c2 = coordFromMatcher(m, 5, 6, 7, dir2);
      } catch (IllegalArgumentException e) {
        // same handling as parseLatLng: report the problem as an issue, not as an exception
        return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
      }
      // now see what order the coords are in:
      final boolean firstIsLat = isLat(dir1);
      final boolean secondIsLat = isLat(dir2);
      if (firstIsLat && !secondIsLat) {
        return validateAndRound(c1, c2);
      }
      if (!firstIsLat && secondIsLat) {
        return validateAndRound(c2, c1);
      }
      // two latitudes or two longitudes
      return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
    }

    if (coordinates.length() > 4) {
      // try to split and then use lat/lon parsing
      for (final char delim : ",;/ ".toCharArray()) {
        int cnt = StringUtils.countMatches(coordinates, String.valueOf(delim));
        if (cnt == 1) {
          String[] latlon = StringUtils.split(coordinates, delim);
          if (latlon.length == 2) {
            return parseLatLng(latlon[0], latlon[1]);
          }
        }
      }
    }
    return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
  }

  /**
   * Rounds both values to 6 decimals, checks them against the valid ranges and collects issues.
   *
   * <ul>
   *   <li>NaN in either value: failure with {@code COORDINATE_INVALID}</li>
   *   <li>0,0 after rounding: success with confidence POSSIBLE and {@code ZERO_COORDINATE}</li>
   *   <li>in range: success with confidence DEFINITE</li>
   *   <li>latitude out of range but the swapped pair in range: success with confidence PROBABLE, the values
   *       swapped, and {@code PRESUMED_SWAPPED_COORDINATE}</li>
   *   <li>otherwise: failure with {@code COORDINATE_OUT_OF_RANGE}</li>
   * </ul>
   * {@code COORDINATE_ROUNDED} is added whenever rounding changed the numeric value of either coordinate.
   */
  private static OccurrenceParseResult<LatLng> validateAndRound(double lat, double lon) {
    // Math.round(NaN) is 0, so without this guard a NaN would silently become a valid 0.0 coordinate.
    // NOTE(review): presumably NaN can only arrive here if NumberParser.parseDouble accepts a "NaN" literal - confirm.
    if (Double.isNaN(lat) || Double.isNaN(lon)) {
      return OccurrenceParseResult.fail(OccurrenceIssue.COORDINATE_INVALID);
    }

    // collecting issues for result
    Set<OccurrenceIssue> issues = EnumSet.noneOf(OccurrenceIssue.class);

    // round to 6 decimals
    final double roundedLat = roundTo6decimals(lat);
    final double roundedLon = roundTo6decimals(lon);
    // numeric comparison on purpose: -0.0 becoming 0.0 is not a loss of precision
    if (roundedLat != lat || roundedLon != lon) {
      issues.add(OccurrenceIssue.COORDINATE_ROUNDED);
    }

    // 0,0 is too suspicious
    if (roundedLat == 0 && roundedLon == 0) {
      issues.add(OccurrenceIssue.ZERO_COORDINATE);
      return OccurrenceParseResult.success(ParseResult.CONFIDENCE.POSSIBLE, new LatLng(0, 0), issues);
    }

    // if everything falls in range
    if (inRange(roundedLat, roundedLon)) {
      return OccurrenceParseResult.success(ParseResult.CONFIDENCE.DEFINITE, new LatLng(roundedLat, roundedLon), issues);
    }

    // if lat is out of range, but in range of the lng, assume swapped coordinates.
    // note that should we desire to trust the following records, we would need to clear the flag for the records to
    // appear in search results and maps etc. however, this is logic decision, that goes above the capabilities of this method
    final boolean latOutOfRange = roundedLat > 90 || roundedLat < -90;
    if (latOutOfRange && inRange(roundedLon, roundedLat)) {
      issues.add(OccurrenceIssue.PRESUMED_SWAPPED_COORDINATE);
      return OccurrenceParseResult.success(ParseResult.CONFIDENCE.PROBABLE, new LatLng(roundedLon, roundedLat), issues);
    }

    // then something is out of range
    issues.add(OccurrenceIssue.COORDINATE_OUT_OF_RANGE);
    return OccurrenceParseResult.fail(issues);
  }

  /**
   * Parses a single DMS coordinate.
   *
   * <p>The value is trimmed and upper-cased. A leading or trailing direction letter (N/S for latitudes, E/O/W for
   * longitudes) is stripped and determines the sign; without a direction letter the value is positive. The remainder
   * is matched against the degree-minute-second, degree-minute and degree-only patterns, in that order. Values of
   * three characters or fewer are rejected.
   *
   * @param coord the coordinate string, must not be null
   * @param lat true if the value is a latitude, false if it is a longitude
   * @return the converted decimal up to 6 decimals accuracy
   * @throws IllegalArgumentException if the value matches none of the supported patterns
   */
  protected static double parseDMS(String coord, boolean lat) {
    final String directions = lat ? "NS" : "EOW";
    String value = coord.trim().toUpperCase();

    if (value.length() > 3) {
      // preparse the direction and remove it from the string to avoid a very complex regex
      char dir = 'n';
      final char first = value.charAt(0);
      final char last = value.charAt(value.length() - 1);
      if (directions.indexOf(first) >= 0) {
        dir = first;
        value = value.substring(1);
      } else if (directions.indexOf(last) >= 0) {
        dir = last;
        value = value.substring(0, value.length() - 1);
      }
      final String sign = String.valueOf(dir);

      // without the direction chuck it at the regexes, most specific first
      Matcher m = DMS_SINGLE.matcher(value);
      if (m.find()) {
        return coordFromMatcher(m, 1, 2, 3, sign);
      }
      m = DM_SINGLE.matcher(value);
      if (m.find()) {
        return coordFromMatcher(m, 1, 2, sign);
      }
      m = D_SINGLE.matcher(value);
      if (m.find()) {
        return coordFromMatcher(m, 1, sign);
      }
    }
    throw new IllegalArgumentException("Cannot parse coordinate: " + coord);
  }

  /** Builds a signed decimal from the degree, minute and second groups of a successful match. */
  private static double coordFromMatcher(Matcher m, int idx1, int idx2, int idx3, String sign) {
    return signedDecimal(
      NumberParser.parseDouble(m.group(idx1)),
      NumberParser.parseDouble(m.group(idx2)),
      NumberParser.parseDouble(m.group(idx3)),
      sign);
  }

  /** Builds a signed decimal from the degree and minute groups of a successful match; seconds are zero. */
  private static double coordFromMatcher(Matcher m, int idx1, int idx2, String sign) {
    return signedDecimal(
      NumberParser.parseDouble(m.group(idx1)),
      NumberParser.parseDouble(m.group(idx2)),
      0.0,
      sign);
  }

  /** Builds a signed decimal from the degree group of a successful match; minutes and seconds are zero. */
  private static double coordFromMatcher(Matcher m, int idx1, String sign) {
    return signedDecimal(NumberParser.parseDouble(m.group(idx1)), 0.0, 0.0, sign);
  }

  /**
   * Combines the parts into a decimal value, applies the sign of the direction and rounds to 6 decimals.
   *
   * @throws IllegalArgumentException if degrees is null, so that callers can report an invalid coordinate instead
   *         of hitting a NullPointerException on unboxing
   */
  private static double signedDecimal(Double degrees, Double minutes, Double seconds, String sign) {
    if (degrees == null) {
      throw new IllegalArgumentException("Degrees could not be parsed as a number");
    }
    return roundTo6decimals(coordSign(sign) * dmsToDecimal(degrees, minutes, seconds));
  }

  /**
   * Converts degrees, minutes and seconds into decimal degrees. Null minutes or seconds count as zero.
   */
  private static double dmsToDecimal(double degree, Double minutes, Double seconds) {
    final double min = minutes == null ? 0 : minutes;
    final double sec = seconds == null ? 0 : seconds;
    return degree + (min / 60) + (sec / 3600);
  }

  // round to 6 decimals (~1m precision) since no way we're getting anything legitimately more precise
  private static double roundTo6decimals(double x) {
    return Math.round(x * SIX_DECIMALS) / SIX_DECIMALS;
  }
}