001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.common.parsers.geospatial; 015 016import org.gbif.api.vocabulary.OccurrenceIssue; 017import org.gbif.common.parsers.NumberParser; 018import org.gbif.common.parsers.core.OccurrenceParseResult; 019import org.gbif.common.parsers.core.ParseResult; 020 021import java.util.HashSet; 022import java.util.Set; 023import java.util.regex.Pattern; 024 025import javax.annotation.Nullable; 026 027import org.apache.commons.lang3.StringUtils; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031/** 032 * Utilities for parsing min/max meter measurements in general plus specific additions and 033 * validations for elevation, depth and distance above surface. 034 * 035 * Accepts metres and converts centimetres, kilometres, nautical miles, fathoms, feet and inches to 036 * metres. 037 * 038 * TODO: consider to use the JScience Library: 039 * http://jscience.org/api/javax/measure/unit/package-summary.html 040 * http://jscience.org/api/javax/measure/unit/Unit.html 041 */ 042public class MeterRangeParser { 043 private static final Logger LOG = LoggerFactory.getLogger(MeterRangeParser.class); 044 045 /** 046 * Pattern for removing measurement denominations 047 */ 048 private static final Pattern MEASURE_MARKER_PATTERN = Pattern.compile("[a-zA-Zµ]"); 049 050 /** 051 * Pattern to remove measurement markers (like "m", "FT.") 052 */ 053 private static final Pattern REMOVE_MEASURE_MARKER_PATTERN = Pattern.compile("[a-zA-Zµ\" ]\\.?"); 054 055 /** 056 * Pattern for recognising measurements in nautical miles 057 */ 058 private static final Pattern NAUTICAL_MILES_MARKER_PATTERN = Pattern.compile("nm|nmi", Pattern.CASE_INSENSITIVE); 059 060 /** 061 * Pattern for recognising measurements in fathoms 062 */ 063 private static final Pattern FATHOMS_MARKER_PATTERN = Pattern.compile("fm|fathom|fathoms", Pattern.CASE_INSENSITIVE); 064 065 /** 066 * Pattern for recognising measurements in feet 067 */ 068 private static final Pattern FEET_MARKER_PATTERN = Pattern.compile("ft|'|feet", Pattern.CASE_INSENSITIVE); 069 070 /** 071 * Pattern for recognising measurements in inches 072 */ 073 private static final Pattern INCHES_MARKER_PATTERN = Pattern.compile("in|\"|inch|inches", Pattern.CASE_INSENSITIVE); 074 075 /** 076 * Pattern for recognising measurements in km 077 */ 078 private static final Pattern KM_MARKER_PATTERN = Pattern.compile("km|kilometres|kilometers", Pattern.CASE_INSENSITIVE); 079 080 /** 081 * Pattern for recognising measurements in cm 082 */ 083 private static final Pattern CM_MARKER_PATTERN = Pattern.compile("cm|centimetres|centimeters", Pattern.CASE_INSENSITIVE); 084 085 /** 086 * Pattern for recognising a range value 087 */ 088 private static final Pattern SEP_MARKER_PATTERN = Pattern.compile("\\d\\s*-\\s*\\d"); 089 090 /** 091 * Constant factor to convert from nautical miles to metres. 092 */ 093 private static final float NAUTICAL_MILES_TO_METRES = 1852f; 094 095 /** 096 * Constant factor to convert from fathoms to metres. 097 */ 098 private static final float FATHOMS_TO_METRES = 6 * 0.3048f; 099 100 /** 101 * Constant factor to convert from feet to metres. 102 */ 103 private static final float FEET_TO_METRES = 0.3048f; 104 105 /** 106 * Constant factor to convert from inches to metres. 107 */ 108 private static final float INCHES_TO_METRES = 0.0254f; 109 110 /** 111 * Constant factor to convert from km to metres. 112 */ 113 private static final float KM_TO_METRES = 1000f; 114 115 /** 116 * Constant factor to convert from cm to metres. 117 */ 118 private static final float CM_TO_METRES = 0.1f; 119 120 /** 121 * The lowest elevation value recognised as valid: -430m 122 * 123 * @See <a href="https://github.com/tdwg/bdq/issues/112">TG2-VALIDATION_MAXELEVATION_INRANGE</a> 124 */ 125 private static final int MIN_ELEVATION = -430; 126 127 /** 128 * The highest elevation value recognised as valid, Mount Everest 8,848 m (29,029 ft). 129 * Highest point in the Troposphere: 17km 130 * Highest point in the Stratosphere: 55km 131 * Max altitude of airline cruises: 13km 132 * Max altitude of weather ballons: 34km 133 * 134 * @see <a href="http://en.wikipedia.org/wiki/Atmosphere_of_Earth">Atmosphere in wikipedia</a> 135 */ 136 private static final int MAX_ELEVATION = 17000; 137 138 /** 139 * The lowest elevation value recognised as valid: 140 * 10,971 m (35,994 ft) Challenger Deep, Mariana Trench[32] 141 */ 142 private static final int MAX_DEPTH = 11000; 143 144 /** 145 * Largest holes dug into the earth are ~4km. 146 */ 147 private static final int MIN_DISTANCE = -5000; 148 149 /** 150 * Same as elevation, we use the upper end of the Troposphere. 151 */ 152 private static final int MAX_DISTANCE = MAX_ELEVATION; 153 154 static class MeasurementWrapper<T> { 155 private T measurement; 156 private boolean isInNauticalMiles; 157 private boolean isInFathoms; 158 private boolean isInFeet; 159 private boolean isInInches; 160 private boolean isInCm; 161 private boolean isInKm; 162 private boolean containsNonNumeric; 163 private boolean minMaxSwapped; 164 private boolean tooLarge; 165 166 public T getMeasurement() { 167 return measurement; 168 } 169 170 public boolean isInNauticalMiles() { 171 return isInNauticalMiles; 172 } 173 174 public boolean isInFathoms() { 175 return isInFathoms; 176 } 177 178 public boolean isInFeet() { 179 return isInFeet; 180 } 181 182 public boolean isInInches() { 183 return isInInches; 184 } 185 186 public boolean isInCm() { 187 return isInCm; 188 } 189 190 public boolean isInKm() { 191 return isInKm; 192 } 193 194 public boolean containsNonNumeric() { 195 return containsNonNumeric; 196 } 197 198 public boolean isMinMaxSwapped() { 199 return minMaxSwapped; 200 } 201 202 public boolean isTooLarge() { 203 return tooLarge; 204 } 205 206 public void addIssues(MeasurementWrapper<?> issues) { 207 isInNauticalMiles = isInNauticalMiles || issues.isInNauticalMiles; 208 isInFathoms = isInFathoms || issues.isInFathoms; 209 isInFeet = isInFeet || issues.isInFeet; 210 isInInches = isInInches || issues.isInInches; 211 isInCm = isInCm || issues.isInCm; 212 isInKm = isInKm || issues.isInKm; 213 containsNonNumeric = containsNonNumeric || issues.containsNonNumeric; 214 minMaxSwapped = minMaxSwapped || issues.minMaxSwapped; 215 tooLarge = tooLarge || issues.tooLarge; 216 } 217 } 218 219 /** 220 * Takes min and max values in metres and a known precision and calculates a single mean value and 221 * its accuracy. This method tries also to parse common measurements given in fathoms, feet, 222 * inches, km or cm and converts them to metres. 223 */ 224 public static MeasurementWrapper<DoubleAccuracy> parseMeterRange( 225 String minRaw, @Nullable String maxRaw, @Nullable String precisionRaw) { 226 MeasurementWrapper<DoubleAccuracy> result = new MeasurementWrapper<DoubleAccuracy>(); 227 228 MeasurementWrapper<Double> min = parseInMeter(minRaw); 229 MeasurementWrapper<Double> max = parseInMeter(maxRaw); 230 MeasurementWrapper<Double> prec = parseInMeter(precisionRaw); 231 232 result.addIssues(min); 233 result.addIssues(max); 234 result.addIssues(prec); 235 236 if (min.measurement == null && max.measurement == null) { 237 // both are null, return issues only 238 return result; 239 } 240 241 // final result vars 242 Double value; 243 Double accuracy; 244 245 // check for swapped values and apply precision if min & max exist 246 if (min.measurement != null && max.measurement != null) { 247 // flag swapped min/max 248 if (min.measurement > max.measurement) { 249 result.minMaxSwapped = true; 250 Double oldMin = min.measurement; 251 min.measurement = max.measurement; 252 max.measurement = oldMin; 253 } 254 // apply precision to min max if we have it 255 if (prec.measurement != null) { 256 min.measurement -= prec.measurement; 257 max.measurement += prec.measurement; 258 } 259 // build the arithmetic mean and set accuracy 260 value = (min.measurement + max.measurement) / 2d; 261 accuracy = (max.measurement - min.measurement) / 2d; 262 263 } else { 264 // use the only value and precision for accuracy 265 value = min.measurement == null ? max.measurement : min.measurement; 266 accuracy = prec.measurement; 267 } 268 269 if (value != null) { 270 result.measurement = new DoubleAccuracy(value, accuracy); 271 } 272 273 // finally a result, bye bye! 274 return result; 275 } 276 277 public static OccurrenceParseResult<DoubleAccuracy> parseElevation(@Nullable String min, @Nullable String max, @Nullable String precision) { 278 279 MeasurementWrapper<DoubleAccuracy> elevation = parseMeterRange(min, max, precision); 280 281 Set<OccurrenceIssue> issues = new HashSet<>(); 282 if (elevation.containsNonNumeric) { 283 issues.add(OccurrenceIssue.ELEVATION_NON_NUMERIC); 284 } 285 if (elevation.isInNauticalMiles || elevation.isInFathoms 286 || elevation.isInFeet || elevation.isInInches) { 287 issues.add(OccurrenceIssue.ELEVATION_NOT_METRIC); 288 } 289 if (elevation.minMaxSwapped) { 290 issues.add(OccurrenceIssue.ELEVATION_MIN_MAX_SWAPPED); 291 } 292 if (elevation.tooLarge) { 293 issues.add(OccurrenceIssue.ELEVATION_UNLIKELY); 294 } 295 296 if (elevation.measurement == null || elevation.measurement.getValue() == null) { 297 return OccurrenceParseResult.fail(issues); 298 } 299 300 DoubleAccuracy result = elevation.measurement; 301 // record the number of records with altitude out of range 302 if (result.getValue() > MAX_ELEVATION || result.getValue() < MIN_ELEVATION) { 303 issues.add(OccurrenceIssue.ELEVATION_UNLIKELY); 304 return OccurrenceParseResult.fail(issues); 305 } 306 307 return OccurrenceParseResult.success(ParseResult.CONFIDENCE.DEFINITE, result, issues); 308 } 309 310 public static OccurrenceParseResult<DoubleAccuracy> parseDepth(@Nullable String min, @Nullable String max, @Nullable String precision) { 311 MeasurementWrapper<DoubleAccuracy> depth = parseMeterRange(min, max, precision); 312 313 Set<OccurrenceIssue> issues = new HashSet<>(); 314 if(depth.containsNonNumeric) { 315 issues.add(OccurrenceIssue.DEPTH_NON_NUMERIC); 316 } 317 if(depth.isInNauticalMiles || depth.isInFathoms || depth.isInFeet || depth.isInInches) { 318 issues.add(OccurrenceIssue.DEPTH_NOT_METRIC); 319 } 320 if(depth.minMaxSwapped) { 321 issues.add(OccurrenceIssue.DEPTH_MIN_MAX_SWAPPED); 322 } 323 if(depth.tooLarge) { 324 issues.add(OccurrenceIssue.DEPTH_UNLIKELY); 325 } 326 327 if (depth.measurement == null || depth.measurement.getValue() == null) { 328 return OccurrenceParseResult.fail(issues); 329 } 330 331 DoubleAccuracy result = depth.measurement; 332 333 // negate depth if its negative 334 if (result.getValue() < 0) { 335 result = new DoubleAccuracy(-1 * result.getValue(), result.getAccuracy()); 336 issues.add(OccurrenceIssue.DEPTH_UNLIKELY); 337 } 338 339 // record the number of records with depth out of range 340 if (result.getValue() > MAX_DEPTH) { 341 issues.add(OccurrenceIssue.DEPTH_UNLIKELY); 342 return OccurrenceParseResult.fail(issues); 343 } 344 345 return OccurrenceParseResult.success(ParseResult.CONFIDENCE.DEFINITE, result, issues); 346 } 347 348 /** 349 * Parses a string supposed to be a value in metres. 350 * Accepts nautical miles, fathoms, feet, inches if marked with a unit and converts them 351 */ 352 public static ParseResult<Double> parseMeters(String meter) { 353 MeasurementWrapper<Double> result = parseInMeter(meter); 354 if (result.getMeasurement() == null) { 355 return ParseResult.fail(); 356 } 357 return ParseResult.success(ParseResult.CONFIDENCE.DEFINITE, result.getMeasurement()); 358 } 359 360 private static MeasurementWrapper<Double> parseInMeter(String meter) { 361 MeasurementWrapper<Double> iMeter = new MeasurementWrapper<>(); 362 363 if (StringUtils.isEmpty(meter)) { 364 return iMeter; 365 } 366 367 try { 368 iMeter.containsNonNumeric = MEASURE_MARKER_PATTERN.matcher(meter).find(); 369 370 if (!iMeter.containsNonNumeric()) { 371 iMeter.measurement = NumberParser.parseDouble(meter); 372 373 } else { 374 iMeter.isInNauticalMiles = NAUTICAL_MILES_MARKER_PATTERN.matcher(meter).find(); 375 iMeter.isInFathoms = FATHOMS_MARKER_PATTERN.matcher(meter).find(); 376 iMeter.isInFeet = FEET_MARKER_PATTERN.matcher(meter).find(); 377 iMeter.isInInches = INCHES_MARKER_PATTERN.matcher(meter).find(); 378 iMeter.isInKm = KM_MARKER_PATTERN.matcher(meter).find(); 379 iMeter.isInCm = CM_MARKER_PATTERN.matcher(meter).find(); 380 381 // handle 6-7m values 382 if (SEP_MARKER_PATTERN.matcher(meter).find()) { 383 // we have been given a range 384 try { 385 String min = meter.substring(0, meter.indexOf('-')).trim(); 386 min = removeMeasurementMarkers(min); 387 String max = meter.substring(meter.indexOf('-') + 1).trim(); 388 max = removeMeasurementMarkers(max); 389 390 Double minDouble = NumberParser.parseDouble(min); 391 Double maxDouble = NumberParser.parseDouble(max); 392 393 if (minDouble == null && maxDouble != null){ 394 iMeter.measurement = maxDouble; 395 } else if(maxDouble == null && minDouble != null){ 396 iMeter.measurement = maxDouble; 397 } else if (minDouble != null && maxDouble != null && minDouble != 0 && maxDouble != 0 398 && maxDouble - minDouble != 0) { 399 iMeter.measurement = (maxDouble + minDouble) / 2; 400 } 401 } catch (NumberFormatException ignored) { 402 } 403 404 } else { 405 iMeter.measurement = NumberParser.parseDouble(removeMeasurementMarkers(meter)); 406 } 407 408 if (iMeter.measurement != null) { 409 // convert to metric 410 if (iMeter.isInNauticalMiles) { 411 iMeter.measurement = convertNauticalMilesToMetres(iMeter.measurement); 412 } else if (iMeter.isInFathoms) { 413 iMeter.measurement = convertFathomsToMetres(iMeter.measurement); 414 } else if (iMeter.isInFeet) { 415 iMeter.measurement = convertFeetToMetres(iMeter.measurement); 416 } else if (iMeter.isInInches) { 417 iMeter.measurement = convertInchesToMetres(iMeter.measurement); 418 } else if (iMeter.isInKm){ 419 iMeter.measurement = convertKmToMetres(iMeter.measurement); 420 } else if (iMeter.isInCm){ 421 iMeter.measurement = convertCmToMetres(iMeter.measurement); 422 } 423 } 424 } 425 } catch (NumberFormatException e) { 426 LOG.debug("Unparsable metre measurement: {}, {}", meter, e.getMessage()); 427 } 428 429 // round to centimetres 430 if (iMeter.measurement != null) { 431 iMeter.measurement = Math.round(iMeter.measurement * 100.0) / 100.0; 432 } 433 return iMeter; 434 } 435 436 /** 437 * Remove "m" etc. 438 * 439 * @param s to remove measurement markers from. 440 * 441 * @return a new string with all measurements removed (i.e. replaced by the empty string) 442 */ 443 private static String removeMeasurementMarkers(String s) { 444 if (s == null) return null; 445 return REMOVE_MEASURE_MARKER_PATTERN.matcher(s).replaceAll(""); 446 } 447 448 private static double convertNauticalMilesToMetres(double nauticalMiles) { 449 return nauticalMiles * NAUTICAL_MILES_TO_METRES; 450 } 451 452 private static double convertFathomsToMetres(double fathoms) { 453 return fathoms * FATHOMS_TO_METRES; 454 } 455 456 private static double convertFeetToMetres(double feet) { 457 return feet * FEET_TO_METRES; 458 } 459 460 private static double convertInchesToMetres(double inches) { 461 return inches * INCHES_TO_METRES; 462 } 463 464 private static double convertKmToMetres(double km) { 465 return km * KM_TO_METRES; 466 } 467 468 private static double convertCmToMetres(double cm) { 469 return cm * CM_TO_METRES; 470 } 471 472 /** 473 * @return rounded int value or null if it was null or exceeds the maximum an int can hold 474 */ 475 private static Integer roundedInt(Double x) { 476 if (x == null) return null; 477 478 Long xl = Math.round(x); 479 if (xl > Integer.MAX_VALUE) { 480 throw new IllegalArgumentException("Long too big for an integer"); 481 } 482 return xl.intValue(); 483 } 484 485}