001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers.geospatial;
015
016import org.gbif.api.vocabulary.OccurrenceIssue;
017import org.gbif.common.parsers.NumberParser;
018import org.gbif.common.parsers.core.OccurrenceParseResult;
019import org.gbif.common.parsers.core.ParseResult;
020
021import java.util.HashSet;
022import java.util.Set;
023import java.util.regex.Pattern;
024
025import javax.annotation.Nullable;
026
027import org.apache.commons.lang3.StringUtils;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031/**
032 * Utilities for parsing min/max meter measurements in general plus specific additions and
033 * validations for elevation, depth and distance above surface.
034 *
035 * Accepts metres and converts centimetres, kilometres, nautical miles, fathoms, feet and inches to
036 * metres.
037 *
 * TODO: consider using the JScience library:
039 *  http://jscience.org/api/javax/measure/unit/package-summary.html
040 *  http://jscience.org/api/javax/measure/unit/Unit.html
041 */
042public class MeterRangeParser {
043  private static final Logger LOG = LoggerFactory.getLogger(MeterRangeParser.class);
044
045  /**
046   * Pattern for removing measurement denominations
047   */
048  private static final Pattern MEASURE_MARKER_PATTERN = Pattern.compile("[a-zA-Zµ]");
049
050  /**
051   * Pattern to remove measurement markers (like "m", "FT.")
052   */
053  private static final Pattern REMOVE_MEASURE_MARKER_PATTERN = Pattern.compile("[a-zA-Zµ\" ]\\.?");
054
055  /**
056   * Pattern for recognising measurements in nautical miles
057   */
058  private static final Pattern NAUTICAL_MILES_MARKER_PATTERN = Pattern.compile("nm|nmi", Pattern.CASE_INSENSITIVE);
059
060  /**
061   * Pattern for recognising measurements in fathoms
062   */
063  private static final Pattern FATHOMS_MARKER_PATTERN = Pattern.compile("fm|fathom|fathoms", Pattern.CASE_INSENSITIVE);
064
065  /**
066   * Pattern for recognising measurements in feet
067   */
068  private static final Pattern FEET_MARKER_PATTERN = Pattern.compile("ft|'|feet", Pattern.CASE_INSENSITIVE);
069
070  /**
071   * Pattern for recognising measurements in inches
072   */
073  private static final Pattern INCHES_MARKER_PATTERN = Pattern.compile("in|\"|inch|inches", Pattern.CASE_INSENSITIVE);
074
075  /**
076   * Pattern for recognising measurements in km
077   */
078  private static final Pattern KM_MARKER_PATTERN = Pattern.compile("km|kilometres|kilometers", Pattern.CASE_INSENSITIVE);
079
080  /**
081   * Pattern for recognising measurements in cm
082   */
083  private static final Pattern CM_MARKER_PATTERN = Pattern.compile("cm|centimetres|centimeters", Pattern.CASE_INSENSITIVE);
084
085  /**
086   * Pattern for recognising a range value
087   */
088  private static final Pattern SEP_MARKER_PATTERN = Pattern.compile("\\d\\s*-\\s*\\d");
089
090  /**
091   * Constant factor to convert from nautical miles to metres.
092   */
093  private static final float NAUTICAL_MILES_TO_METRES = 1852f;
094
095  /**
096   * Constant factor to convert from fathoms to metres.
097   */
098  private static final float FATHOMS_TO_METRES = 6 * 0.3048f;
099
100  /**
101   * Constant factor to convert from feet to metres.
102   */
103  private static final float FEET_TO_METRES = 0.3048f;
104
105  /**
106   * Constant factor to convert from inches to metres.
107   */
108  private static final float INCHES_TO_METRES = 0.0254f;
109
110  /**
111   * Constant factor to convert from km to metres.
112   */
113  private static final float KM_TO_METRES = 1000f;
114
115  /**
116   * Constant factor to convert from cm to metres.
117   */
118  private static final float CM_TO_METRES = 0.1f;
119
120  /**
121   * The lowest elevation value recognised as valid: -430m
122   *
123   * @See <a href="https://github.com/tdwg/bdq/issues/112">TG2-VALIDATION_MAXELEVATION_INRANGE</a>
124   */
125  private static final int MIN_ELEVATION = -430;
126
127  /**
128   * The highest elevation value recognised as valid, Mount Everest 8,848 m (29,029 ft).
129   * Highest point in the Troposphere: 17km
130   * Highest point in the Stratosphere: 55km
131   * Max altitude of airline cruises: 13km
132   * Max altitude of weather ballons: 34km
133   *
134   * @see <a href="http://en.wikipedia.org/wiki/Atmosphere_of_Earth">Atmosphere in wikipedia</a>
135   */
136  private static final int MAX_ELEVATION = 17000;
137
138  /**
139   * The lowest elevation value recognised as valid:
140   * 10,971 m (35,994 ft) Challenger Deep, Mariana Trench[32]
141   */
142  private static final int MAX_DEPTH = 11000;
143
144  /**
145   * Largest holes dug into the earth are ~4km.
146   */
147  private static final int MIN_DISTANCE = -5000;
148
149  /**
150   * Same as elevation, we use the upper end of the Troposphere.
151   */
152  private static final int MAX_DISTANCE = MAX_ELEVATION;
153
154  static class MeasurementWrapper<T> {
155    private T measurement;
156    private boolean isInNauticalMiles;
157    private boolean isInFathoms;
158    private boolean isInFeet;
159    private boolean isInInches;
160    private boolean isInCm;
161    private boolean isInKm;
162    private boolean containsNonNumeric;
163    private boolean minMaxSwapped;
164    private boolean tooLarge;
165
166    public T getMeasurement() {
167      return measurement;
168    }
169
170    public boolean isInNauticalMiles() {
171      return isInNauticalMiles;
172    }
173
174    public boolean isInFathoms() {
175      return isInFathoms;
176    }
177
178    public boolean isInFeet() {
179      return isInFeet;
180    }
181
182    public boolean isInInches() {
183      return isInInches;
184    }
185
186    public boolean isInCm() {
187      return isInCm;
188    }
189
190    public boolean isInKm() {
191      return isInKm;
192    }
193
194    public boolean containsNonNumeric() {
195      return containsNonNumeric;
196    }
197
198    public boolean isMinMaxSwapped() {
199      return minMaxSwapped;
200    }
201
202    public boolean isTooLarge() {
203      return tooLarge;
204    }
205
206    public void addIssues(MeasurementWrapper<?> issues) {
207      isInNauticalMiles = isInNauticalMiles || issues.isInNauticalMiles;
208      isInFathoms = isInFathoms || issues.isInFathoms;
209      isInFeet = isInFeet || issues.isInFeet;
210      isInInches = isInInches || issues.isInInches;
211      isInCm = isInCm || issues.isInCm;
212      isInKm = isInKm || issues.isInKm;
213      containsNonNumeric = containsNonNumeric || issues.containsNonNumeric;
214      minMaxSwapped = minMaxSwapped || issues.minMaxSwapped;
215      tooLarge = tooLarge || issues.tooLarge;
216    }
217  }
218
219  /**
220   * Takes min and max values in metres and a known precision and calculates a single mean value and
221   * its accuracy. This method tries also to parse common measurements given in fathoms, feet,
222   * inches, km or cm and converts them to metres.
223   */
224  public static MeasurementWrapper<DoubleAccuracy> parseMeterRange(
225      String minRaw, @Nullable String maxRaw, @Nullable String precisionRaw) {
226    MeasurementWrapper<DoubleAccuracy> result = new MeasurementWrapper<DoubleAccuracy>();
227
228    MeasurementWrapper<Double> min = parseInMeter(minRaw);
229    MeasurementWrapper<Double> max = parseInMeter(maxRaw);
230    MeasurementWrapper<Double> prec = parseInMeter(precisionRaw);
231
232    result.addIssues(min);
233    result.addIssues(max);
234    result.addIssues(prec);
235
236    if (min.measurement == null && max.measurement == null) {
237      // both are null, return issues only
238      return result;
239    }
240
241    // final result vars
242    Double value;
243    Double accuracy;
244
245    // check for swapped values and apply precision if min & max exist
246    if (min.measurement != null && max.measurement != null) {
247      // flag swapped min/max
248      if (min.measurement > max.measurement) {
249        result.minMaxSwapped = true;
250        Double oldMin = min.measurement;
251        min.measurement = max.measurement;
252        max.measurement = oldMin;
253      }
254      // apply precision to min max if we have it
255      if (prec.measurement != null) {
256        min.measurement -= prec.measurement;
257        max.measurement += prec.measurement;
258      }
259      // build the arithmetic mean and set accuracy
260      value = (min.measurement + max.measurement) / 2d;
261      accuracy = (max.measurement - min.measurement) / 2d;
262
263    } else {
264      // use the only value and precision for accuracy
265      value = min.measurement == null ? max.measurement : min.measurement;
266      accuracy = prec.measurement;
267    }
268
269    if (value != null) {
270      result.measurement = new DoubleAccuracy(value, accuracy);
271    }
272
273    // finally a result, bye bye!
274    return result;
275  }
276
277  public static OccurrenceParseResult<DoubleAccuracy> parseElevation(@Nullable String min, @Nullable String max, @Nullable String precision) {
278
279    MeasurementWrapper<DoubleAccuracy> elevation = parseMeterRange(min, max, precision);
280
281    Set<OccurrenceIssue> issues = new HashSet<>();
282    if (elevation.containsNonNumeric) {
283      issues.add(OccurrenceIssue.ELEVATION_NON_NUMERIC);
284    }
285    if (elevation.isInNauticalMiles || elevation.isInFathoms
286      || elevation.isInFeet || elevation.isInInches) {
287      issues.add(OccurrenceIssue.ELEVATION_NOT_METRIC);
288    }
289    if (elevation.minMaxSwapped) {
290      issues.add(OccurrenceIssue.ELEVATION_MIN_MAX_SWAPPED);
291    }
292    if (elevation.tooLarge) {
293      issues.add(OccurrenceIssue.ELEVATION_UNLIKELY);
294    }
295
296    if (elevation.measurement == null || elevation.measurement.getValue() == null) {
297      return OccurrenceParseResult.fail(issues);
298    }
299
300    DoubleAccuracy result = elevation.measurement;
301    // record the number of records with altitude out of range
302    if (result.getValue() > MAX_ELEVATION || result.getValue() < MIN_ELEVATION) {
303      issues.add(OccurrenceIssue.ELEVATION_UNLIKELY);
304      return OccurrenceParseResult.fail(issues);
305    }
306
307    return OccurrenceParseResult.success(ParseResult.CONFIDENCE.DEFINITE, result, issues);
308  }
309
310  public static OccurrenceParseResult<DoubleAccuracy> parseDepth(@Nullable String min, @Nullable String max, @Nullable String precision) {
311    MeasurementWrapper<DoubleAccuracy> depth = parseMeterRange(min, max, precision);
312
313    Set<OccurrenceIssue> issues = new HashSet<>();
314    if(depth.containsNonNumeric) {
315      issues.add(OccurrenceIssue.DEPTH_NON_NUMERIC);
316    }
317    if(depth.isInNauticalMiles || depth.isInFathoms || depth.isInFeet || depth.isInInches) {
318      issues.add(OccurrenceIssue.DEPTH_NOT_METRIC);
319    }
320    if(depth.minMaxSwapped) {
321      issues.add(OccurrenceIssue.DEPTH_MIN_MAX_SWAPPED);
322    }
323    if(depth.tooLarge) {
324      issues.add(OccurrenceIssue.DEPTH_UNLIKELY);
325    }
326
327    if (depth.measurement == null || depth.measurement.getValue() == null) {
328      return OccurrenceParseResult.fail(issues);
329    }
330
331    DoubleAccuracy result = depth.measurement;
332
333    // negate depth if its negative
334    if (result.getValue() < 0) {
335      result = new DoubleAccuracy(-1 * result.getValue(), result.getAccuracy());
336      issues.add(OccurrenceIssue.DEPTH_UNLIKELY);
337    }
338
339    // record the number of records with depth out of range
340    if (result.getValue() > MAX_DEPTH) {
341      issues.add(OccurrenceIssue.DEPTH_UNLIKELY);
342      return OccurrenceParseResult.fail(issues);
343    }
344
345    return OccurrenceParseResult.success(ParseResult.CONFIDENCE.DEFINITE, result, issues);
346  }
347
348  /**
349   * Parses a string supposed to be a value in metres.
350   * Accepts nautical miles, fathoms, feet, inches if marked with a unit and converts them
351   */
352  public static ParseResult<Double> parseMeters(String meter) {
353    MeasurementWrapper<Double> result = parseInMeter(meter);
354    if (result.getMeasurement() == null) {
355      return ParseResult.fail();
356    }
357    return ParseResult.success(ParseResult.CONFIDENCE.DEFINITE, result.getMeasurement());
358  }
359
360  private static MeasurementWrapper<Double> parseInMeter(String meter) {
361    MeasurementWrapper<Double> iMeter = new MeasurementWrapper<>();
362
363    if (StringUtils.isEmpty(meter)) {
364      return iMeter;
365    }
366
367    try {
368      iMeter.containsNonNumeric = MEASURE_MARKER_PATTERN.matcher(meter).find();
369
370      if (!iMeter.containsNonNumeric()) {
371        iMeter.measurement = NumberParser.parseDouble(meter);
372
373      } else {
374        iMeter.isInNauticalMiles = NAUTICAL_MILES_MARKER_PATTERN.matcher(meter).find();
375        iMeter.isInFathoms = FATHOMS_MARKER_PATTERN.matcher(meter).find();
376        iMeter.isInFeet = FEET_MARKER_PATTERN.matcher(meter).find();
377        iMeter.isInInches = INCHES_MARKER_PATTERN.matcher(meter).find();
378        iMeter.isInKm = KM_MARKER_PATTERN.matcher(meter).find();
379        iMeter.isInCm = CM_MARKER_PATTERN.matcher(meter).find();
380
381        // handle 6-7m values
382        if (SEP_MARKER_PATTERN.matcher(meter).find()) {
383          // we have been given a range
384          try {
385            String min = meter.substring(0, meter.indexOf('-')).trim();
386            min = removeMeasurementMarkers(min);
387            String max = meter.substring(meter.indexOf('-') + 1).trim();
388            max = removeMeasurementMarkers(max);
389
390            Double minDouble = NumberParser.parseDouble(min);
391            Double maxDouble = NumberParser.parseDouble(max);
392
393            if (minDouble == null && maxDouble != null){
394              iMeter.measurement = maxDouble;
395            } else if(maxDouble == null && minDouble != null){
396              iMeter.measurement = maxDouble;
397            } else if (minDouble != null && maxDouble != null && minDouble != 0 && maxDouble != 0
398              && maxDouble - minDouble != 0) {
399              iMeter.measurement = (maxDouble + minDouble) / 2;
400            }
401          } catch (NumberFormatException ignored) {
402          }
403
404        } else {
405          iMeter.measurement = NumberParser.parseDouble(removeMeasurementMarkers(meter));
406        }
407
408        if (iMeter.measurement != null) {
409          // convert to metric
410          if (iMeter.isInNauticalMiles) {
411            iMeter.measurement = convertNauticalMilesToMetres(iMeter.measurement);
412          } else if (iMeter.isInFathoms) {
413            iMeter.measurement = convertFathomsToMetres(iMeter.measurement);
414          } else if (iMeter.isInFeet) {
415            iMeter.measurement = convertFeetToMetres(iMeter.measurement);
416          } else if (iMeter.isInInches) {
417            iMeter.measurement = convertInchesToMetres(iMeter.measurement);
418          } else if (iMeter.isInKm){
419            iMeter.measurement = convertKmToMetres(iMeter.measurement);
420          } else if (iMeter.isInCm){
421            iMeter.measurement = convertCmToMetres(iMeter.measurement);
422          }
423        }
424      }
425    } catch (NumberFormatException e) {
426      LOG.debug("Unparsable metre measurement: {}, {}", meter, e.getMessage());
427    }
428
429    // round to centimetres
430    if (iMeter.measurement != null) {
431      iMeter.measurement = Math.round(iMeter.measurement * 100.0) / 100.0;
432    }
433    return iMeter;
434  }
435
436  /**
437   * Remove "m" etc.
438   *
439   * @param s to remove measurement markers from.
440   *
441   * @return a new string with all measurements removed (i.e. replaced by the empty string)
442   */
443  private static String removeMeasurementMarkers(String s) {
444    if (s == null) return null;
445    return REMOVE_MEASURE_MARKER_PATTERN.matcher(s).replaceAll("");
446  }
447
448  private static double convertNauticalMilesToMetres(double nauticalMiles) {
449    return nauticalMiles * NAUTICAL_MILES_TO_METRES;
450  }
451
452  private static double convertFathomsToMetres(double fathoms) {
453    return fathoms * FATHOMS_TO_METRES;
454  }
455
456  private static double convertFeetToMetres(double feet) {
457    return feet * FEET_TO_METRES;
458  }
459
460  private static double convertInchesToMetres(double inches) {
461    return inches * INCHES_TO_METRES;
462  }
463
464  private static double convertKmToMetres(double km) {
465    return km * KM_TO_METRES;
466  }
467
468  private static double convertCmToMetres(double cm) {
469    return cm * CM_TO_METRES;
470  }
471
472  /**
473   * @return rounded int value or null if it was null or exceeds the maximum an int can hold
474   */
475  private static Integer roundedInt(Double x) {
476    if (x == null) return null;
477
478    Long xl = Math.round(x);
479    if (xl > Integer.MAX_VALUE) {
480      throw new IllegalArgumentException("Long too big for an integer");
481    }
482    return xl.intValue();
483  }
484
485}