001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.common.parsers.geospatial;
015
import org.gbif.common.parsers.core.ASCIIParser;
import org.gbif.common.parsers.core.FileBasedDictionaryParser;
import org.gbif.common.parsers.core.ParseResult;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
024
025/**
026 * Parser for geodetic datum strings into EPSG integer codes.
027 * These codes are suitable for instantiating spatial reference systems (SRS) in spatial libraries like geotools.
028 * See <a href="http://docs.geotools.org/latest/userguide/library/referencing/epsg.html">EPSG on Geotools</a>.
029 *
030 * For mapping common names to EPSG codes use the code registry search here:
031 * <ul>
032 *   <li>http://georepository.com/search/by-name/?query=samoa</li>
033 *   <li>http://epsg.io/</li>
034 *   <li>http://www.epsg-registry.org/</li>
035 *   <li>http://prj2epsg.org/apidocs.html</li>
036 * </ul>
037 *
038 */
039public class DatumParser extends FileBasedDictionaryParser<Integer> {
040  private static DatumParser singletonObject;
041  private final Pattern EPSG = Pattern.compile("\\s*(EPSG|ESPG)\\s*:+\\s*(\\d+)\\s*$", Pattern.CASE_INSENSITIVE);
042  private final Pattern NORMALIZER = Pattern.compile("[^a-zA-Z0-9]+");
043  private final ASCIIParser ascii = ASCIIParser.getInstance();
044
045  private DatumParser() {
046    super(false);
047    init(DatumParser.class.getResourceAsStream("/dictionaries/parse/datum.tsv"));
048  }
049
050  public static DatumParser getInstance() {
051    synchronized (DatumParser.class) {
052      if (singletonObject == null) {
053        singletonObject = new DatumParser();
054      }
055    }
056    return singletonObject;
057  }
058
059  @Override
060  public ParseResult<Integer> parse(String input) {
061    if (StringUtils.isEmpty(input)) {
062      return null;
063    }
064    // try EPSG codes directly, allow common typo
065    Matcher m = EPSG.matcher(input);
066    if (m.find()) {
067      Integer code = Integer.valueOf(m.group(2));
068      return ParseResult.success(ParseResult.CONFIDENCE.DEFINITE, code);
069    }
070    // try dictionary for well known SRS names
071    return super.parse(input);
072  }
073
074  @Override
075  protected String normalize(String value) {
076    if (StringUtils.isEmpty(value)) return null;
077    // convert to ascii
078    ParseResult<String> asci = ascii.parse(value);
079    return NORMALIZER.matcher(asci.getPayload()).replaceAll("").toUpperCase();
080  }
081
082  @Override
083  protected Integer fromDictFile(String value) {
084    if (StringUtils.isEmpty(value)) {
085      return null;
086    }
087    return Integer.valueOf(value);
088  }
089
090}