001package org.gbif.dwc.terms;
002
003import java.util.HashMap;
004import java.util.Map;
005import java.util.regex.Pattern;
006
007import org.slf4j.Logger;
008import org.slf4j.LoggerFactory;
009
010/**
011 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
012 * all unknown Term instances.
013 */
014public class TermFactory {
015
016  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
017  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
018  private static final String UNKNOWN_NAMESPACE = "http://unknown.org/";
019  private static TermFactory singleton;
020  private static boolean initialized = false;
021  private static final Object LOCK = new Object();
022
023  private final Map<String, Term> terms = new HashMap<String, Term>();
024
025  public static TermFactory instance() {
026    if (initialized) {
027      return singleton;
028    }
029
030    synchronized (LOCK) {
031      if (singleton == null) {
032        LOG.debug("Building new TermFactory instance");
033        singleton = new TermFactory();
034        singleton.loadKnownTerms();
035        initialized = true;
036      }
037    }
038
039    return singleton;
040  }
041
042  private TermFactory() {
043  }
044
045  private void loadKnownTerms() {
046    addTerms(DwcTerm.values(), DwcTerm.PREFIXES);
047    addTerms(DcTerm.values(), DcTerm.PREFIXES);
048    addTerms(GbifTerm.values(), GbifTerm.PREFIXES);
049    addTerms(GbifInternalTerm.values(), new String[0]);
050    addTerms(IucnTerm.values(), IucnTerm.PREFIXES);
051    addTerms(DcElement.values(), DcElement.PREFIXES);
052    addTerms(AcTerm.values(), AcTerm.PREFIXES);
053    addTerms(XmpTerm.values(), XmpTerm.PREFIXES);
054    addTerms(XmpRightsTerm.values(), XmpRightsTerm.PREFIXES);
055    addTerms(EolReferenceTerm.values(), EolReferenceTerm.PREFIXES);
056  }
057
058  private <T extends Term & AlternativeNames> void addTerms(T[] terms, String[] prefixes) {
059    for (T term : terms) {
060      addTerm(term.simpleName(), term, true);
061      addTerm(term.qualifiedName(), term);
062      for (String pre : prefixes) {
063        addTerm(pre + term.simpleName(), term);
064      }
065      // also index alt names
066      for (String alt : term.alternativeNames()) {
067        addTerm(alt, term);
068        for (String pre : prefixes) {
069          addTerm(pre + alt, term);
070        }
071      }
072    }
073  }
074
075  public void addTerm(String key, Term term) {
076    addTerm(key, term, false);
077  }
078
079  public void addTerm(String key, Term term, boolean isClassTerm) {
080    if (key == null || key.trim().isEmpty()) {
081      return;
082    }
083    key = normaliseTerm(key, isClassTerm);
084    if (terms.containsKey(key)) {
085      Term t1 = terms.get(key);
086      if (!t1.equals(term)) {
087        LOG
088          .warn("Terms {} and {} are both known as \"{}\". Keeping only {}", terms.get(key), term, key, terms.get(key));
089      }
090    } else {
091      terms.put(key, term);
092    }
093  }
094
095  /**
096   * @return a purely alphanumerical, lower cased term with all other characters replaced
097   */
098  public static String normaliseTerm(String term) {
099    return normaliseTerm(term, false);
100  }
101
102  public static String normaliseTerm(String term, boolean keepInitialCase) {
103    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
104    if (x.isEmpty()) {
105      return "";
106    } else if (x.length() == 1) {
107      return keepInitialCase ? String.valueOf(x.charAt(0)) : x.toLowerCase();
108    } else {
109      return keepInitialCase ? x.charAt(0)+x.substring(1).toLowerCase() : x.toLowerCase();
110    }
111  }
112
113  /**
114   * This is the main method to get a term from the factory.
115   * It will lookup matching terms applying some normalization and known synonyms first.
116   * If nothing matches the factory creates a new UnknownTerm instance and keeps it for further requests so that
117   * all terms with the same qualified name return a single UnknownTerm instance.
118   *
119   * For clearly bad term names an IllegalArgumentException is thrown.
120   * For example in the case of a simple name containing whitespace like "hello tom".
121   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
122   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
123   * instance is created.
124   */
125  public Term findTerm(final String termName) throws IllegalArgumentException {
126    if (termName == null || termName.trim().isEmpty()) {
127      return null;
128    }
129    // first try term just as it is
130    if (terms.containsKey(termName)) {
131      return terms.get(termName);
132    }
133
134    // try normalised term next with initial
135    if (terms.containsKey(normaliseTerm(termName, true))) {
136      return terms.get(normaliseTerm(termName, true));
137
138    } else if (terms.containsKey(normaliseTerm(termName))) {
139        return terms.get(normaliseTerm(termName));
140
141    } else {
142      return createUnknownTerm(termName);
143    }
144  }
145
146  private Term createUnknownTerm(String termName) {
147    // create new term instance
148    Term term;
149    try {
150      term = UnknownTerm.build(termName);
151      addTerm(termName, term);
152    } catch (IllegalArgumentException e) {
153      // simple names as found in ATB file headers are rejected
154      // convert into a standard unknown term namespace and try again
155      term = UnknownTerm.build(UNKNOWN_NAMESPACE + termName);
156      addTerm(termName, term);
157      addTerm(term.qualifiedName(), term);
158    }
159    return term;
160  }
161
162}