001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
027
028/**
029 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
030 * all unknown Term instances.
031 */
032public class TermFactory {
033
034  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
035  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
036  private static TermFactory singleton;
037  private static boolean initialized = false;
038  private static final Object LOCK = new Object();
039
040  private final Map<String, Term> terms = new HashMap<String, Term>();
041  private final Map<String, Term> classTerms = new HashMap<String, Term>();
042  private final Set<Class<? extends Enum>> registeredEnumClasses = new HashSet<>();
043
044  public static TermFactory instance() {
045    if (initialized) {
046      return singleton;
047    }
048
049    synchronized (LOCK) {
050      if (singleton == null) {
051        LOG.debug("Building new TermFactory instance");
052        singleton = new TermFactory();
053        singleton.loadKnownTerms();
054        initialized = true;
055      }
056    }
057
058    return singleton;
059  }
060
061  private TermFactory() {
062  }
063
064  private void loadKnownTerms() {
065    registerTermEnum(DwcTerm.class);
066    registerTermEnum(DcTerm.class, "dct");
067    registerTermEnum(GbifTerm.class);
068    registerTermEnum(GbifInternalTerm.class);
069    registerTermEnum(IucnTerm.class);
070    registerTermEnum(DcElement.class);
071    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
072    registerTermEnum(AcTerm.class);
073    registerTermEnum(PlaziTerm.class);
074    registerTermEnum(GadmTerm.class);
075    registerTermEnum(DwcaTerm.class);
076    registerTermEnum(XmpTerm.class, "adobe");
077    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names
078  
079    registerQualifiedTermEnum(DwcaTerm.class);
080    addTerm(BibTexTerm.CLASS_TERM);
081  }
082
083  /**
084   * @return the set of term enum classes that have been registered with this TermFactory
085   */
086  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
087    return Collections.unmodifiableSet(registeredEnumClasses);
088  }
089
090  public void registerTerm(Term term) {
091    addTerm(term);
092  }
093
094  public void registerTerm(UnknownTerm term) {
095    addTerm(term.qualifiedName(), term);
096  }
097
098  /**
099   * Registers all terms from a term enumeration.
100   * If the same class is registered again it will be silently ignored.
101   *
102   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
103   */
104  public synchronized <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
105    if (registeredEnumClasses.contains(termClass)) {
106      LOG.debug("{} is already registered", termClass);
107    } else {
108      registeredEnumClasses.add(termClass);
109      for (T term : termClass.getEnumConstants()) {
110        // add regular term representations (simple, prefixed & qualified)
111        addTerm(term, altPrefixes);
112        // add alternatives
113        for (String alt : term.alternativeNames()) {
114          addTerm(alt, term);
115          if (!alt.startsWith("http") && !alt.contains(":")) {
116            addTerm(term.prefix() + ":" + alt, term);
117            addTerm(term.namespace().resolve(alt).toString(), term);
118            for (String pre : altPrefixes) {
119              addTerm(pre + ":" + alt, term);
120            }
121          }
122        }
123      }
124    }
125  }
126  
127  /**
128   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
129   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
130   */
131  public <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
132    if (registeredEnumClasses.contains(termClass)) {
133      LOG.debug("{} is already registered", termClass);
134    } else {
135      registeredEnumClasses.add(termClass);
136      for (T term : termClass.getEnumConstants()) {
137        // add only the prefixed and qualified representation to avoid clashes
138        addTerm(term.prefixedName(), term);
139        addTerm(term.qualifiedName(), term);
140      }
141    }
142  }
143  
144  private void addTerm(Term term, String ... altPrefixes) {
145    addTerm(term.simpleName(), term);
146    addTerm(term.prefixedName(), term);
147    addTerm(term.qualifiedName(), term);
148    for (String pre : altPrefixes) {
149      addTerm(pre + ":" + term.simpleName(), term);
150    }
151  }
152
153  private void addTerm(String key, Term term) {
154    if (key == null || key.trim().isEmpty()) {
155      return;
156    }
157
158    // keep class terms distinct
159    Map<String, Term> map = termMap(term.isClass());
160    if (map.containsKey(key)) {
161      Term t1 = map.get(key);
162      if (!t1.equals(term)) {
163        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", map.get(key), term, key, map.get(key));
164      }
165    } else {
166      map.put(key, term);
167      // also add a normalised version
168      key = normaliseTerm(key);
169      if (!map.containsKey(key)) {
170        map.put(key, term);
171      }
172    }
173  }
174
175  private Map<String, Term> termMap(boolean isClass) {
176    return isClass ? classTerms : terms;
177  }
178
179  /**
180   * @return a purely alphanumerical, lower cased term with all other characters replaced
181   */
182  public static String normaliseTerm(String term) {
183    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
184    // remove http(s)
185    x = x.replaceFirst("^https?", "");
186    if (x.isEmpty()) {
187      return "";
188    }
189    return x.toLowerCase();
190  }
191
192  /**
193   * This is the main method to get a term from the factory searching both for property or class terms.
194   * It will lookup matching terms applying some normalization and known synonyms first.
195   * In case of ambiguous terms Class terms will be preferred.
196   *
197   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
198   * all terms with the same qualified name return a single UnknownTerm instance.
199   *
200   * For clearly bad term names an IllegalArgumentException is thrown.
201   * For example in the case of a simple name containing whitespace like "hello tom".
202   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
203   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
204   * instance is created.
205   */
206  public Term findTerm(final String termName) throws IllegalArgumentException {
207    // First try an exact match.
208    if (terms.containsKey(termName)) {
209      return terms.get(termName);
210    }
211
212    // Try class term
213    Term t = findTermOnly(termName, true);
214    if (t == null) {
215      // Try property term
216      t = findTermOnly(termName, false);
217    }
218    // create new term if needed
219    if (t == null) {
220      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX+":")) {
221        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
222      } else {
223        t = createUnknownTerm(termName, false);
224      }
225    }
226    return t;
227  }
228
229  /**
230   * This method works just as findTerm(final String termName) but restricts
231   * the results to just property terms.
232   */
233  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
234    return findTerm(termName, false);
235  }
236
237  /**
238   * This method works just as findTerm(final String termName) but restricts
239   * the results to just class terms.
240   */
241  public Term findClassTerm(final String termName) throws IllegalArgumentException {
242    return findTerm(termName, true);
243  }
244
245  /**
246   * This method works just as findTerm(final String termName) but restricts
247   * the results to just property or class terms.
248   */
249  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
250    if (termName == null || termName.trim().isEmpty()) {
251      return null;
252    }
253
254    Term t = findTermOnly(termName, isClassTerm);
255    // create new term if needed
256    if (t == null) {
257      t = createUnknownTerm(termName, isClassTerm);
258    }
259    return t;
260  }
261
262  /**
263   * Does not create Unknown terms
264   */
265  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
266    if (termName == null || termName.trim().isEmpty()) {
267      return null;
268    }
269
270    Map<String, Term> map = termMap(isClassTerm);
271    // first try term just as it is
272    if (map.containsKey(termName)) {
273      return map.get(termName);
274    }
275
276    // try normalised term otherwise
277    if (map.containsKey(normaliseTerm(termName))) {
278      return map.get(normaliseTerm(termName));
279    }
280    return null;
281  }
282
283  private Term createUnknownTerm(String termName, boolean isClassTerm) {
284    // create new term instance
285    Term term = UnknownTerm.build(termName, isClassTerm);
286    addTerm(termName, term);
287    addTerm(term.qualifiedName(), term);
288    return term;
289  }
290
291  private Term createBibtexTerm(String termName, boolean qualified) {
292    // create new term instance
293    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
294    addTerm(term.qualifiedName(), term);
295    addTerm(term.prefixedName(), term);
296    return term;
297  }
298
299}