001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
028
029/**
030 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
031 * all unknown Term instances.
032 */
033public class TermFactory {
034
035  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
036  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
037  private static TermFactory singleton;
038  private static boolean initialized = false;
039  private static final Object LOCK = new Object();
040
041  private final Map<String, Term> terms = new HashMap<>();
042  private final Map<String, Term> classTerms = new HashMap<>();
043  private final Set<Class<? extends Enum<?>>> registeredEnumClasses = new HashSet<>();
044
045  public static TermFactory instance() {
046    if (initialized) {
047      return singleton;
048    }
049
050    synchronized (LOCK) {
051      if (singleton == null) {
052        LOG.debug("Building new TermFactory instance");
053        singleton = new TermFactory();
054        singleton.loadKnownTerms();
055        initialized = true;
056      }
057    }
058
059    return singleton;
060  }
061
062  private TermFactory() {
063  }
064
065  private void loadKnownTerms() {
066    registerTermEnum(DwcTerm.class);
067    registerTermEnum(DcTerm.class, "dct");
068    registerTermEnum(GbifTerm.class);
069    registerTermEnum(GbifInternalTerm.class);
070    registerTermEnum(IucnTerm.class);
071    registerTermEnum(DcElement.class);
072    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
073    registerTermEnum(PlaziTerm.class);
074    registerTermEnum(GadmTerm.class);
075    registerTermEnum(DwcaTerm.class);
076
077    // Audubon core
078    registerTermEnum(AcTerm.class);
079    registerTermEnum(ExifTerm.class);
080    registerTermEnum(IptcTerm.class);
081    registerTermEnum(PhotoshopTerm.class);
082    registerTermEnum(XmpTerm.class, "adobe");
083    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names
084
085    // DWCA extensions
086    registerTermEnum(ChronoTerm.class);
087    registerTermEnum(GbifDnaTerm.class);
088    registerTermEnum(GbifMiqeTerm.class);
089    registerTermEnum(GermplasmTerm.class);
090    registerTermEnum(GgbnTerm.class);
091    registerTermEnum(MixsTerm.class);
092    registerTermEnum(ObisTerm.class);
093    registerTermEnum(Wgs84GeoPositioningTerm.class);
094    registerTermEnum(EcoTerm.class);
095
096    registerQualifiedTermEnum(DwcaTerm.class);
097    addTerm(BibTexTerm.CLASS_TERM);
098  }
099
100  /**
101   * @return the set of term enum classes that have been registered with this TermFactory
102   */
103  public Set<Class<? extends Enum<?>>> listRegisteredTermEnums() {
104    return Collections.unmodifiableSet(registeredEnumClasses);
105  }
106
107  public void registerTerm(Term term) {
108    addTerm(term);
109  }
110
111  public void registerTerm(UnknownTerm term) {
112    addTerm(term.qualifiedName(), term);
113  }
114
115  /**
116   * Registers all terms from a term enumeration.
117   * If the same class is registered again it will be silently ignored.
118   *
119   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
120   */
121  public synchronized <T extends Enum<?> & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
122    if (registeredEnumClasses.contains(termClass)) {
123      LOG.debug("{} is already registered", termClass);
124    } else {
125      registeredEnumClasses.add(termClass);
126      for (T term : termClass.getEnumConstants()) {
127        // add regular term representations (simple, prefixed & qualified)
128        addTerm(term, altPrefixes);
129        // add alternatives
130        for (String alt : term.alternativeNames()) {
131          addTerm(alt, term);
132          if (!alt.startsWith("http") && !alt.contains(":")) {
133            addTerm(term.prefix() + ":" + alt, term);
134            addTerm(term.namespace().resolve(alt).toString(), term);
135            for (String pre : altPrefixes) {
136              addTerm(pre + ":" + alt, term);
137            }
138          }
139        }
140      }
141    }
142  }
143
144  /**
145   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
146   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
147   */
148  public <T extends Enum<?> & Term> void registerQualifiedTermEnum(Class<T> termClass) {
149    if (registeredEnumClasses.contains(termClass)) {
150      LOG.debug("{} is already registered", termClass);
151    } else {
152      registeredEnumClasses.add(termClass);
153      for (T term : termClass.getEnumConstants()) {
154        // add only the prefixed and qualified representation to avoid clashes
155        addTerm(term.prefixedName(), term);
156        addTerm(term.qualifiedName(), term);
157      }
158    }
159  }
160
161  private void addTerm(Term term, String ... altPrefixes) {
162    addTerm(term.simpleName(), term);
163    addTerm(term.prefixedName(), term);
164    addTerm(term.qualifiedName(), term);
165    for (String pre : altPrefixes) {
166      addTerm(pre + ":" + term.simpleName(), term);
167    }
168  }
169
170  /**
171   * Checks whether a string is null or empty (after trimming).
172   */
173  private static boolean isNullOrEmpty(String s) {
174    return s == null || s.trim().isEmpty();
175  }
176
177  private void addTerm(String key, Term term) {
178    if (isNullOrEmpty(key)) {
179      return;
180    }
181
182    // keep class terms distinct
183    Map<String, Term> map = termMap(term.isClass());
184    if (map.containsKey(key)) {
185      Term t1 = map.get(key);
186      if (!t1.equals(term)) {
187        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", map.get(key), term, key, map.get(key));
188      }
189    } else {
190      map.put(key, term);
191      // also add a normalised version
192      key = normaliseTerm(key);
193      if (!map.containsKey(key)) {
194        map.put(key, term);
195      }
196    }
197  }
198
199  private Map<String, Term> termMap(boolean isClass) {
200    return isClass ? classTerms : terms;
201  }
202
203  /**
204   * @return a purely alphanumerical, lower cased term with all other characters replaced
205   */
206  public static String normaliseTerm(String term) {
207    return NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("")
208                .replaceFirst("^https?", "")
209                .toLowerCase(Locale.ROOT); // remove http(s)
210  }
211
212  /**
213   * This is the main method to get a term from the factory searching both for property or class terms.
214   * It will lookup matching terms applying some normalization and known synonyms first.
215   * In case of ambiguous terms Class terms will be preferred.
216   *
217   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
218   * all terms with the same qualified name return a single UnknownTerm instance.
219   *
220   * For clearly bad term names an IllegalArgumentException is thrown.
221   * For example in the case of a simple name containing whitespace like "hello tom".
222   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
223   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
224   * instance is created.
225   */
226  public Term findTerm(final String termName) throws IllegalArgumentException {
227    // First try an exact match.
228    if (terms.containsKey(termName)) {
229      return terms.get(termName);
230    }
231
232    // Try class term
233    Term t = findTermOnly(termName, true);
234    if (t == null) {
235      // Try property term
236      t = findTermOnly(termName, false);
237    }
238    // create new term if needed
239    if (t == null) {
240      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX + ":")) {
241        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
242      } else {
243        t = createUnknownTerm(termName, false);
244      }
245    }
246    return t;
247  }
248
249  /**
250   * This method works just as findTerm(final String termName) but restricts
251   * the results to just property terms.
252   */
253  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
254    return findTerm(termName, false);
255  }
256
257  /**
258   * This method works just as findTerm(final String termName) but restricts
259   * the results to just class terms.
260   */
261  public Term findClassTerm(final String termName) throws IllegalArgumentException {
262    return findTerm(termName, true);
263  }
264
265  /**
266   * This method works just as findTerm(final String termName) but restricts
267   * the results to just property or class terms.
268   */
269  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
270    if (isNullOrEmpty(termName)) {
271      return null;
272    }
273
274    Term t = findTermOnly(termName, isClassTerm);
275    // create new term if needed
276    if (t == null) {
277      t = createUnknownTerm(termName, isClassTerm);
278    }
279    return t;
280  }
281
282  /**
283   * Does not create Unknown terms
284   */
285  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
286    if (isNullOrEmpty(termName)) {
287      return null;
288    }
289
290    Map<String, Term> map = termMap(isClassTerm);
291    // first try term just as it is
292    if (map.containsKey(termName)) {
293      return map.get(termName);
294    }
295
296    // try normalised term otherwise
297    if (map.containsKey(normaliseTerm(termName))) {
298      return map.get(normaliseTerm(termName));
299    }
300    return null;
301  }
302
303  private Term createUnknownTerm(String termName, boolean isClassTerm) {
304    // create new term instance
305    Term term = UnknownTerm.build(termName, isClassTerm);
306    addTerm(termName, term);
307    addTerm(term.qualifiedName(), term);
308    return term;
309  }
310
311  private Term createBibtexTerm(String termName, boolean qualified) {
312    // create new term instance
313    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
314    addTerm(term.qualifiedName(), term);
315    addTerm(term.prefixedName(), term);
316    return term;
317  }
318
319}