001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
025
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029/**
030 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
031 * all unknown Term instances.
032 */
033public class TermFactory {
034
035  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
036  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
037  private static TermFactory singleton;
038  private static boolean initialized = false;
039  private static final Object LOCK = new Object();
040
041  private final Map<String, Term> terms = new HashMap<>();
042  private final Map<String, Term> classTerms = new HashMap<>();
043  private final Set<Class<? extends Enum<?>>> registeredEnumClasses = new HashSet<>();
044
045  public static TermFactory instance() {
046    if (initialized) {
047      return singleton;
048    }
049
050    synchronized (LOCK) {
051      if (singleton == null) {
052        LOG.debug("Building new TermFactory instance");
053        singleton = new TermFactory();
054        singleton.loadKnownTerms();
055        initialized = true;
056      }
057    }
058
059    return singleton;
060  }
061
062  private TermFactory() {
063  }
064
065  private void loadKnownTerms() {
066    registerTermEnum(DwcTerm.class);
067    registerTermEnum(DcTerm.class, "dct");
068    registerTermEnum(GbifTerm.class);
069    registerTermEnum(GbifInternalTerm.class);
070    registerTermEnum(IucnTerm.class);
071    registerTermEnum(DcElement.class);
072    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
073    registerTermEnum(PlaziTerm.class);
074    registerTermEnum(GadmTerm.class);
075    registerTermEnum(DwcaTerm.class);
076
077    // Audubon core
078    registerTermEnum(AcTerm.class);
079    registerTermEnum(ExifTerm.class);
080    registerTermEnum(IptcTerm.class);
081    registerTermEnum(PhotoshopTerm.class);
082    registerTermEnum(XmpTerm.class, "adobe");
083    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names
084
085    // DWCA extensions
086    registerTermEnum(ChronoTerm.class);
087    registerTermEnum(GbifDnaTerm.class);
088    registerTermEnum(GbifMiqeTerm.class);
089    registerTermEnum(GermplasmTerm.class);
090    registerTermEnum(GgbnTerm.class);
091    registerTermEnum(MixsTerm.class);
092    registerTermEnum(ObisTerm.class);
093    registerTermEnum(Wgs84GeoPositioningTerm.class);
094    registerTermEnum(EcoTerm.class);
095
096    registerQualifiedTermEnum(DwcaTerm.class);
097    addTerm(BibTexTerm.CLASS_TERM);
098  }
099
100  /**
101   * @return the set of term enum classes that have been registered with this TermFactory
102   */
103  public Set<Class<? extends Enum<?>>> listRegisteredTermEnums() {
104    return Collections.unmodifiableSet(registeredEnumClasses);
105  }
106
107  public void registerTerm(Term term) {
108    addTerm(term);
109  }
110
111  public void registerTerm(UnknownTerm term) {
112    addTerm(term.qualifiedName(), term);
113  }
114
115  /**
116   * Registers all terms from a term enumeration.
117   * If the same class is registered again it will be silently ignored.
118   *
119   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
120   */
121  public synchronized <T extends Enum<?> & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
122    if (registeredEnumClasses.contains(termClass)) {
123      LOG.debug("{} is already registered", termClass);
124    } else {
125      registeredEnumClasses.add(termClass);
126      for (T term : termClass.getEnumConstants()) {
127        // add regular term representations (simple, prefixed & qualified)
128        addTerm(term, altPrefixes);
129        // add alternatives
130        for (String alt : term.alternativeNames()) {
131          addTerm(alt, term);
132          if (!alt.startsWith("http") && !alt.contains(":")) {
133            addTerm(term.prefix() + ":" + alt, term);
134            addTerm(term.namespace().resolve(alt).toString(), term);
135            for (String pre : altPrefixes) {
136              addTerm(pre + ":" + alt, term);
137            }
138          }
139        }
140      }
141    }
142  }
143
144  /**
145   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
146   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
147   */
148  public <T extends Enum<?> & Term> void registerQualifiedTermEnum(Class<T> termClass) {
149    if (registeredEnumClasses.contains(termClass)) {
150      LOG.debug("{} is already registered", termClass);
151    } else {
152      registeredEnumClasses.add(termClass);
153      for (T term : termClass.getEnumConstants()) {
154        // add only the prefixed and qualified representation to avoid clashes
155        addTerm(term.prefixedName(), term);
156        addTerm(term.qualifiedName(), term);
157      }
158    }
159  }
160
161  private void addTerm(Term term, String ... altPrefixes) {
162    addTerm(term.simpleName(), term);
163    addTerm(term.prefixedName(), term);
164    addTerm(term.qualifiedName(), term);
165    for (String pre : altPrefixes) {
166      addTerm(pre + ":" + term.simpleName(), term);
167    }
168  }
169
170  /**
171   * Checks whether a string is null or empty (after trimming).
172   */
173  private static boolean isNullOrEmpty(String s) {
174    return s == null || s.trim().isEmpty();
175  }
176
177  private void addTerm(String key, Term term) {
178    if (isNullOrEmpty(key)) {
179      return;
180    }
181
182    // keep class terms distinct
183    Map<String, Term> map = termMap(term.isClass());
184    if (map.containsKey(key)) {
185      Term t1 = map.get(key);
186      if (!t1.equals(term)) {
187        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", map.get(key), term, key, map.get(key));
188      }
189    } else {
190      map.put(key, term);
191      // also add a normalised version
192      key = normaliseTerm(key);
193      map.computeIfAbsent(key, k -> term);
194    }
195  }
196
197  private Map<String, Term> termMap(boolean isClass) {
198    return isClass ? classTerms : terms;
199  }
200
201  /**
202   * @return a purely alphanumerical, lower cased term with all other characters replaced
203   */
204  public static String normaliseTerm(String term) {
205    return NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("")
206                .replaceFirst("^https?", "")
207                .toLowerCase(Locale.ROOT); // remove http(s)
208  }
209
210  /**
211   * This is the main method to get a term from the factory searching both for property or class terms.
212   * It will lookup matching terms applying some normalization and known synonyms first.
213   * In case of ambiguous terms Class terms will be preferred.
214   *
215   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
216   * all terms with the same qualified name return a single UnknownTerm instance.
217   *
218   * For clearly bad term names an IllegalArgumentException is thrown.
219   * For example in the case of a simple name containing whitespace like "hello tom".
220   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
221   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
222   * instance is created.
223   */
224  public Term findTerm(final String termName) throws IllegalArgumentException {
225    // First try an exact match.
226    if (terms.containsKey(termName)) {
227      return terms.get(termName);
228    }
229
230    // Try class term
231    Term t = findTermOnly(termName, true);
232    if (t == null) {
233      // Try property term
234      t = findTermOnly(termName, false);
235    }
236    // create new term if needed
237    if (t == null) {
238      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX + ":")) {
239        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
240      } else {
241        t = createUnknownTerm(termName, false);
242      }
243    }
244    return t;
245  }
246
247  /**
248   * This method works just as findTerm(final String termName) but restricts
249   * the results to just property terms.
250   */
251  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
252    return findTerm(termName, false);
253  }
254
255  /**
256   * This method works just as findTerm(final String termName) but restricts
257   * the results to just class terms.
258   */
259  public Term findClassTerm(final String termName) throws IllegalArgumentException {
260    return findTerm(termName, true);
261  }
262
263  /**
264   * This method works just as findTerm(final String termName) but restricts
265   * the results to just property or class terms.
266   */
267  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
268    if (isNullOrEmpty(termName)) {
269      return null;
270    }
271
272    Term t = findTermOnly(termName, isClassTerm);
273    // create new term if needed
274    if (t == null) {
275      t = createUnknownTerm(termName, isClassTerm);
276    }
277    return t;
278  }
279
280  /**
281   * Does not create Unknown terms
282   */
283  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
284    if (isNullOrEmpty(termName)) {
285      return null;
286    }
287
288    Map<String, Term> map = termMap(isClassTerm);
289    // first try term just as it is
290    if (map.containsKey(termName)) {
291      return map.get(termName);
292    }
293
294    // try normalised term otherwise
295    if (map.containsKey(normaliseTerm(termName))) {
296      return map.get(normaliseTerm(termName));
297    }
298    return null;
299  }
300
301  private Term createUnknownTerm(String termName, boolean isClassTerm) {
302    // create new term instance
303    Term term = UnknownTerm.build(termName, isClassTerm);
304    addTerm(termName, term);
305    addTerm(term.qualifiedName(), term);
306    return term;
307  }
308
309  private Term createBibtexTerm(String termName, boolean qualified) {
310    // create new term instance
311    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
312    addTerm(term.qualifiedName(), term);
313    addTerm(term.prefixedName(), term);
314    return term;
315  }
316
317}