001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
027
028/**
029 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
030 * all unknown Term instances.
031 */
032public class TermFactory {
033
034  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
035  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
036  private static TermFactory singleton;
037  private static boolean initialized = false;
038  private static final Object LOCK = new Object();
039
040  private final Map<String, Term> terms = new HashMap<String, Term>();
041  private final Map<String, Term> classTerms = new HashMap<String, Term>();
042  private final Set<Class<? extends Enum>> registeredEnumClasses = new HashSet<>();
043
044  public static TermFactory instance() {
045    if (initialized) {
046      return singleton;
047    }
048
049    synchronized (LOCK) {
050      if (singleton == null) {
051        LOG.debug("Building new TermFactory instance");
052        singleton = new TermFactory();
053        singleton.loadKnownTerms();
054        initialized = true;
055      }
056    }
057
058    return singleton;
059  }
060
061  private TermFactory() {
062  }
063
064  private void loadKnownTerms() {
065    registerTermEnum(DwcTerm.class);
066    registerTermEnum(DcTerm.class, "dct");
067    registerTermEnum(GbifTerm.class);
068    registerTermEnum(GbifInternalTerm.class);
069    registerTermEnum(IucnTerm.class);
070    registerTermEnum(DcElement.class);
071    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
072    registerTermEnum(PlaziTerm.class);
073    registerTermEnum(GadmTerm.class);
074    registerTermEnum(DwcaTerm.class);
075
076    // Audubon core
077    registerTermEnum(AcTerm.class);
078    registerTermEnum(ExifTerm.class);
079    registerTermEnum(IptcTerm.class);
080    registerTermEnum(PhotoshopTerm.class);
081    registerTermEnum(XmpTerm.class, "adobe");
082    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names
083
084    // DWCA extensions
085    registerTermEnum(ChronoTerm.class);
086    registerTermEnum(GbifDnaTerm.class);
087    registerTermEnum(GbifMiqeTerm.class);
088    registerTermEnum(GermplasmTerm.class);
089    registerTermEnum(GgbnTerm.class);
090    registerTermEnum(MixsTerm.class);
091    registerTermEnum(ObisTerm.class);
092    registerTermEnum(Wgs84GeoPositioningTerm.class);
093
094    registerQualifiedTermEnum(DwcaTerm.class);
095    addTerm(BibTexTerm.CLASS_TERM);
096  }
097
098  /**
099   * @return the set of term enum classes that have been registered with this TermFactory
100   */
101  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
102    return Collections.unmodifiableSet(registeredEnumClasses);
103  }
104
105  public void registerTerm(Term term) {
106    addTerm(term);
107  }
108
109  public void registerTerm(UnknownTerm term) {
110    addTerm(term.qualifiedName(), term);
111  }
112
113  /**
114   * Registers all terms from a term enumeration.
115   * If the same class is registered again it will be silently ignored.
116   *
117   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
118   */
119  public synchronized <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
120    if (registeredEnumClasses.contains(termClass)) {
121      LOG.debug("{} is already registered", termClass);
122    } else {
123      registeredEnumClasses.add(termClass);
124      for (T term : termClass.getEnumConstants()) {
125        // add regular term representations (simple, prefixed & qualified)
126        addTerm(term, altPrefixes);
127        // add alternatives
128        for (String alt : term.alternativeNames()) {
129          addTerm(alt, term);
130          if (!alt.startsWith("http") && !alt.contains(":")) {
131            addTerm(term.prefix() + ":" + alt, term);
132            addTerm(term.namespace().resolve(alt).toString(), term);
133            for (String pre : altPrefixes) {
134              addTerm(pre + ":" + alt, term);
135            }
136          }
137        }
138      }
139    }
140  }
141
142  /**
143   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
144   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
145   */
146  public <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
147    if (registeredEnumClasses.contains(termClass)) {
148      LOG.debug("{} is already registered", termClass);
149    } else {
150      registeredEnumClasses.add(termClass);
151      for (T term : termClass.getEnumConstants()) {
152        // add only the prefixed and qualified representation to avoid clashes
153        addTerm(term.prefixedName(), term);
154        addTerm(term.qualifiedName(), term);
155      }
156    }
157  }
158
159  private void addTerm(Term term, String ... altPrefixes) {
160    addTerm(term.simpleName(), term);
161    addTerm(term.prefixedName(), term);
162    addTerm(term.qualifiedName(), term);
163    for (String pre : altPrefixes) {
164      addTerm(pre + ":" + term.simpleName(), term);
165    }
166  }
167
168  private void addTerm(String key, Term term) {
169    if (key == null || key.trim().isEmpty()) {
170      return;
171    }
172
173    // keep class terms distinct
174    Map<String, Term> map = termMap(term.isClass());
175    if (map.containsKey(key)) {
176      Term t1 = map.get(key);
177      if (!t1.equals(term)) {
178        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", map.get(key), term, key, map.get(key));
179      }
180    } else {
181      map.put(key, term);
182      // also add a normalised version
183      key = normaliseTerm(key);
184      if (!map.containsKey(key)) {
185        map.put(key, term);
186      }
187    }
188  }
189
190  private Map<String, Term> termMap(boolean isClass) {
191    return isClass ? classTerms : terms;
192  }
193
194  /**
195   * @return a purely alphanumerical, lower cased term with all other characters replaced
196   */
197  public static String normaliseTerm(String term) {
198    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
199    // remove http(s)
200    x = x.replaceFirst("^https?", "");
201    if (x.isEmpty()) {
202      return "";
203    }
204    return x.toLowerCase();
205  }
206
207  /**
208   * This is the main method to get a term from the factory searching both for property or class terms.
209   * It will lookup matching terms applying some normalization and known synonyms first.
210   * In case of ambiguous terms Class terms will be preferred.
211   *
212   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
213   * all terms with the same qualified name return a single UnknownTerm instance.
214   *
215   * For clearly bad term names an IllegalArgumentException is thrown.
216   * For example in the case of a simple name containing whitespace like "hello tom".
217   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
218   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
219   * instance is created.
220   */
221  public Term findTerm(final String termName) throws IllegalArgumentException {
222    // First try an exact match.
223    if (terms.containsKey(termName)) {
224      return terms.get(termName);
225    }
226
227    // Try class term
228    Term t = findTermOnly(termName, true);
229    if (t == null) {
230      // Try property term
231      t = findTermOnly(termName, false);
232    }
233    // create new term if needed
234    if (t == null) {
235      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX+":")) {
236        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
237      } else {
238        t = createUnknownTerm(termName, false);
239      }
240    }
241    return t;
242  }
243
244  /**
245   * This method works just as findTerm(final String termName) but restricts
246   * the results to just property terms.
247   */
248  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
249    return findTerm(termName, false);
250  }
251
252  /**
253   * This method works just as findTerm(final String termName) but restricts
254   * the results to just class terms.
255   */
256  public Term findClassTerm(final String termName) throws IllegalArgumentException {
257    return findTerm(termName, true);
258  }
259
260  /**
261   * This method works just as findTerm(final String termName) but restricts
262   * the results to just property or class terms.
263   */
264  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
265    if (termName == null || termName.trim().isEmpty()) {
266      return null;
267    }
268
269    Term t = findTermOnly(termName, isClassTerm);
270    // create new term if needed
271    if (t == null) {
272      t = createUnknownTerm(termName, isClassTerm);
273    }
274    return t;
275  }
276
277  /**
278   * Does not create Unknown terms
279   */
280  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
281    if (termName == null || termName.trim().isEmpty()) {
282      return null;
283    }
284
285    Map<String, Term> map = termMap(isClassTerm);
286    // first try term just as it is
287    if (map.containsKey(termName)) {
288      return map.get(termName);
289    }
290
291    // try normalised term otherwise
292    if (map.containsKey(normaliseTerm(termName))) {
293      return map.get(normaliseTerm(termName));
294    }
295    return null;
296  }
297
298  private Term createUnknownTerm(String termName, boolean isClassTerm) {
299    // create new term instance
300    Term term = UnknownTerm.build(termName, isClassTerm);
301    addTerm(termName, term);
302    addTerm(term.qualifiedName(), term);
303    return term;
304  }
305
306  private Term createBibtexTerm(String termName, boolean qualified) {
307    // create new term instance
308    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
309    addTerm(term.qualifiedName(), term);
310    addTerm(term.prefixedName(), term);
311    return term;
312  }
313
314}