001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
024
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027
028/**
029 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
030 * all unknown Term instances.
031 */
032public class TermFactory {
033
034  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
035  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
036  private static TermFactory singleton;
037  private static boolean initialized = false;
038  private static final Object LOCK = new Object();
039
040  private final Map<String, Term> terms = new HashMap<String, Term>();
041  private final Map<String, Term> classTerms = new HashMap<String, Term>();
042  private final Set<Class<? extends Enum>> registeredEnumClasses = new HashSet<>();
043
044  public static TermFactory instance() {
045    if (initialized) {
046      return singleton;
047    }
048
049    synchronized (LOCK) {
050      if (singleton == null) {
051        LOG.debug("Building new TermFactory instance");
052        singleton = new TermFactory();
053        singleton.loadKnownTerms();
054        initialized = true;
055      }
056    }
057
058    return singleton;
059  }
060
061  private TermFactory() {
062  }
063
064  private void loadKnownTerms() {
065    registerTermEnum(DwcTerm.class);
066    registerTermEnum(DcTerm.class, "dct");
067    registerTermEnum(GbifTerm.class);
068    registerTermEnum(GbifInternalTerm.class);
069    registerTermEnum(IucnTerm.class);
070    registerTermEnum(DcElement.class);
071    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
072    registerTermEnum(PlaziTerm.class);
073    registerTermEnum(GadmTerm.class);
074    registerTermEnum(DwcaTerm.class);
075
076    // Audubon core
077    registerTermEnum(AcTerm.class);
078    registerTermEnum(ExifTerm.class);
079    registerTermEnum(IptcTerm.class);
080    registerTermEnum(PhotoshopTerm.class);
081    registerTermEnum(XmpTerm.class, "adobe");
082    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names
083
084    // DWCA extensions
085    registerTermEnum(ChronoTerm.class);
086    registerTermEnum(GbifDnaTerm.class);
087    registerTermEnum(GbifMiqeTerm.class);
088    registerTermEnum(GermplasmTerm.class);
089    registerTermEnum(GgbnTerm.class);
090    registerTermEnum(MixsTerm.class);
091    registerTermEnum(ObisTerm.class);
092    registerTermEnum(Wgs84GeoPositioningTerm.class);
093    registerTermEnum(EcoTerm.class);
094
095    registerQualifiedTermEnum(DwcaTerm.class);
096    addTerm(BibTexTerm.CLASS_TERM);
097  }
098
099  /**
100   * @return the set of term enum classes that have been registered with this TermFactory
101   */
102  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
103    return Collections.unmodifiableSet(registeredEnumClasses);
104  }
105
106  public void registerTerm(Term term) {
107    addTerm(term);
108  }
109
110  public void registerTerm(UnknownTerm term) {
111    addTerm(term.qualifiedName(), term);
112  }
113
114  /**
115   * Registers all terms from a term enumeration.
116   * If the same class is registered again it will be silently ignored.
117   *
118   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
119   */
120  public synchronized <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
121    if (registeredEnumClasses.contains(termClass)) {
122      LOG.debug("{} is already registered", termClass);
123    } else {
124      registeredEnumClasses.add(termClass);
125      for (T term : termClass.getEnumConstants()) {
126        // add regular term representations (simple, prefixed & qualified)
127        addTerm(term, altPrefixes);
128        // add alternatives
129        for (String alt : term.alternativeNames()) {
130          addTerm(alt, term);
131          if (!alt.startsWith("http") && !alt.contains(":")) {
132            addTerm(term.prefix() + ":" + alt, term);
133            addTerm(term.namespace().resolve(alt).toString(), term);
134            for (String pre : altPrefixes) {
135              addTerm(pre + ":" + alt, term);
136            }
137          }
138        }
139      }
140    }
141  }
142
143  /**
144   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
145   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
146   */
147  public <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
148    if (registeredEnumClasses.contains(termClass)) {
149      LOG.debug("{} is already registered", termClass);
150    } else {
151      registeredEnumClasses.add(termClass);
152      for (T term : termClass.getEnumConstants()) {
153        // add only the prefixed and qualified representation to avoid clashes
154        addTerm(term.prefixedName(), term);
155        addTerm(term.qualifiedName(), term);
156      }
157    }
158  }
159
160  private void addTerm(Term term, String ... altPrefixes) {
161    addTerm(term.simpleName(), term);
162    addTerm(term.prefixedName(), term);
163    addTerm(term.qualifiedName(), term);
164    for (String pre : altPrefixes) {
165      addTerm(pre + ":" + term.simpleName(), term);
166    }
167  }
168
169  private void addTerm(String key, Term term) {
170    if (key == null || key.trim().isEmpty()) {
171      return;
172    }
173
174    // keep class terms distinct
175    Map<String, Term> map = termMap(term.isClass());
176    if (map.containsKey(key)) {
177      Term t1 = map.get(key);
178      if (!t1.equals(term)) {
179        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", map.get(key), term, key, map.get(key));
180      }
181    } else {
182      map.put(key, term);
183      // also add a normalised version
184      key = normaliseTerm(key);
185      if (!map.containsKey(key)) {
186        map.put(key, term);
187      }
188    }
189  }
190
191  private Map<String, Term> termMap(boolean isClass) {
192    return isClass ? classTerms : terms;
193  }
194
195  /**
196   * @return a purely alphanumerical, lower cased term with all other characters replaced
197   */
198  public static String normaliseTerm(String term) {
199    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
200    // remove http(s)
201    x = x.replaceFirst("^https?", "");
202    if (x.isEmpty()) {
203      return "";
204    }
205    return x.toLowerCase();
206  }
207
208  /**
209   * This is the main method to get a term from the factory searching both for property or class terms.
210   * It will lookup matching terms applying some normalization and known synonyms first.
211   * In case of ambiguous terms Class terms will be preferred.
212   *
213   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
214   * all terms with the same qualified name return a single UnknownTerm instance.
215   *
216   * For clearly bad term names an IllegalArgumentException is thrown.
217   * For example in the case of a simple name containing whitespace like "hello tom".
218   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
219   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
220   * instance is created.
221   */
222  public Term findTerm(final String termName) throws IllegalArgumentException {
223    // First try an exact match.
224    if (terms.containsKey(termName)) {
225      return terms.get(termName);
226    }
227
228    // Try class term
229    Term t = findTermOnly(termName, true);
230    if (t == null) {
231      // Try property term
232      t = findTermOnly(termName, false);
233    }
234    // create new term if needed
235    if (t == null) {
236      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX+":")) {
237        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
238      } else {
239        t = createUnknownTerm(termName, false);
240      }
241    }
242    return t;
243  }
244
245  /**
246   * This method works just as findTerm(final String termName) but restricts
247   * the results to just property terms.
248   */
249  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
250    return findTerm(termName, false);
251  }
252
253  /**
254   * This method works just as findTerm(final String termName) but restricts
255   * the results to just class terms.
256   */
257  public Term findClassTerm(final String termName) throws IllegalArgumentException {
258    return findTerm(termName, true);
259  }
260
261  /**
262   * This method works just as findTerm(final String termName) but restricts
263   * the results to just property or class terms.
264   */
265  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
266    if (termName == null || termName.trim().isEmpty()) {
267      return null;
268    }
269
270    Term t = findTermOnly(termName, isClassTerm);
271    // create new term if needed
272    if (t == null) {
273      t = createUnknownTerm(termName, isClassTerm);
274    }
275    return t;
276  }
277
278  /**
279   * Does not create Unknown terms
280   */
281  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
282    if (termName == null || termName.trim().isEmpty()) {
283      return null;
284    }
285
286    Map<String, Term> map = termMap(isClassTerm);
287    // first try term just as it is
288    if (map.containsKey(termName)) {
289      return map.get(termName);
290    }
291
292    // try normalised term otherwise
293    if (map.containsKey(normaliseTerm(termName))) {
294      return map.get(normaliseTerm(termName));
295    }
296    return null;
297  }
298
299  private Term createUnknownTerm(String termName, boolean isClassTerm) {
300    // create new term instance
301    Term term = UnknownTerm.build(termName, isClassTerm);
302    addTerm(termName, term);
303    addTerm(term.qualifiedName(), term);
304    return term;
305  }
306
307  private Term createBibtexTerm(String termName, boolean qualified) {
308    // create new term instance
309    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
310    addTerm(term.qualifiedName(), term);
311    addTerm(term.prefixedName(), term);
312    return term;
313  }
314
315}