001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
027
028/**
029 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
030 * all unknown Term instances.
031 */
032public class TermFactory {
033
034  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
035  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
036  private static TermFactory singleton;
037  private static boolean initialized = false;
038  private static final Object LOCK = new Object();
039
040  private final Map<String, Term> terms = new HashMap<String, Term>();
041  private final Map<String, Term> classTerms = new HashMap<String, Term>();
042  private final Set<Class<? extends Enum>> registeredEnumClasses = new HashSet<>();
043
044  public static TermFactory instance() {
045    if (initialized) {
046      return singleton;
047    }
048
049    synchronized (LOCK) {
050      if (singleton == null) {
051        LOG.debug("Building new TermFactory instance");
052        singleton = new TermFactory();
053        singleton.loadKnownTerms();
054        initialized = true;
055      }
056    }
057
058    return singleton;
059  }
060
061  private TermFactory() {
062  }
063
064  private void loadKnownTerms() {
065    registerTermEnum(DwcTerm.class);
066    registerTermEnum(DcTerm.class, "dct");
067    registerTermEnum(GbifTerm.class);
068    registerTermEnum(GbifInternalTerm.class);
069    registerTermEnum(IucnTerm.class);
070    registerTermEnum(DcElement.class);
071    registerTermEnum(AcefTerm.class);
072    registerTermEnum(AcTerm.class);
073    registerTermEnum(XmpTerm.class, "adobe");
074    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names
075  
076    registerQualifiedTermEnum(DwcaTerm.class);
077    addTerm(BibTexTerm.CLASS_TERM);
078  }
079
080  /**
081   * @return the set of term enum classes that have been registered with this TermFactory
082   */
083  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
084    return Collections.unmodifiableSet(registeredEnumClasses);
085  }
086
087  public void registerTerm(Term term) {
088    addTerm(term);
089  }
090
091  public void registerTerm(UnknownTerm term) {
092    addTerm(term.qualifiedName(), term);
093  }
094
095  /**
096   * Registers all terms from a term enumeration.
097   * If the same class is registered again it will be silently ignored.
098   *
099   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
100   */
101  public <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
102    if (registeredEnumClasses.contains(termClass)) {
103      LOG.debug("{} is already registered", termClass);
104    } else {
105      registeredEnumClasses.add(termClass);
106      for (T term : termClass.getEnumConstants()) {
107        // add regular term representations (simple, prefixed & qualified)
108        addTerm(term, altPrefixes);
109        // add alternatives
110        for (String alt : term.alternativeNames()) {
111          addTerm(alt, term);
112          if (!alt.startsWith("http") && !alt.contains(":")) {
113            addTerm(term.prefix() + ":" + alt, term);
114            addTerm(term.namespace().resolve(alt).toString(), term);
115            for (String pre : altPrefixes) {
116              addTerm(pre + ":" + alt, term);
117            }
118          }
119        }
120      }
121    }
122  }
123  
124  /**
125   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
126   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
127   */
128  public <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
129    if (registeredEnumClasses.contains(termClass)) {
130      LOG.debug("{} is already registered", termClass);
131    } else {
132      registeredEnumClasses.add(termClass);
133      for (T term : termClass.getEnumConstants()) {
134        // add only the prefixed and qualified representation to avoid clashes
135        addTerm(term.prefixedName(), term);
136        addTerm(term.qualifiedName(), term);
137      }
138    }
139  }
140  
141  private void addTerm(Term term, String ... altPrefixes) {
142    addTerm(term.simpleName(), term);
143    addTerm(term.prefixedName(), term);
144    addTerm(term.qualifiedName(), term);
145    for (String pre : altPrefixes) {
146      addTerm(pre + ":" + term.simpleName(), term);
147    }
148  }
149
150  private void addTerm(String key, Term term) {
151    if (key == null || key.trim().isEmpty()) {
152      return;
153    }
154
155    // keep class terms distinct
156    Map<String, Term> map = termMap(term.isClass());
157    if (map.containsKey(key)) {
158      Term t1 = map.get(key);
159      if (!t1.equals(term)) {
160        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", map.get(key), term, key, map.get(key));
161      }
162    } else {
163      map.put(key, term);
164      // also add a normalised version
165      key = normaliseTerm(key);
166      if (!map.containsKey(key)) {
167        map.put(key, term);
168      }
169    }
170  }
171
172  private Map<String, Term> termMap(boolean isClass) {
173    return isClass ? classTerms : terms;
174  }
175
176  /**
177   * @return a purely alphanumerical, lower cased term with all other characters replaced
178   */
179  public static String normaliseTerm(String term) {
180    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
181    // remove http(s)
182    x = x.replaceFirst("^https?", "");
183    if (x.isEmpty()) {
184      return "";
185    }
186    return x.toLowerCase();
187  }
188
189  /**
190   * This is the main method to get a term from the factory searching both for property or class terms.
191   * It will lookup matching terms applying some normalization and known synonyms first.
192   * In case of ambiguous terms Class terms will be preferred.
193   *
194   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
195   * all terms with the same qualified name return a single UnknownTerm instance.
196   *
197   * For clearly bad term names an IllegalArgumentException is thrown.
198   * For example in the case of a simple name containing whitespace like "hello tom".
199   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
200   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
201   * instance is created.
202   */
203  public Term findTerm(final String termName) throws IllegalArgumentException {
204    // First try an exact match.
205    if (terms.containsKey(termName)) {
206      return terms.get(termName);
207    }
208
209    // Try class term
210    Term t = findTermOnly(termName, true);
211    if (t == null) {
212      // Try property term
213      t = findTermOnly(termName, false);
214    }
215    // create new term if needed
216    if (t == null) {
217      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX+":")) {
218        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
219      } else {
220        t = createUnknownTerm(termName, false);
221      }
222    }
223    return t;
224  }
225
226  /**
227   * This method works just as findTerm(final String termName) but restricts
228   * the results to just property terms.
229   */
230  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
231    return findTerm(termName, false);
232  }
233
234  /**
235   * This method works just as findTerm(final String termName) but restricts
236   * the results to just class terms.
237   */
238  public Term findClassTerm(final String termName) throws IllegalArgumentException {
239    return findTerm(termName, true);
240  }
241
242  /**
243   * This method works just as findTerm(final String termName) but restricts
244   * the results to just property or class terms.
245   */
246  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
247    if (termName == null || termName.trim().isEmpty()) {
248      return null;
249    }
250
251    Term t = findTermOnly(termName, isClassTerm);
252    // create new term if needed
253    if (t == null) {
254      t = createUnknownTerm(termName, isClassTerm);
255    }
256    return t;
257  }
258
259  /**
260   * Does not create Unknown terms
261   */
262  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
263    if (termName == null || termName.trim().isEmpty()) {
264      return null;
265    }
266
267    Map<String, Term> map = termMap(isClassTerm);
268    // first try term just as it is
269    if (map.containsKey(termName)) {
270      return map.get(termName);
271    }
272
273    // try normalised term otherwise
274    if (map.containsKey(normaliseTerm(termName))) {
275      return map.get(normaliseTerm(termName));
276    }
277    return null;
278  }
279
280  private Term createUnknownTerm(String termName, boolean isClassTerm) {
281    // create new term instance
282    Term term = UnknownTerm.build(termName, isClassTerm);
283    addTerm(termName, term);
284    addTerm(term.qualifiedName(), term);
285    return term;
286  }
287
288  private Term createBibtexTerm(String termName, boolean qualified) {
289    // create new term instance
290    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
291    addTerm(term.qualifiedName(), term);
292    addTerm(term.prefixedName(), term);
293    return term;
294  }
295
296}