/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.dwc.terms;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
 * all unknown Term instances.
 *
 * <p>Thread-safety: lookups read from concurrent maps without locking. Every operation that adds terms
 * (registration and the creation of unknown terms) is serialized on the factory instance, and a missing term is
 * looked up again under that lock before a new instance is created, so that concurrent callers asking for the
 * same unknown name receive the same instance.
 */
public class TermFactory {

  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
  // compiled once instead of on every normaliseTerm call
  private static final Pattern HTTP_PREFIX_PATTERN = Pattern.compile("^https?");
  private static final Object LOCK = new Object();
  private static TermFactory singleton;
  // volatile: the write in instance() happens after the singleton has been fully loaded, so a thread that reads
  // true here is guaranteed to also see the completely populated singleton
  private static volatile boolean initialized = false;

  // concurrent maps so that lookups may run without locking while another thread registers or creates terms
  private final Map<String, Term> terms = new ConcurrentHashMap<String, Term>();
  private final Map<String, Term> classTerms = new ConcurrentHashMap<String, Term>();
  private final Set<Class<? extends Enum>> registeredEnumClasses =
      Collections.newSetFromMap(new ConcurrentHashMap<Class<? extends Enum>, Boolean>());

  /**
   * @return the shared factory instance, built and populated with all known terms on first use
   */
  public static TermFactory instance() {
    // fast path without locking once the singleton has been published
    if (initialized) {
      return singleton;
    }

    synchronized (LOCK) {
      if (singleton == null) {
        LOG.debug("Building new TermFactory instance");
        singleton = new TermFactory();
        singleton.loadKnownTerms();
        // publish only after all known terms are loaded
        initialized = true;
      }
      return singleton;
    }
  }

  private TermFactory() {
  }

  /**
   * Registers all term enumerations and class terms shipped with this library.
   */
  private void loadKnownTerms() {
    registerTermEnum(DwcTerm.class);
    registerTermEnum(DcTerm.class, "dct");
    registerTermEnum(GbifTerm.class);
    registerTermEnum(GbifInternalTerm.class);
    registerTermEnum(IucnTerm.class);
    registerTermEnum(DcElement.class);
    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
    registerTermEnum(AcTerm.class);
    registerTermEnum(PlaziTerm.class);
    registerTermEnum(GadmTerm.class);
    registerTermEnum(DwcaTerm.class);
    registerTermEnum(XmpTerm.class, "adobe");
    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names

    // NOTE(review): DwcaTerm has already been registered above via registerTermEnum, so this call finds the class
    // in registeredEnumClasses and does nothing. Confirm which of the two registrations is intended.
    registerQualifiedTermEnum(DwcaTerm.class);
    addTerm(BibTexTerm.CLASS_TERM);
  }

  /**
   * @return the set of term enum classes that have been registered with this TermFactory, as a read-only view
   */
  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
    return Collections.unmodifiableSet(registeredEnumClasses);
  }

  /**
   * Registers a single term under its simple, prefixed and qualified name.
   * Names that are already taken keep their earlier term.
   */
  public synchronized void registerTerm(Term term) {
    addTerm(term);
  }

  /**
   * Registers an unknown term under its qualified name only.
   */
  public synchronized void registerTerm(UnknownTerm term) {
    addTerm(term.qualifiedName(), term);
  }

  /**
   * Registers all terms from a term enumeration.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass the term enumeration to register
   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
   */
  public synchronized <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
    if (registeredEnumClasses.contains(termClass)) {
      LOG.debug("{} is already registered", termClass);
      return;
    }

    registeredEnumClasses.add(termClass);
    for (T term : termClass.getEnumConstants()) {
      // add regular term representations (simple, prefixed & qualified)
      addTerm(term, altPrefixes);
      // add alternatives
      for (String alt : term.alternativeNames()) {
        addTerm(alt, term);
        // a plain alternative name (neither a URI nor already prefixed) is also registered in its
        // prefixed and qualified forms
        if (!alt.startsWith("http") && !alt.contains(":")) {
          addTerm(term.prefix() + ":" + alt, term);
          addTerm(term.namespace().resolve(alt).toString(), term);
          for (String pre : altPrefixes) {
            addTerm(pre + ":" + alt, term);
          }
        }
      }
    }
  }

  /**
   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
   * If the same class is registered again it will be silently ignored.
   */
  public synchronized <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
    if (registeredEnumClasses.contains(termClass)) {
      LOG.debug("{} is already registered", termClass);
      return;
    }

    registeredEnumClasses.add(termClass);
    for (T term : termClass.getEnumConstants()) {
      // add only the prefixed and qualified representation to avoid clashes
      addTerm(term.prefixedName(), term);
      addTerm(term.qualifiedName(), term);
    }
  }

  /**
   * Adds a term under its simple, prefixed and qualified name and under each alternative prefix
   * combined with its simple name.
   */
  private void addTerm(Term term, String ... altPrefixes) {
    addTerm(term.simpleName(), term);
    addTerm(term.prefixedName(), term);
    addTerm(term.qualifiedName(), term);
    for (String pre : altPrefixes) {
      addTerm(pre + ":" + term.simpleName(), term);
    }
  }

  /**
   * Adds a term under the given key and, if still free, under the normalised form of that key.
   * Null or blank keys are ignored. A key that is already taken keeps its earlier term; a clash
   * between two different terms is logged.
   * Synchronized so that the check-then-put sequence cannot interleave with another writer.
   */
  private synchronized void addTerm(String key, Term term) {
    if (key == null || key.trim().isEmpty()) {
      return;
    }

    // keep class terms distinct
    Map<String, Term> map = termMap(term.isClass());
    Term existing = map.get(key);
    if (existing != null) {
      if (!existing.equals(term)) {
        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", existing, term, key, existing);
      }
      return;
    }

    map.put(key, term);
    // also add a normalised version
    String normalised = normaliseTerm(key);
    if (!map.containsKey(normalised)) {
      map.put(normalised, term);
    }
  }

  private Map<String, Term> termMap(boolean isClass) {
    return isClass ? classTerms : terms;
  }

  /**
   * Normalises a term name for lenient lookups: every character other than ASCII letters, digits,
   * '#' and '-' is removed, a leading "http" or "https" is stripped and the rest is lower cased.
   * Lower casing uses {@link Locale#ROOT} so that the result does not depend on the default locale
   * of the JVM (e.g. "I" must become "i", not a dotless i, under a Turkish locale).
   *
   * @param term the term name to normalise, must not be null
   * @return the normalised, lower cased term, possibly empty
   */
  public static String normaliseTerm(String term) {
    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
    // remove http(s)
    x = HTTP_PREFIX_PATTERN.matcher(x).replaceFirst("");
    return x.toLowerCase(Locale.ROOT);
  }

  /**
   * This is the main method to get a term from the factory searching both for property or class terms.
   * An exact match among the property terms is returned first. Otherwise matching terms are looked up applying
   * some normalization and known synonyms, and in case of ambiguous terms Class terms will be preferred.
   *
   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
   * all terms with the same qualified name return a single UnknownTerm instance.
   * Names starting with the BibTex namespace or prefix result in a new BibTexTerm instead.
   *
   * For clearly bad term names an IllegalArgumentException is thrown.
   * For example in the case of a simple name containing whitespace like "hello tom".
   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
   * instance is created.
   *
   * @param termName the simple, prefixed or qualified name to look up
   * @return the matching or newly created term, or null if termName is null
   */
  public Term findTerm(final String termName) throws IllegalArgumentException {
    // consistent with the other find methods; the concurrent maps also do not accept null keys
    if (termName == null) {
      return null;
    }

    // First try an exact match.
    Term t = terms.get(termName);
    if (t == null) {
      t = findKnownTerm(termName);
    }
    // create new term if needed
    if (t == null) {
      synchronized (this) {
        // another thread may have added the term while this one was waiting for the lock
        t = findKnownTerm(termName);
        if (t == null) {
          boolean qualifiedBibtex = termName.startsWith(BibTexTerm.NS);
          if (qualifiedBibtex || termName.startsWith(BibTexTerm.PREFIX + ":")) {
            t = createBibtexTerm(termName, qualifiedBibtex);
          } else {
            t = createUnknownTerm(termName, false);
          }
        }
      }
    }
    return t;
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property terms.
   */
  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, false);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just class terms.
   */
  public Term findClassTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, true);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to either just property or just class terms.
   *
   * @param termName the simple, prefixed or qualified name to look up
   * @param isClassTerm true to search and create class terms, false for property terms
   * @return the matching or newly created term, or null if termName is null or blank
   */
  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Term t = findTermOnly(termName, isClassTerm);
    // create new term if needed
    if (t == null) {
      synchronized (this) {
        // another thread may have added the term while this one was waiting for the lock
        t = findTermOnly(termName, isClassTerm);
        if (t == null) {
          t = createUnknownTerm(termName, isClassTerm);
        }
      }
    }
    return t;
  }

  /**
   * Looks for an existing class term first and for a property term otherwise.
   * Does not create Unknown terms.
   */
  private Term findKnownTerm(final String termName) {
    Term t = findTermOnly(termName, true);
    return t != null ? t : findTermOnly(termName, false);
  }

  /**
   * Looks up the name as given and, failing that, in its normalised form.
   * Does not create Unknown terms.
   *
   * @return the matching term or null if the name is null, blank or not known
   */
  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Map<String, Term> map = termMap(isClassTerm);
    // first try term just as it is
    Term t = map.get(termName);
    if (t == null) {
      // try normalised term otherwise
      t = map.get(normaliseTerm(termName));
    }
    return t;
  }

  /**
   * Builds a new UnknownTerm and registers it under the requested name and its qualified name.
   * Callers hold the instance lock and have verified that the term does not exist yet.
   */
  private Term createUnknownTerm(String termName, boolean isClassTerm) {
    // create new term instance
    Term term = UnknownTerm.build(termName, isClassTerm);
    addTerm(termName, term);
    addTerm(term.qualifiedName(), term);
    return term;
  }

  /**
   * Builds a new BibTexTerm from a qualified or prefixed name and registers it under its
   * qualified and prefixed name.
   * Callers hold the instance lock and have verified that the term does not exist yet.
   */
  private Term createBibtexTerm(String termName, boolean qualified) {
    // create new term instance
    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
    addTerm(term.qualifiedName(), term);
    addTerm(term.prefixedName(), term);
    return term;
  }

}