/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.dwc.terms;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
 * all unknown Term instances.
 *
 * <p>Thread-safety: lookups read from concurrent maps without locking; every mutation of the lookup tables
 * (registration of terms and creation of unknown terms) is serialised on the factory's monitor.
 */
public class TermFactory {

  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
  // compiled once instead of on every normaliseTerm call
  private static final Pattern LEADING_HTTP_PATTERN = Pattern.compile("^https?");
  // volatile so that a fully loaded factory is safely published to threads that skip the lock
  private static volatile TermFactory singleton;
  private static final Object LOCK = new Object();

  // property terms keyed by every known name (simple, prefixed, qualified, alternative, normalised)
  private final Map<String, Term> terms = new ConcurrentHashMap<String, Term>();
  // class terms, kept apart from property terms so that equal names do not clash
  private final Map<String, Term> classTerms = new ConcurrentHashMap<String, Term>();
  private final Set<Class<? extends Enum>> registeredEnumClasses =
      Collections.newSetFromMap(new ConcurrentHashMap<Class<? extends Enum>, Boolean>());

  /**
   * @return the shared factory instance, built and loaded with all known terms on first use
   */
  public static TermFactory instance() {
    TermFactory factory = singleton;
    if (factory == null) {
      synchronized (LOCK) {
        factory = singleton;
        if (factory == null) {
          LOG.debug("Building new TermFactory instance");
          factory = new TermFactory();
          factory.loadKnownTerms();
          // publish only after all known terms are loaded, so no caller ever sees a half-filled factory
          singleton = factory;
        }
      }
    }
    return factory;
  }

  private TermFactory() {
  }

  /**
   * Registers all term enumerations shipped with this library.
   * Registration order matters: for a name claimed by several terms the earliest registered term is kept.
   */
  private void loadKnownTerms() {
    registerTermEnum(DwcTerm.class);
    registerTermEnum(DcTerm.class, "dct");
    registerTermEnum(GbifTerm.class);
    registerTermEnum(GbifInternalTerm.class);
    registerTermEnum(IucnTerm.class);
    registerTermEnum(DcElement.class);
    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
    registerTermEnum(PlaziTerm.class);
    registerTermEnum(GadmTerm.class);
    registerTermEnum(DwcaTerm.class);

    // Audubon core
    registerTermEnum(AcTerm.class);
    registerTermEnum(ExifTerm.class);
    registerTermEnum(IptcTerm.class);
    registerTermEnum(PhotoshopTerm.class);
    registerTermEnum(XmpTerm.class, "adobe");
    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names

    // DWCA extensions
    registerTermEnum(ChronoTerm.class);
    registerTermEnum(GbifDnaTerm.class);
    registerTermEnum(GbifMiqeTerm.class);
    registerTermEnum(GermplasmTerm.class);
    registerTermEnum(GgbnTerm.class);
    registerTermEnum(MixsTerm.class);
    registerTermEnum(ObisTerm.class);
    registerTermEnum(Wgs84GeoPositioningTerm.class);

    // DwcaTerm is already registered above, so this call is ignored
    registerQualifiedTermEnum(DwcaTerm.class);
    addTerm(BibTexTerm.CLASS_TERM);
  }

  /**
   * @return the set of term enum classes that have been registered with this TermFactory
   */
  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
    return Collections.unmodifiableSet(registeredEnumClasses);
  }

  /**
   * Registers a single term under its simple, prefixed and qualified name.
   *
   * @param term the term to register
   */
  public void registerTerm(Term term) {
    addTerm(term);
  }

  /**
   * Registers an unknown term under its qualified name only.
   *
   * @param term the unknown term to register
   */
  public void registerTerm(UnknownTerm term) {
    addTerm(term.qualifiedName(), term);
  }

  /**
   * Registers all terms from a term enumeration.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass the term enumeration to register
   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
   */
  public synchronized <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
    if (registeredEnumClasses.contains(termClass)) {
      LOG.debug("{} is already registered", termClass);
      return;
    }

    registeredEnumClasses.add(termClass);
    for (T term : termClass.getEnumConstants()) {
      // add regular term representations (simple, prefixed & qualified)
      addTerm(term, altPrefixes);
      // add alternatives
      for (String alt : term.alternativeNames()) {
        addTerm(alt, term);
        // a plain alternative name is additionally registered in its prefixed and qualified forms
        if (!alt.startsWith("http") && !alt.contains(":")) {
          addTerm(term.prefix() + ":" + alt, term);
          addTerm(term.namespace().resolve(alt).toString(), term);
          for (String pre : altPrefixes) {
            addTerm(pre + ":" + alt, term);
          }
        }
      }
    }
  }

  /**
   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass the term enumeration to register
   */
  public synchronized <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
    if (registeredEnumClasses.contains(termClass)) {
      LOG.debug("{} is already registered", termClass);
      return;
    }

    registeredEnumClasses.add(termClass);
    for (T term : termClass.getEnumConstants()) {
      // add only the prefixed and qualified representation to avoid clashes
      addTerm(term.prefixedName(), term);
      addTerm(term.qualifiedName(), term);
    }
  }

  /**
   * Adds a term under its simple, prefixed and qualified name plus one prefixed name per alternative prefix.
   */
  private void addTerm(Term term, String ... altPrefixes) {
    addTerm(term.simpleName(), term);
    addTerm(term.prefixedName(), term);
    addTerm(term.qualifiedName(), term);
    for (String pre : altPrefixes) {
      addTerm(pre + ":" + term.simpleName(), term);
    }
  }

  /**
   * Adds a term under the given key and under the normalised form of that key.
   * An existing entry is never replaced: the earlier term wins and the clash is logged.
   * Null or blank keys are ignored.
   */
  private synchronized void addTerm(String key, Term term) {
    if (key == null || key.trim().isEmpty()) {
      return;
    }

    // keep class terms distinct
    Map<String, Term> map = termMap(term.isClass());
    Term existing = map.get(key);
    if (existing != null) {
      if (!existing.equals(term)) {
        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", existing, term, key, existing);
      }
      return;
    }

    map.put(key, term);
    // also add a normalised version, unless nothing is left of the key after normalisation:
    // an empty key would make every other name that normalises to "" resolve to this term
    String normalised = normaliseTerm(key);
    if (!normalised.isEmpty() && !map.containsKey(normalised)) {
      map.put(normalised, term);
    }
  }

  private Map<String, Term> termMap(boolean isClass) {
    return isClass ? classTerms : terms;
  }

  /**
   * Strips every character other than ASCII letters, digits, '#' and '-', removes a leading "http" or "https"
   * and lower cases the remainder independently of the default locale.
   *
   * @param term the term name to normalise, must not be null
   * @return the normalised, lower cased term, possibly empty
   */
  public static String normaliseTerm(String term) {
    String stripped = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
    // remove http(s)
    stripped = LEADING_HTTP_PATTERN.matcher(stripped).replaceFirst("");
    // Locale.ROOT keeps the result stable, e.g. no dotless i under a Turkish default locale
    return stripped.toLowerCase(Locale.ROOT);
  }

  /**
   * This is the main method to get a term from the factory searching both for property or class terms.
   * A property term registered under exactly the given name is returned first. Otherwise matching terms are
   * looked up applying some normalization and known synonyms, preferring Class terms over property terms.
   *
   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
   * all terms with the same qualified name return a single UnknownTerm instance.
   *
   * For clearly bad term names an IllegalArgumentException is thrown.
   * For example in the case of a simple name containing whitespace like "hello tom".
   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
   * instance is created.
   *
   * @param termName the name to look up
   * @return the matching or newly created term, or null if termName is null or blank
   */
  public Term findTerm(final String termName) throws IllegalArgumentException {
    // same contract as findTerm(String, boolean); also avoids a NullPointerException further down
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    // First try an exact match.
    Term t = terms.get(termName);
    if (t != null) {
      return t;
    }

    t = findClassOrPropertyTerm(termName);
    // create new term if needed
    if (t == null) {
      if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX + ":")) {
        t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
      } else {
        t = createUnknownTerm(termName, false);
      }
    }
    return t;
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property terms.
   */
  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, false);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just class terms.
   */
  public Term findClassTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, true);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property or class terms.
   *
   * @param termName the name to look up
   * @param isClassTerm true to search and create class terms, false for property terms
   * @return the matching or newly created term, or null if termName is null or blank
   */
  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Term t = findTermOnly(termName, isClassTerm);
    // create new term if needed
    if (t == null) {
      t = createUnknownTerm(termName, isClassTerm);
    }
    return t;
  }

  /**
   * Looks the name up among the class terms first and among the property terms second.
   * Does not create Unknown terms.
   */
  private Term findClassOrPropertyTerm(final String termName) {
    Term t = findTermOnly(termName, true);
    return t != null ? t : findTermOnly(termName, false);
  }

  /**
   * Does not create Unknown terms
   */
  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Map<String, Term> map = termMap(isClassTerm);
    // first try term just as it is
    Term t = map.get(termName);
    if (t != null) {
      return t;
    }

    // try normalised term otherwise; an empty normalised name identifies nothing
    String normalised = normaliseTerm(termName);
    return normalised.isEmpty() ? null : map.get(normalised);
  }

  /**
   * Builds an UnknownTerm for the given name and registers it under that name and its qualified name.
   */
  private synchronized Term createUnknownTerm(String termName, boolean isClassTerm) {
    // another thread may have created the term between the caller's lookup and acquiring the lock
    Term existing = findTermOnly(termName, isClassTerm);
    if (existing != null) {
      return existing;
    }

    // create new term instance
    Term term = UnknownTerm.build(termName, isClassTerm);
    addTerm(termName, term);
    addTerm(term.qualifiedName(), term);
    return term;
  }

  /**
   * Builds a BibTexTerm from a qualified or prefixed name and registers it under both forms.
   */
  private synchronized Term createBibtexTerm(String termName, boolean qualified) {
    // another thread may have created the term between the caller's lookup and acquiring the lock
    Term existing = findClassOrPropertyTerm(termName);
    if (existing != null) {
      return existing;
    }

    // create new term instance
    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
    addTerm(term.qualifiedName(), term);
    addTerm(term.prefixedName(), term);
    return term;
  }

}