/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.dwc.terms;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
 * all unknown Term instances.
 *
 * <p>Thread-safety: lookups read from concurrent maps without locking. Every mutation (registering terms or
 * creating a term that was not known before) is serialized on this instance's monitor.
 */
public class TermFactory {

  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
  // compiled once instead of on every normaliseTerm call
  private static final Pattern HTTP_SCHEME_PATTERN = Pattern.compile("^https?");
  private static TermFactory singleton;
  // volatile: the lock-free fast path in instance() reads this flag, and the volatile write after loading is
  // what makes the fully populated factory visible to threads that never enter the synchronized block
  private static volatile boolean initialized = false;
  private static final Object LOCK = new Object();

  // property terms by simple, prefixed, qualified, alternative and normalised name
  private final Map<String, Term> terms = new ConcurrentHashMap<String, Term>();
  // class terms, kept apart from property terms so both can share a name
  private final Map<String, Term> classTerms = new ConcurrentHashMap<String, Term>();
  private final Set<Class<? extends Enum>> registeredEnumClasses =
      Collections.newSetFromMap(new ConcurrentHashMap<Class<? extends Enum>, Boolean>());

  /**
   * Returns the shared factory, building and populating it with all known term enumerations on first use.
   *
   * @return the singleton TermFactory
   */
  public static TermFactory instance() {
    if (initialized) {
      return singleton;
    }

    synchronized (LOCK) {
      if (singleton == null) {
        LOG.debug("Building new TermFactory instance");
        // The reference is assigned before the terms are loaded, so a nested instance() call made by the
        // loading thread while it holds LOCK gets this same object instead of building a second factory.
        singleton = new TermFactory();
        singleton.loadKnownTerms();
        initialized = true;
      }
    }

    return singleton;
  }

  private TermFactory() {
  }

  /**
   * Registers every term enumeration shipped with this library.
   * Registration order matters: on a name clash the term registered first keeps the key.
   */
  private void loadKnownTerms() {
    registerTermEnum(DwcTerm.class);
    registerTermEnum(DcTerm.class, "dct");
    registerTermEnum(GbifTerm.class);
    registerTermEnum(GbifInternalTerm.class);
    registerTermEnum(IucnTerm.class);
    registerTermEnum(DcElement.class);
    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
    registerTermEnum(PlaziTerm.class);
    registerTermEnum(GadmTerm.class);
    registerTermEnum(DwcaTerm.class);

    // Audubon core
    registerTermEnum(AcTerm.class);
    registerTermEnum(ExifTerm.class);
    registerTermEnum(IptcTerm.class);
    registerTermEnum(PhotoshopTerm.class);
    registerTermEnum(XmpTerm.class, "adobe");
    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names

    // DWCA extensions
    registerTermEnum(ChronoTerm.class);
    registerTermEnum(GbifDnaTerm.class);
    registerTermEnum(GbifMiqeTerm.class);
    registerTermEnum(GermplasmTerm.class);
    registerTermEnum(GgbnTerm.class);
    registerTermEnum(MixsTerm.class);
    registerTermEnum(ObisTerm.class);
    registerTermEnum(Wgs84GeoPositioningTerm.class);
    registerTermEnum(EcoTerm.class);

    // NOTE(review): DwcaTerm was already registered via registerTermEnum above, so this call only logs
    // "already registered" and returns. Confirm whether it is still needed.
    registerQualifiedTermEnum(DwcaTerm.class);
    addTerm(BibTexTerm.CLASS_TERM);
  }

  /**
   * @return the set of term enum classes that have been registered with this TermFactory,
   *         as an unmodifiable live view
   */
  public Set<Class<? extends Enum>> listRegisteredTermEnums() {
    return Collections.unmodifiableSet(registeredEnumClasses);
  }

  /**
   * Registers a single term under its simple, prefixed and qualified name.
   * Names that are already taken keep their earlier term.
   *
   * @param term the term to register
   */
  public synchronized void registerTerm(Term term) {
    addTerm(term);
  }

  /**
   * Registers an unknown term under its qualified name only.
   *
   * @param term the unknown term to register
   */
  public synchronized void registerTerm(UnknownTerm term) {
    addTerm(term.qualifiedName(), term);
  }

  /**
   * Registers all terms from a term enumeration.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass   the term enumeration to register
   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
   */
  public synchronized <T extends Enum & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String ... altPrefixes) {
    if (registeredEnumClasses.contains(termClass)) {
      LOG.debug("{} is already registered", termClass);
    } else {
      registeredEnumClasses.add(termClass);
      for (T term : termClass.getEnumConstants()) {
        // add regular term representations (simple, prefixed & qualified)
        addTerm(term, altPrefixes);
        // add alternatives
        for (String alt : term.alternativeNames()) {
          addTerm(alt, term);
          // an alternative that is a plain simple name is also registered in prefixed and qualified form
          if (!alt.startsWith("http") && !alt.contains(":")) {
            addTerm(term.prefix() + ":" + alt, term);
            addTerm(term.namespace().resolve(alt).toString(), term);
            for (String pre : altPrefixes) {
              addTerm(pre + ":" + alt, term);
            }
          }
        }
      }
    }
  }

  /**
   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass the term enumeration to register
   */
  public synchronized <T extends Enum & Term> void registerQualifiedTermEnum(Class<T> termClass) {
    if (registeredEnumClasses.contains(termClass)) {
      LOG.debug("{} is already registered", termClass);
    } else {
      registeredEnumClasses.add(termClass);
      for (T term : termClass.getEnumConstants()) {
        // add only the prefixed and qualified representation to avoid clashes
        addTerm(term.prefixedName(), term);
        addTerm(term.qualifiedName(), term);
      }
    }
  }

  /**
   * Adds a term under its simple, prefixed and qualified name plus one "prefix:simpleName" key per alternative
   * prefix. Callers must hold this instance's monitor.
   */
  private void addTerm(Term term, String ... altPrefixes) {
    addTerm(term.simpleName(), term);
    addTerm(term.prefixedName(), term);
    addTerm(term.qualifiedName(), term);
    for (String pre : altPrefixes) {
      addTerm(pre + ":" + term.simpleName(), term);
    }
  }

  /**
   * Adds a term under the given key and, if still free, under the normalised form of that key.
   * Null or blank keys are ignored. A key that is already taken keeps its earlier term.
   * Callers must hold this instance's monitor.
   */
  private void addTerm(String key, Term term) {
    if (key == null || key.trim().isEmpty()) {
      return;
    }

    // keep class terms distinct
    Map<String, Term> map = termMap(term.isClass());
    Term existing = map.get(key);
    if (existing != null) {
      if (!existing.equals(term)) {
        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", existing, term, key, existing);
      }
    } else {
      map.put(key, term);
      // also add a normalised version
      String normalised = normaliseTerm(key);
      if (!map.containsKey(normalised)) {
        map.put(normalised, term);
      }
    }
  }

  private Map<String, Term> termMap(boolean isClass) {
    return isClass ? classTerms : terms;
  }

  /**
   * Normalises a term name for lenient lookups: every character other than an ASCII letter, a digit,
   * '#' or '-' is removed, then a leading "http" or "https" is stripped and the rest is lower cased.
   * Lower casing uses {@link Locale#ROOT} so the result does not depend on the JVM default locale.
   *
   * @param term the term name to normalise, must not be null
   * @return the normalised, lower cased term name, possibly empty
   */
  public static String normaliseTerm(String term) {
    String x = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("");
    // remove http(s)
    x = HTTP_SCHEME_PATTERN.matcher(x).replaceFirst("");
    return x.toLowerCase(Locale.ROOT);
  }

  /**
   * This is the main method to get a term from the factory searching both for property or class terms.
   * An exact match among the property terms is tried first. Otherwise class terms are searched before property
   * terms, each by the given name and then by its normalised form, which also covers known synonyms.
   *
   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
   * all terms with the same qualified name return a single UnknownTerm instance.
   * Names starting with the BibTex namespace or prefix create a BibTexTerm instead.
   *
   * For clearly bad term names an IllegalArgumentException is thrown.
   * For example in the case of a simple name containing whitespace like "hello tom".
   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
   * instance is created.
   *
   * @param termName the simple, prefixed or qualified term name
   * @return the matching or newly created term, or null if termName is null or blank
   *         (same as {@link #findTerm(String, boolean)})
   */
  public Term findTerm(final String termName) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Term t = lookupAny(termName);
    // create new term if needed
    if (t == null) {
      synchronized (this) {
        // look again under the lock: another thread may have created the term in the meantime
        t = lookupAny(termName);
        if (t == null) {
          if (termName.startsWith(BibTexTerm.NS) || termName.startsWith(BibTexTerm.PREFIX + ":")) {
            t = createBibtexTerm(termName, termName.startsWith(BibTexTerm.NS));
          } else {
            t = createUnknownTerm(termName, false);
          }
        }
      }
    }
    return t;
  }

  /**
   * Looks up a non blank name without creating anything: exact property match first, then class terms,
   * then property terms including their normalised keys.
   */
  private Term lookupAny(String termName) {
    // First try an exact match.
    Term t = terms.get(termName);
    if (t == null) {
      // Try class term
      t = findTermOnly(termName, true);
    }
    if (t == null) {
      // Try property term
      t = findTermOnly(termName, false);
    }
    return t;
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property terms.
   */
  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, false);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just class terms.
   */
  public Term findClassTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, true);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property or class terms.
   *
   * @param termName    the simple, prefixed or qualified term name
   * @param isClassTerm true to search and create class terms, false for property terms
   * @return the matching or newly created UnknownTerm, or null if termName is null or blank
   */
  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Term t = findTermOnly(termName, isClassTerm);
    // create new term if needed
    if (t == null) {
      synchronized (this) {
        // look again under the lock: another thread may have created the term in the meantime
        t = findTermOnly(termName, isClassTerm);
        if (t == null) {
          t = createUnknownTerm(termName, isClassTerm);
        }
      }
    }
    return t;
  }

  /**
   * Does not create Unknown terms
   *
   * @return the term registered under the given name or its normalised form, or null if there is none
   *         or termName is null or blank
   */
  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (termName == null || termName.trim().isEmpty()) {
      return null;
    }

    Map<String, Term> map = termMap(isClassTerm);
    // first try term just as it is
    Term t = map.get(termName);
    if (t == null) {
      // try normalised term otherwise
      t = map.get(normaliseTerm(termName));
    }
    return t;
  }

  /**
   * Builds an UnknownTerm and registers it under the requested name and its qualified name.
   * Callers must hold this instance's monitor.
   */
  private Term createUnknownTerm(String termName, boolean isClassTerm) {
    // create new term instance
    Term term = UnknownTerm.build(termName, isClassTerm);
    addTerm(termName, term);
    addTerm(term.qualifiedName(), term);
    return term;
  }

  /**
   * Builds a BibTexTerm from a qualified or prefixed name and registers it under both forms.
   * Callers must hold this instance's monitor.
   */
  private Term createBibtexTerm(String termName, boolean qualified) {
    // create new term instance
    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
    addTerm(term.qualifiedName(), term);
    addTerm(term.prefixedName(), term);
    return term;
  }

}