/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.dwc.terms;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple, threadsafe factory for terms that knows about all ConceptTerms of this library and keeps singletons for
 * all unknown Term instances.
 *
 * <p>Thread-safety: lookups read concurrent maps without locking, while every registration and every creation of a
 * new term is serialized on the factory instance.
 */
public class TermFactory {

  private static final Logger LOG = LoggerFactory.getLogger(TermFactory.class);
  private static final Pattern NON_ALPHA_NUM_PATTERN = Pattern.compile("[^a-zA-Z0-9#-]+");
  // compiled once instead of on every normaliseTerm call
  private static final Pattern LEADING_HTTP_PATTERN = Pattern.compile("^https?");
  private static TermFactory singleton;
  // volatile: the lock-free fast path in instance() must observe everything written before the flag was set
  private static volatile boolean initialized = false;
  private static final Object LOCK = new Object();

  // property terms and class terms are kept apart, each keyed by every known name of a term
  private final Map<String, Term> terms = new ConcurrentHashMap<>();
  private final Map<String, Term> classTerms = new ConcurrentHashMap<>();
  private final Set<Class<? extends Enum<?>>> registeredEnumClasses = ConcurrentHashMap.newKeySet();

  /**
   * @return the shared factory instance, built and loaded with all known terms on first use
   */
  public static TermFactory instance() {
    if (initialized) {
      return singleton;
    }

    synchronized (LOCK) {
      if (singleton == null) {
        LOG.debug("Building new TermFactory instance");
        singleton = new TermFactory();
        singleton.loadKnownTerms();
        // only flip the flag once loading has finished, so the fast path never sees a half loaded factory
        initialized = true;
      }
    }

    return singleton;
  }

  private TermFactory() {
  }

  /**
   * Registers all term enumerations shipped with this library.
   */
  private void loadKnownTerms() {
    registerTermEnum(DwcTerm.class);
    registerTermEnum(DcTerm.class, "dct");
    registerTermEnum(GbifTerm.class);
    registerTermEnum(GbifInternalTerm.class);
    registerTermEnum(IucnTerm.class);
    registerTermEnum(DcElement.class);
    registerTermEnum(AcefTerm.class, "http://rs.col.plus/terms/acef/");
    registerTermEnum(PlaziTerm.class);
    registerTermEnum(GadmTerm.class);
    registerTermEnum(DwcaTerm.class);

    // Audubon core
    registerTermEnum(AcTerm.class);
    registerTermEnum(ExifTerm.class);
    registerTermEnum(IptcTerm.class);
    registerTermEnum(PhotoshopTerm.class);
    registerTermEnum(XmpTerm.class, "adobe");
    registerTermEnum(XmpRightsTerm.class, "xmp", "adobe"); // the same as above, but luckily different simple term names

    // DWCA extensions
    registerTermEnum(ChronoTerm.class);
    registerTermEnum(GbifDnaTerm.class);
    registerTermEnum(GbifMiqeTerm.class);
    registerTermEnum(GermplasmTerm.class);
    registerTermEnum(GgbnTerm.class);
    registerTermEnum(MixsTerm.class);
    registerTermEnum(ObisTerm.class);
    registerTermEnum(Wgs84GeoPositioningTerm.class);
    registerTermEnum(EcoTerm.class);

    // DwcaTerm has already been registered above, so this call is silently ignored
    registerQualifiedTermEnum(DwcaTerm.class);
    addTerm(BibTexTerm.CLASS_TERM);
  }

  /**
   * @return the set of term enum classes that have been registered with this TermFactory
   */
  public Set<Class<? extends Enum<?>>> listRegisteredTermEnums() {
    return Collections.unmodifiableSet(registeredEnumClasses);
  }

  /**
   * Registers a single term under its simple, prefixed and qualified name.
   */
  public synchronized void registerTerm(Term term) {
    addTerm(term);
  }

  /**
   * Registers an unknown term under its qualified name only.
   */
  public synchronized void registerTerm(UnknownTerm term) {
    addTerm(term.qualifiedName(), term);
  }

  /**
   * Registers all terms from a term enumeration.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass   the term enumeration to register
   * @param altPrefixes alternative prefixes to be used to register simple prefixed term names
   */
  public synchronized <T extends Enum<?> & Term & AlternativeNames> void registerTermEnum(Class<T> termClass, String... altPrefixes) {
    if (!registeredEnumClasses.add(termClass)) {
      LOG.debug("{} is already registered", termClass);
      return;
    }

    for (T term : termClass.getEnumConstants()) {
      // add regular term representations (simple, prefixed & qualified)
      addTerm(term, altPrefixes);
      // add alternatives
      for (String alt : term.alternativeNames()) {
        addTerm(alt, term);
        // a plain alternative name (no URI, no prefix) is additionally registered prefixed and qualified
        if (!alt.startsWith("http") && !alt.contains(":")) {
          addTerm(term.prefix() + ":" + alt, term);
          addTerm(term.namespace().resolve(alt).toString(), term);
          for (String pre : altPrefixes) {
            addTerm(pre + ":" + alt, term);
          }
        }
      }
    }
  }

  /**
   * Registers all terms from a new term enumeration, but only adds their qualified and prefixed names.
   * This is to avoid clashes with other usually more important terms that should be known by their simple name.
   * If the same class is registered again it will be silently ignored.
   *
   * @param termClass the term enumeration to register
   */
  public synchronized <T extends Enum<?> & Term> void registerQualifiedTermEnum(Class<T> termClass) {
    if (!registeredEnumClasses.add(termClass)) {
      LOG.debug("{} is already registered", termClass);
      return;
    }

    for (T term : termClass.getEnumConstants()) {
      // add only the prefixed and qualified representation to avoid clashes
      addTerm(term.prefixedName(), term);
      addTerm(term.qualifiedName(), term);
    }
  }

  /**
   * Adds a term under its simple, prefixed and qualified name plus one prefixed name per alternative prefix.
   */
  private void addTerm(Term term, String... altPrefixes) {
    addTerm(term.simpleName(), term);
    addTerm(term.prefixedName(), term);
    addTerm(term.qualifiedName(), term);
    for (String pre : altPrefixes) {
      addTerm(pre + ":" + term.simpleName(), term);
    }
  }

  /**
   * Checks whether a string is null or empty (after trimming).
   */
  private static boolean isNullOrEmpty(String s) {
    return s == null || s.trim().isEmpty();
  }

  /**
   * Adds a term under the given key and, if still free, under the normalised form of that key.
   * Null or blank keys are ignored. A key that is already taken keeps its earlier term.
   */
  private synchronized void addTerm(String key, Term term) {
    if (isNullOrEmpty(key)) {
      return;
    }

    // keep class terms distinct
    Map<String, Term> map = termMap(term.isClass());
    Term existing = map.putIfAbsent(key, term);
    if (existing != null) {
      if (!existing.equals(term)) {
        LOG.info("{} terms {} and {} are both known as \"{}\". Keeping only earlier {}", term.isClass() ? "Class" : "Property", existing, term, key, existing);
      }
      return;
    }

    // also add a normalised version, unless another term already owns it
    map.computeIfAbsent(normaliseTerm(key), k -> term);
  }

  private Map<String, Term> termMap(boolean isClass) {
    return isClass ? classTerms : terms;
  }

  /**
   * Normalises a term name for lenient lookups: every character other than ASCII letters, digits, '#' and '-' is
   * removed, the result is lower cased and a leading "http" or "https" is stripped.
   *
   * @return the normalised, lower cased term name
   */
  public static String normaliseTerm(String term) {
    // lower case before stripping the scheme so that "HTTP://..." normalises exactly like "http://..."
    String cleaned = NON_ALPHA_NUM_PATTERN.matcher(term).replaceAll("").toLowerCase(Locale.ROOT);
    // remove http(s)
    return LEADING_HTTP_PATTERN.matcher(cleaned).replaceFirst("");
  }

  /**
   * This is the main method to get a term from the factory searching both for property or class terms.
   * An exact match among the property terms is returned first. Otherwise matching terms are looked up applying
   * some normalization and known synonyms, and in case of ambiguous terms Class terms will be preferred.
   *
   * If nothing matches the factory creates a new UnknownTerm property instance and keeps it for further requests so that
   * all terms with the same qualified name return a single UnknownTerm instance.
   *
   * For clearly bad term names an IllegalArgumentException is thrown.
   * For example in the case of a simple name containing whitespace like "hello tom".
   * Ideally the term names to be looked up should be full URIs, but simple names made up of alphanumerics and dashes
   * will also work fine. Unknown simple names will be put into the namespace http://unknown.org when a new UnknownTerm
   * instance is created.
   *
   * @return the matching or newly created term, or null if the term name is null or blank
   */
  public Term findTerm(final String termName) throws IllegalArgumentException {
    // same contract as findTerm(String, boolean) instead of an incidental NullPointerException
    if (isNullOrEmpty(termName)) {
      return null;
    }

    // First try an exact match.
    Term t = terms.get(termName);
    if (t != null) {
      return t;
    }

    // Try class term
    t = findTermOnly(termName, true);
    if (t == null) {
      // Try property term
      t = findTermOnly(termName, false);
    }
    // create new term if needed
    if (t == null) {
      boolean qualified = termName.startsWith(BibTexTerm.NS);
      if (qualified || termName.startsWith(BibTexTerm.PREFIX + ":")) {
        t = createBibtexTerm(termName, qualified);
      } else {
        t = createUnknownTerm(termName, false);
      }
    }
    return t;
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property terms.
   */
  public Term findPropertyTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, false);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just class terms.
   */
  public Term findClassTerm(final String termName) throws IllegalArgumentException {
    return findTerm(termName, true);
  }

  /**
   * This method works just as findTerm(final String termName) but restricts
   * the results to just property or class terms.
   *
   * @param isClassTerm true to search and create class terms, false for property terms
   * @return the matching or newly created term, or null if the term name is null or blank
   */
  public Term findTerm(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (isNullOrEmpty(termName)) {
      return null;
    }

    Term t = findTermOnly(termName, isClassTerm);
    // create new term if needed
    if (t == null) {
      t = createUnknownTerm(termName, isClassTerm);
    }
    return t;
  }

  /**
   * Looks a term up by its exact name and then by its normalised name.
   * Does not create Unknown terms.
   *
   * @return the registered term or null if there is none or the name is null or blank
   */
  private Term findTermOnly(final String termName, boolean isClassTerm) throws IllegalArgumentException {
    if (isNullOrEmpty(termName)) {
      return null;
    }

    Map<String, Term> map = termMap(isClassTerm);
    // first try term just as it is
    Term t = map.get(termName);
    if (t == null) {
      // try normalised term otherwise
      t = map.get(normaliseTerm(termName));
    }
    return t;
  }

  /**
   * Creates an UnknownTerm and registers it under the requested name and its qualified name.
   */
  private synchronized Term createUnknownTerm(String termName, boolean isClassTerm) {
    // another thread may have registered the term between the caller's lookup and taking the lock
    Term existing = findTermOnly(termName, isClassTerm);
    if (existing != null) {
      return existing;
    }

    // create new term instance
    Term term = UnknownTerm.build(termName, isClassTerm);
    addTerm(termName, term);
    addTerm(term.qualifiedName(), term);
    return term;
  }

  /**
   * Creates a BibTexTerm from a qualified or prefixed name and registers it under its qualified and prefixed name.
   */
  private synchronized Term createBibtexTerm(String termName, boolean qualified) {
    // another thread may have registered the term between the caller's lookup and taking the lock
    Term existing = findTermOnly(termName, true);
    if (existing == null) {
      existing = findTermOnly(termName, false);
    }
    if (existing != null) {
      return existing;
    }

    // create new term instance
    Term term = qualified ? BibTexTerm.buildFromURI(termName) : BibTexTerm.buildFromPrefix(termName);
    addTerm(term.qualifiedName(), term);
    addTerm(term.prefixedName(), term);
    return term;
  }

}