001/* 002 * Copyright 2021 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.dwc.terms; 017 018import java.io.IOException; 019import java.net.URI; 020import java.net.URISyntaxException; 021import java.nio.file.FileSystem; 022import java.nio.file.FileSystems; 023import java.nio.file.Files; 024import java.nio.file.Path; 025import java.nio.file.Paths; 026import java.util.Collections; 027import java.util.stream.Stream; 028import org.junit.jupiter.api.Test; 029 030import java.util.Arrays; 031import java.util.HashSet; 032import java.util.Set; 033import java.util.concurrent.ExecutorService; 034import java.util.concurrent.Executors; 035import java.util.concurrent.TimeUnit; 036 037import static org.junit.jupiter.api.Assertions.assertEquals; 038import static org.junit.jupiter.api.Assertions.assertFalse; 039import static org.junit.jupiter.api.Assertions.assertNotEquals; 040import static org.junit.jupiter.api.Assertions.assertThrows; 041 042public class TermFactoryTest { 043 044 final TermFactory TF = TermFactory.instance(); 045 046 /** 047 * GBIF code assumes a term coming from any of the Term enumerations mostly have unique simple names. 048 * This tests verifies that! 049 * 050 * AcefTerm is known to overlap, so its excluded, see skipSimple in AcefTermTest. 051 */ 052 @Test 053 public void testKnownTermUniqueness() { 054 Set<String> names = new HashSet<>(); 055 056 addTerms(names, DwcTerm.values()); 057 addTerms(names, DcTerm.values()); 058 addTerms(names, GbifTerm.values()); 059 addTerms(names, GbifInternalTerm.values()); 060 addTerms(names, IucnTerm.values()); 061 //addTerms(names, DcElement.values()); 062 //addTerms(names, AcefTerm.values()); 063 //addTerms(names, PlaziTerm.values()); 064 addTerms(names, GadmTerm.values()); 065 //addTerms(names, DwcaTerm.values()); 066 067 // Audubon Core 068 addTerms(names, termsBut(AcTerm.values(), AcTerm.Multimedia, AcTerm.relatedResourceID, AcTerm.fundingAttribution)); 069 addTerms(names, ExifTerm.values()); 070 addTerms(names, IptcTerm.values()); 071 addTerms(names, PhotoshopTerm.values()); 072 addTerms(names, XmpTerm.values()); 073 addTerms(names, XmpRightsTerm.values()); 074 075 // Terms for extensions supported in GBIF downloads. 076 addTerms(names, ChronoTerm.values()); 077 addTerms(names, GbifDnaTerm.values()); 078 addTerms(names, GbifMiqeTerm.values()); 079 addTerms(names, GermplasmTerm.values()); 080 addTerms(names, termsBut(GgbnTerm.values(), GgbnTerm.MaterialSample)); 081 addTerms(names, MixsTerm.values()); 082 addTerms(names, ObisTerm.values()); 083 addTerms(names, Wgs84GeoPositioningTerm.values()); 084 } 085 086 private Term[] termsBut(Term[] terms, Term... exclude) { 087 Set<Term> excl = new HashSet<>(Arrays.asList(exclude)); 088 return Arrays.stream(terms) 089 .filter(t -> !excl.contains(t)) 090 .toArray(Term[]::new); 091 } 092 093 private void addTerms(Set<String> names, Term[] terms) { 094 for (Term t : terms) { 095 assertFalse(names.contains(t.simpleName()), "Duplicate simple name " + t.simpleName() + " for " + t); 096 if (t instanceof AlternativeNames) { 097 for (String a : ((AlternativeNames) t).alternativeNames()) { 098 assertFalse(names.contains(a), "Duplicate alternative name " + a + " for " + t); 099 names.add(a); 100 } 101 } 102 names.add(t.simpleName()); 103 } 104 } 105 106 @Test 107 public void testCompleteness() throws Exception { 108 String packageName = DwcTerm.class.getPackage().getName(); 109 for (Class<?> cl : getClassesInPackage(packageName)) { 110 if (cl.isEnum() && Term.class.isAssignableFrom(cl)) { 111 Class<Term> tcl = (Class<Term>) cl; 112 for (Term t : tcl.getEnumConstants()) { 113 assertEquals(t, TF.findTerm(t.qualifiedName()), "Unknown term " + t.qualifiedName()); 114 assertEquals(t, TF.findTerm(t.prefixedName()), "Unknown term " + t.prefixedName()); 115 } 116 } 117 } 118 } 119 120 /** 121 * Scans all classes in a given package. 122 */ 123 private Set<Class<?>> getClassesInPackage(String packageName) throws IOException, URISyntaxException { 124 ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); 125 String path = packageName.replace('.', '/'); 126 URI resource = classLoader.getResource(path).toURI(); 127 128 if (resource.getScheme().equals("jar")) { 129 try (FileSystem fileSystem = FileSystems.newFileSystem(resource, Collections.emptyMap())) { 130 Path packagePath = fileSystem.getPath(path); 131 return findClasses(packagePath, packageName); 132 } 133 } else { 134 Path packagePath = Paths.get(resource); 135 return findClasses(packagePath, packageName); 136 } 137 } 138 139 /** 140 * Finds all classes in a given directory. 141 */ 142 private Set<Class<?>> findClasses(Path directory, String packageName) throws IOException { 143 Set<Class<?>> classes = new HashSet<>(); 144 try (Stream<Path> paths = Files.walk(directory, 1)) { 145 paths.filter(path -> path.toString().endsWith(".class")) 146 .forEach(path -> { 147 try { 148 String className = packageName + '.' + 149 path.getFileName().toString().replace(".class", ""); 150 classes.add(Class.forName(className)); 151 } catch (ClassNotFoundException e) { 152 // Ignore 153 } 154 }); 155 } 156 return classes; 157 } 158 159 @Test 160 public void testFindTerm() { 161 assertEquals(DwcTerm.scientificName, TF.findTerm("ScientificName")); 162 assertEquals(DwcTerm.scientificName, TF.findTerm("dwc:scientificName")); 163 assertEquals(DwcTerm.scientificName, TF.findTerm("http://rs.tdwg.org/dwc/terms/scientificName")); 164 assertEquals(DcElement.identifier, TF.findTerm("dc:identifier")); 165 assertEquals(GbifTerm.Identifier, TF.findTerm("Identifier")); 166 assertEquals(GbifTerm.Identifier, TF.findClassTerm("identifier")); 167 assertEquals(DcTerm.identifier, TF.findTerm("identifier")); 168 assertEquals(DcTerm.identifier, TF.findPropertyTerm("identifier")); 169 assertEquals(DcTerm.identifier, TF.findTerm("id")); 170 assertEquals(DwcTerm.parentNameUsageID, TF.findTerm("dwc:higherNameUsageID")); 171 assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("dwc:acceptedTaxonId")); 172 assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("dwc:acceptedTaxonID")); 173 assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("acceptedTaxonID")); 174 assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("http://rs.tdwg.org/dwc/terms/acceptedTaxonId")); 175 assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("dwc:acceptedTaxonId")); 176 assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("dwc:acceptedTaxonID")); 177 assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("acceptedTaxonID")); 178 assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("http://rs.tdwg.org/dwc/terms/acceptedTaxonId")); 179 assertEquals(AcefTerm.AcceptedTaxonID, TF.findPropertyTerm("AcceptedTaxonID")); 180 assertEquals(AcefTerm.AcceptedTaxonID, TF.findTerm("acef:AcceptedTaxonID")); 181 assertEquals(AcefTerm.AcceptedTaxonID, TF.findPropertyTerm("acef:AcceptedTaxonID")); 182 183 assertEquals(DwcTerm.vernacularName, TF.findTerm("dwc:vernacularName")); 184 assertEquals(DwcTerm.vernacularName, TF.findTerm("vernacularName")); 185 assertEquals(GbifTerm.VernacularName, TF.findTerm("VernacularName")); 186 assertEquals(GbifTerm.VernacularName, TF.findTerm("gbif:VernacularName")); 187 188 assertEquals(GbifInternalTerm.unitQualifier, TF.findTerm("UNIT_QUALIFIER")); 189 190 assertEquals("threatStatus", TF.findTerm("http://rs.gbif.org/terms/1.0/threatStatus").simpleName()); 191 assertEquals("threatStatus", TF.findTerm("http://rs.gbif.org/terms/1321.43/threatStatus").simpleName()); 192 193 assertEquals(DwcTerm.catalogNumber, TF.findTerm("\"catalogNumber\"")); 194 assertEquals(AcefTerm.Details, TF.findTerm("acef:source")); 195 assertEquals(DwcTerm.family, TF.findTerm("dwc:family")); 196 assertEquals(DwcTerm.family, TF.findTerm("family")); 197 assertEquals(AcefTerm.Family, TF.findTerm("acef:family")); 198 199 assertEquals(DwcaTerm.ID, TF.findTerm("dwca:ID")); 200 201 assertEquals(BibTexTerm.CLASS_TERM, TF.findTerm("bib:BibTeX")); 202 assertEquals(BibTexTerm.CLASS_TERM, TF.findTerm("http://bibtex.org/BibTeX")); 203 204 Term t = BibTexTerm.buildFromURI("http://bibtex.org/creator"); 205 assertEquals(t, TF.findTerm("http://bibtex.org/creator")); 206 assertEquals(t, TF.findTerm("bib:creator")); 207 208 // ACEF namespace has changed: https://github.com/gbif/portal-feedback/issues/4890 209 assertEquals(AcefTerm.Country, TF.findTerm("acef:Country")); 210 assertEquals(AcefTerm.Country, TF.findTerm("http://rs.col.plus/terms/acef/Country")); 211 assertEquals(AcefTerm.Country, TF.findTerm("https://rs.col.plus/terms/acef/Country")); 212 assertEquals(DwcTerm.country, TF.findTerm("country")); 213 214 // MIxS uses unreadable identifiers in qualified names. 215 assertEquals(MixsTerm.samp_size, TF.findTerm("samp_size")); 216 assertEquals(MixsTerm.samp_size, TF.findTerm("mixs:samp_size")); 217 assertEquals(MixsTerm.samp_size, TF.findTerm("https://w3id.org/gensc/terms/MIXS:0000001")); 218 assertEquals(MixsTerm.samp_size, TF.findTerm("https://w3id.org/mixs/0000001")); 219 assertEquals(MixsTerm.lib_reads_seqd, TF.findTerm("http://gensc.org/ns/mixs/lib_reads_seqd")); 220 assertEquals(MixsTerm.assembly_name, TF.findTerm("http://gensc.org/ns/mixs/assembly")); 221 assertEquals(MixsTerm.assembly_qual, TF.findTerm("http://gensc.org/ns/mixs/finishing_strategy")); 222 assertEquals(MixsTerm.annot, TF.findTerm("http://gensc.org/ns/mixs/annot_source")); 223 } 224 225 @Test 226 public void addUnknownTerm() { 227 TermFactory factory = TermFactory.instance(); 228 229 Term me1 = factory.findTerm("http://me.com/#me"); 230 Term me2 = factory.findTerm("http://me.com/me"); 231 Term me3 = factory.findTerm("http://me.org/me"); 232 233 assertNotEquals(me1, me2); 234 assertNotEquals(me1, me3); 235 assertNotEquals(me2, me3); 236 } 237 238 @Test 239 public void badTerm() { 240 TermFactory factory = TermFactory.instance(); 241 assertThrows(IllegalArgumentException.class, () -> factory.findTerm("Hallo Tim")); 242 } 243 244 @Test 245 public void removedGbifTerms() { 246 assertEquals(DwcTerm.genericName, TF.findTerm("genericName")); 247 assertEquals(DwcTerm.recordedByID, TF.findTerm("recordedByID")); 248 assertEquals(DwcTerm.identifiedByID, TF.findTerm("identifiedByID")); 249 250 assertEquals(DwcTerm.genericName, TF.findTerm("http://rs.gbif.org/terms/1.0/genericName")); 251 assertEquals(DwcTerm.recordedByID, TF.findTerm("http://rs.gbif.org/terms/1.0/recordedByID")); 252 assertEquals(DwcTerm.identifiedByID, TF.findTerm("http://rs.gbif.org/terms/1.0/identifiedByID")); 253 254 assertEquals(DwcTerm.genericName, TF.findTerm("gbif:genericName")); 255 assertEquals(DwcTerm.recordedByID, TF.findTerm("gbif:recordedByID")); 256 assertEquals(DwcTerm.identifiedByID, TF.findTerm("gbif:identifiedByID")); 257 } 258 259 @Test 260 public void addSimpleTerm() { 261 TermFactory factory = TermFactory.instance(); 262 263 Term hallo = factory.findTerm("hallo"); 264 assertEquals(UnknownTerm.class, hallo.getClass()); 265 assertEquals("http://unknown.org/hallo", hallo.qualifiedName()); 266 assertEquals("hallo", hallo.simpleName()); 267 268 269 Term tim = factory.findTerm("Tim"); 270 assertEquals(UnknownTerm.class, tim.getClass()); 271 assertEquals("http://unknown.org/Tim", tim.qualifiedName()); 272 assertEquals("http://unknown.org", tim.namespace().toString()); 273 assertEquals("Tim", tim.simpleName()); 274 275 Term eva = factory.findTerm("tim:Eva"); 276 assertEquals(UnknownTerm.class, eva.getClass()); 277 assertEquals("http://unknown.org/tim/Eva", eva.qualifiedName()); 278 assertEquals("http://unknown.org", tim.namespace().toString()); 279 assertEquals("http://unknown.org/tim/Eva", eva.qualifiedName()); 280 assertEquals("tim:Eva", eva.prefixedName()); 281 assertEquals("Eva", eva.simpleName()); 282 283 assertNotEquals(hallo, tim); 284 } 285 286 @Test 287 public void addUnknownSimpleTerm() { 288 TermFactory factory = TermFactory.instance(); 289 290 Term t1 = factory.findTerm("me"); 291 Term t2 = factory.findTerm("me"); 292 Term t3 = factory.findTerm("Ne"); 293 294 assertEquals(t1, t2); 295 assertNotEquals(t1, t3); 296 assertNotEquals(t2, t3); 297 } 298 299 /** 300 * Not a real test, just a way of running many concurrent TermFactory.instance() calls to verify thread safety. 301 */ 302 @Test 303 public void testMultithreadStart() throws InterruptedException { 304 int threadCount = 100; 305 ExecutorService tp = Executors.newFixedThreadPool(threadCount); 306 for (int i = 0; i < threadCount; i++) { 307 tp.submit(new TermFactoryLoader()); 308 } 309 tp.shutdown(); 310 tp.awaitTermination(30, TimeUnit.SECONDS); 311 } 312 313 private static class TermFactoryLoader implements Runnable { 314 @Override 315 public void run() { 316 TermFactory.instance(); 317 } 318 } 319}