001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.dwc.terms;
017
018import java.io.IOException;
019import java.net.URI;
020import java.net.URISyntaxException;
021import java.nio.file.FileSystem;
022import java.nio.file.FileSystems;
023import java.nio.file.Files;
024import java.nio.file.Path;
025import java.nio.file.Paths;
026import java.util.Collections;
027import java.util.stream.Stream;
028import org.junit.jupiter.api.Test;
029
030import java.util.Arrays;
031import java.util.HashSet;
032import java.util.Set;
033import java.util.concurrent.ExecutorService;
034import java.util.concurrent.Executors;
035import java.util.concurrent.TimeUnit;
036
037import static org.junit.jupiter.api.Assertions.assertEquals;
038import static org.junit.jupiter.api.Assertions.assertFalse;
039import static org.junit.jupiter.api.Assertions.assertNotEquals;
040import static org.junit.jupiter.api.Assertions.assertThrows;
041
042public class TermFactoryTest {
043
044  final TermFactory TF = TermFactory.instance();
045
046  /**
047   * GBIF code assumes a term coming from any of the Term enumerations mostly have unique simple names.
048   * This tests verifies that!
049   *
050   * AcefTerm is known to overlap, so its excluded, see skipSimple in AcefTermTest.
051   */
052  @Test
053  public void testKnownTermUniqueness() {
054    Set<String> names = new HashSet<>();
055
056    addTerms(names, DwcTerm.values());
057    addTerms(names, DcTerm.values());
058    addTerms(names, GbifTerm.values());
059    addTerms(names, GbifInternalTerm.values());
060    addTerms(names, IucnTerm.values());
061    //addTerms(names, DcElement.values());
062    //addTerms(names, AcefTerm.values());
063    //addTerms(names, PlaziTerm.values());
064    addTerms(names, GadmTerm.values());
065    //addTerms(names, DwcaTerm.values());
066
067    // Audubon Core
068    addTerms(names, termsBut(AcTerm.values(), AcTerm.Multimedia, AcTerm.relatedResourceID, AcTerm.fundingAttribution));
069    addTerms(names, ExifTerm.values());
070    addTerms(names, IptcTerm.values());
071    addTerms(names, PhotoshopTerm.values());
072    addTerms(names, XmpTerm.values());
073    addTerms(names, XmpRightsTerm.values());
074
075    // Terms for extensions supported in GBIF downloads.
076    addTerms(names, ChronoTerm.values());
077    addTerms(names, GbifDnaTerm.values());
078    addTerms(names, GbifMiqeTerm.values());
079    addTerms(names, GermplasmTerm.values());
080    addTerms(names, termsBut(GgbnTerm.values(), GgbnTerm.MaterialSample));
081    addTerms(names, MixsTerm.values());
082    addTerms(names, ObisTerm.values());
083    addTerms(names, Wgs84GeoPositioningTerm.values());
084  }
085
086  private Term[] termsBut(Term[] terms, Term... exclude) {
087    Set<Term> excl = new HashSet<>(Arrays.asList(exclude));
088    return Arrays.stream(terms)
089        .filter(t -> !excl.contains(t))
090        .toArray(Term[]::new);
091  }
092
093  private void addTerms(Set<String> names, Term[] terms) {
094    for (Term t : terms) {
095      assertFalse(names.contains(t.simpleName()), "Duplicate simple name " + t.simpleName() + " for " + t);
096      if (t instanceof AlternativeNames) {
097        for (String a : ((AlternativeNames) t).alternativeNames()) {
098          assertFalse(names.contains(a), "Duplicate alternative name " + a + " for " + t);
099          names.add(a);
100        }
101      }
102      names.add(t.simpleName());
103    }
104  }
105
106  @Test
107  public void testCompleteness() throws Exception {
108    String packageName = DwcTerm.class.getPackage().getName();
109    for (Class<?> cl : getClassesInPackage(packageName)) {
110      if (cl.isEnum() && Term.class.isAssignableFrom(cl)) {
111        Class<Term> tcl = (Class<Term>) cl;
112        for (Term t : tcl.getEnumConstants()) {
113          assertEquals(t, TF.findTerm(t.qualifiedName()), "Unknown term " + t.qualifiedName());
114          assertEquals(t, TF.findTerm(t.prefixedName()), "Unknown term " + t.prefixedName());
115        }
116      }
117    }
118  }
119
120  /**
121   * Scans all classes in a given package.
122   */
123  private Set<Class<?>> getClassesInPackage(String packageName) throws IOException, URISyntaxException {
124    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
125    String path = packageName.replace('.', '/');
126    URI resource = classLoader.getResource(path).toURI();
127
128    if (resource.getScheme().equals("jar")) {
129      try (FileSystem fileSystem = FileSystems.newFileSystem(resource, Collections.emptyMap())) {
130        Path packagePath = fileSystem.getPath(path);
131        return findClasses(packagePath, packageName);
132      }
133    } else {
134      Path packagePath = Paths.get(resource);
135      return findClasses(packagePath, packageName);
136    }
137  }
138
139  /**
140   * Finds all classes in a given directory.
141   */
142  private Set<Class<?>> findClasses(Path directory, String packageName) throws IOException {
143    Set<Class<?>> classes = new HashSet<>();
144    try (Stream<Path> paths = Files.walk(directory, 1)) {
145      paths.filter(path -> path.toString().endsWith(".class"))
146          .forEach(path -> {
147            try {
148              String className = packageName + '.' +
149                  path.getFileName().toString().replace(".class", "");
150              classes.add(Class.forName(className));
151            } catch (ClassNotFoundException e) {
152              // Ignore
153            }
154          });
155    }
156    return classes;
157  }
158
159  @Test
160  public void testFindTerm() {
161    assertEquals(DwcTerm.scientificName, TF.findTerm("ScientificName"));
162    assertEquals(DwcTerm.scientificName, TF.findTerm("dwc:scientificName"));
163    assertEquals(DwcTerm.scientificName, TF.findTerm("http://rs.tdwg.org/dwc/terms/scientificName"));
164    assertEquals(DcElement.identifier, TF.findTerm("dc:identifier"));
165    assertEquals(GbifTerm.Identifier, TF.findTerm("Identifier"));
166    assertEquals(GbifTerm.Identifier, TF.findClassTerm("identifier"));
167    assertEquals(DcTerm.identifier, TF.findTerm("identifier"));
168    assertEquals(DcTerm.identifier, TF.findPropertyTerm("identifier"));
169    assertEquals(DcTerm.identifier, TF.findTerm("id"));
170    assertEquals(DwcTerm.parentNameUsageID, TF.findTerm("dwc:higherNameUsageID"));
171    assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("dwc:acceptedTaxonId"));
172    assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("dwc:acceptedTaxonID"));
173    assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("acceptedTaxonID"));
174    assertEquals(DwcTerm.acceptedNameUsageID, TF.findTerm("http://rs.tdwg.org/dwc/terms/acceptedTaxonId"));
175    assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("dwc:acceptedTaxonId"));
176    assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("dwc:acceptedTaxonID"));
177    assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("acceptedTaxonID"));
178    assertEquals(DwcTerm.acceptedNameUsageID, TF.findPropertyTerm("http://rs.tdwg.org/dwc/terms/acceptedTaxonId"));
179    assertEquals(AcefTerm.AcceptedTaxonID, TF.findPropertyTerm("AcceptedTaxonID"));
180    assertEquals(AcefTerm.AcceptedTaxonID, TF.findTerm("acef:AcceptedTaxonID"));
181    assertEquals(AcefTerm.AcceptedTaxonID, TF.findPropertyTerm("acef:AcceptedTaxonID"));
182
183    assertEquals(DwcTerm.vernacularName, TF.findTerm("dwc:vernacularName"));
184    assertEquals(DwcTerm.vernacularName, TF.findTerm("vernacularName"));
185    assertEquals(GbifTerm.VernacularName, TF.findTerm("VernacularName"));
186    assertEquals(GbifTerm.VernacularName, TF.findTerm("gbif:VernacularName"));
187
188    assertEquals(GbifInternalTerm.unitQualifier, TF.findTerm("UNIT_QUALIFIER"));
189
190    assertEquals("threatStatus", TF.findTerm("http://rs.gbif.org/terms/1.0/threatStatus").simpleName());
191    assertEquals("threatStatus", TF.findTerm("http://rs.gbif.org/terms/1321.43/threatStatus").simpleName());
192
193    assertEquals(DwcTerm.catalogNumber, TF.findTerm("\"catalogNumber\""));
194    assertEquals(AcefTerm.Details, TF.findTerm("acef:source"));
195    assertEquals(DwcTerm.family, TF.findTerm("dwc:family"));
196    assertEquals(DwcTerm.family, TF.findTerm("family"));
197    assertEquals(AcefTerm.Family, TF.findTerm("acef:family"));
198
199    assertEquals(DwcaTerm.ID, TF.findTerm("dwca:ID"));
200
201    assertEquals(BibTexTerm.CLASS_TERM, TF.findTerm("bib:BibTeX"));
202    assertEquals(BibTexTerm.CLASS_TERM, TF.findTerm("http://bibtex.org/BibTeX"));
203
204    Term t = BibTexTerm.buildFromURI("http://bibtex.org/creator");
205    assertEquals(t, TF.findTerm("http://bibtex.org/creator"));
206    assertEquals(t, TF.findTerm("bib:creator"));
207
208    // ACEF namespace has changed: https://github.com/gbif/portal-feedback/issues/4890
209    assertEquals(AcefTerm.Country, TF.findTerm("acef:Country"));
210    assertEquals(AcefTerm.Country, TF.findTerm("http://rs.col.plus/terms/acef/Country"));
211    assertEquals(AcefTerm.Country, TF.findTerm("https://rs.col.plus/terms/acef/Country"));
212    assertEquals(DwcTerm.country, TF.findTerm("country"));
213
214    // MIxS uses unreadable identifiers in qualified names.
215    assertEquals(MixsTerm.samp_size, TF.findTerm("samp_size"));
216    assertEquals(MixsTerm.samp_size, TF.findTerm("mixs:samp_size"));
217    assertEquals(MixsTerm.samp_size, TF.findTerm("https://w3id.org/gensc/terms/MIXS:0000001"));
218    assertEquals(MixsTerm.samp_size, TF.findTerm("https://w3id.org/mixs/0000001"));
219    assertEquals(MixsTerm.lib_reads_seqd, TF.findTerm("http://gensc.org/ns/mixs/lib_reads_seqd"));
220    assertEquals(MixsTerm.assembly_name, TF.findTerm("http://gensc.org/ns/mixs/assembly"));
221    assertEquals(MixsTerm.assembly_qual, TF.findTerm("http://gensc.org/ns/mixs/finishing_strategy"));
222    assertEquals(MixsTerm.annot, TF.findTerm("http://gensc.org/ns/mixs/annot_source"));
223  }
224
225  @Test
226  public void addUnknownTerm() {
227    TermFactory factory = TermFactory.instance();
228
229    Term me1 = factory.findTerm("http://me.com/#me");
230    Term me2 = factory.findTerm("http://me.com/me");
231    Term me3 = factory.findTerm("http://me.org/me");
232
233    assertNotEquals(me1, me2);
234    assertNotEquals(me1, me3);
235    assertNotEquals(me2, me3);
236  }
237
238  @Test
239  public void badTerm() {
240    TermFactory factory = TermFactory.instance();
241    assertThrows(IllegalArgumentException.class, () -> factory.findTerm("Hallo Tim"));
242  }
243
244  @Test
245  public void removedGbifTerms() {
246    assertEquals(DwcTerm.genericName, TF.findTerm("genericName"));
247    assertEquals(DwcTerm.recordedByID, TF.findTerm("recordedByID"));
248    assertEquals(DwcTerm.identifiedByID, TF.findTerm("identifiedByID"));
249
250    assertEquals(DwcTerm.genericName, TF.findTerm("http://rs.gbif.org/terms/1.0/genericName"));
251    assertEquals(DwcTerm.recordedByID, TF.findTerm("http://rs.gbif.org/terms/1.0/recordedByID"));
252    assertEquals(DwcTerm.identifiedByID, TF.findTerm("http://rs.gbif.org/terms/1.0/identifiedByID"));
253
254    assertEquals(DwcTerm.genericName, TF.findTerm("gbif:genericName"));
255    assertEquals(DwcTerm.recordedByID, TF.findTerm("gbif:recordedByID"));
256    assertEquals(DwcTerm.identifiedByID, TF.findTerm("gbif:identifiedByID"));
257  }
258
259  @Test
260  public void addSimpleTerm() {
261    TermFactory factory = TermFactory.instance();
262
263    Term hallo = factory.findTerm("hallo");
264    assertEquals(UnknownTerm.class, hallo.getClass());
265    assertEquals("http://unknown.org/hallo", hallo.qualifiedName());
266    assertEquals("hallo", hallo.simpleName());
267
268
269    Term tim = factory.findTerm("Tim");
270    assertEquals(UnknownTerm.class, tim.getClass());
271    assertEquals("http://unknown.org/Tim", tim.qualifiedName());
272    assertEquals("http://unknown.org", tim.namespace().toString());
273    assertEquals("Tim", tim.simpleName());
274
275    Term eva = factory.findTerm("tim:Eva");
276    assertEquals(UnknownTerm.class, eva.getClass());
277    assertEquals("http://unknown.org/tim/Eva", eva.qualifiedName());
278    assertEquals("http://unknown.org", tim.namespace().toString());
279    assertEquals("http://unknown.org/tim/Eva", eva.qualifiedName());
280    assertEquals("tim:Eva", eva.prefixedName());
281    assertEquals("Eva", eva.simpleName());
282
283    assertNotEquals(hallo, tim);
284  }
285
286  @Test
287  public void addUnknownSimpleTerm() {
288    TermFactory factory = TermFactory.instance();
289
290    Term t1 = factory.findTerm("me");
291    Term t2 = factory.findTerm("me");
292    Term t3 = factory.findTerm("Ne");
293
294    assertEquals(t1, t2);
295    assertNotEquals(t1, t3);
296    assertNotEquals(t2, t3);
297  }
298
299  /**
300     * Not a real test, just a way of running many concurrent TermFactory.instance() calls to verify thread safety.
301     */
302  @Test
303  public void testMultithreadStart() throws InterruptedException {
304    int threadCount = 100;
305    ExecutorService tp = Executors.newFixedThreadPool(threadCount);
306    for (int i = 0; i < threadCount; i++) {
307      tp.submit(new TermFactoryLoader());
308    }
309    tp.shutdown();
310    tp.awaitTermination(30, TimeUnit.SECONDS);
311  }
312
313  private static class TermFactoryLoader implements Runnable {
314    @Override
315    public void run() {
316      TermFactory.instance();
317    }
318  }
319}