001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.dwc;
015
016import org.gbif.dwc.meta.DwcMetaFiles;
017import org.gbif.dwc.terms.DcTerm;
018import org.gbif.dwc.terms.DwcTerm;
019import org.gbif.dwc.terms.GbifTerm;
020import org.gbif.dwc.terms.TermFactory;
021import org.gbif.utils.file.FileUtils;
022
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.IOException;
026import java.net.URI;
027import java.nio.file.Files;
028import java.util.ArrayList;
029import java.util.HashSet;
030import java.util.List;
031import java.util.Set;
032
033import javax.xml.parsers.SAXParser;
034import javax.xml.parsers.SAXParserFactory;
035
036import org.junit.jupiter.api.Test;
037import org.xml.sax.Attributes;
038import org.xml.sax.InputSource;
039import org.xml.sax.XMLReader;
040import org.xml.sax.ext.DefaultHandler2;
041
042import static org.junit.jupiter.api.Assertions.assertEquals;
043import static org.junit.jupiter.api.Assertions.assertNotNull;
044import static org.junit.jupiter.api.Assertions.assertNull;
045import static org.junit.jupiter.api.Assertions.assertTrue;
046import static org.junit.jupiter.api.Assertions.fail;
047
048/**
049 * Integration tests related to the MetaDescriptor operations.
050 */
051public class MetaDescriptorTest {
052  public static TermFactory TERM_FACTORY = TermFactory.instance();
053
054  private static final String NOMENCLATURAL_CODE_VOCABULARY = "http://rs.gbif.org/vocabulary/gbif/nomenclatural_code.xml";
055  //for testing only, language vocabulary doesn't exist at rs.gbif.org
056  private static final String LANGUAGE_VOCABULARY = "http://rs.gbif.org/vocabulary/gbif/language.xml";
057
058  public class SAXExtractTerms extends DefaultHandler2 {
059    private final List<String> terms;
060    public SAXExtractTerms(List<String> terms) {
061      this.terms = terms;
062    }
063
064    @Override
065    public void startElement(String uri, String localName, String qName, Attributes atts) {
066      List<String> list = new ArrayList<>();
067      list.add("rowType");
068      list.add("term");
069      for (String attName : list) {
070        if (atts.getValue(attName) != null) {
071          terms.add(atts.getValue(attName));
072        }
073      }
074    }
075  }
076
077  @Test
078  public void testXml() throws Exception {
079    // read archive
080    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath());
081
082    // write meta.xml
083    File tmpMeta = File.createTempFile("meta", ".xml");
084    System.out.println("Writing temporary test meta file to " + tmpMeta.getAbsolutePath());
085    MetaDescriptorWriter.writeMetaFile(tmpMeta, arch);
086
087    // verify rowType & terms are URIs
088    List<String> terms = new ArrayList<>();
089    SAXParserFactory spf = SAXParserFactory.newInstance();
090    spf.setNamespaceAware(true);
091    SAXParser saxParser = spf.newSAXParser();
092    XMLReader xmlReader = saxParser.getXMLReader();
093    xmlReader.setContentHandler(new SAXExtractTerms(terms));
094    xmlReader.parse(new InputSource(new FileInputStream(tmpMeta)));
095
096    assertEquals(18, terms.size());
097    for (String term : terms) {
098      URI uri = URI.create(term);
099      assertNotNull(uri.getScheme(), uri + " is no full URI term");
100      assertNotNull(uri.getAuthority(), uri + " is no full URI term");
101      assertNotNull(uri.getPath(), uri + " is no full URI term");
102    }
103  }
104
105  @Test
106  public void testRoundtrip() {
107    try {
108      // read archive
109      Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath());
110      assertNotNull(arch);
111      assertNotNull(arch.getCore());
112      assertEquals(0, arch.getCore().getId().getIndex());
113      assertTrue(arch.getCore().hasTerm(DwcTerm.scientificName));
114      assertEquals(2, arch.getExtensions().size());
115      assertEquals("\t", arch.getCore().getFieldsTerminatedBy());
116      assertNull(arch.getCore().getField(DwcTerm.scientificName).getDelimitedBy());
117      assertEquals(";", arch.getCore().getField(DwcTerm.nomenclaturalStatus).getDelimitedBy());
118      assertEquals(NOMENCLATURAL_CODE_VOCABULARY, arch.getCore().getField(DwcTerm.nomenclaturalCode).getVocabulary());
119      assertEquals(LANGUAGE_VOCABULARY, arch.getExtension(GbifTerm.VernacularName).getField(DcTerm.language).getVocabulary());
120
121      // write meta.xml
122      File tmpDwca = createTmpMeta(arch);
123      Files.createFile(tmpDwca.toPath().resolve("DarwinCore.txt"));
124      Files.createFile(tmpDwca.toPath().resolve("VernacularName.txt"));
125      Files.createFile(tmpDwca.toPath().resolve("media.txt"));
126
127      Archive arch2 = DwcFiles.fromLocation(tmpDwca.toPath());
128      // core props
129      ArchiveFile core = arch2.getCore();
130      assertNotNull(core);
131      assertNotNull(core.getId());
132      assertTrue(core.hasTerm(DwcTerm.scientificName));
133      assertEquals("DarwinCore.txt", core.getFirstLocation());
134      assertEquals("\t", core.getFieldsTerminatedBy());
135      assertNull(core.getField(DwcTerm.scientificName).getDelimitedBy());
136      assertEquals(";", core.getField(DwcTerm.nomenclaturalStatus).getDelimitedBy());
137      assertEquals(NOMENCLATURAL_CODE_VOCABULARY, core.getField(DwcTerm.nomenclaturalCode).getVocabulary());
138
139      for (ArchiveField f : arch.getCore().getFields().values()) {
140        assertTrue(core.hasTerm(f.getTerm().qualifiedName()));
141        assertEquals(core.getField(f.getTerm().qualifiedName()).getIndex(), f.getIndex());
142      }
143
144      // extensions props
145      assertEquals(2, arch2.getExtensions().size());
146      Set<String> filenames = new HashSet<>();
147      filenames.add("VernacularName.txt");
148      filenames.add("media.txt");
149
150      for (ArchiveFile ext : arch2.getExtensions()) {
151        assertTrue(filenames.contains(ext.getFirstLocation()));
152        filenames.remove(ext.getFirstLocation());
153      }
154      assertTrue(filenames.isEmpty());
155
156    } catch (Exception e) {
157      e.printStackTrace();
158      fail();
159    }
160  }
161
162  private File createTmpMeta(Archive arch) throws IOException {
163    File tmpDir = Files.createTempDirectory("dwca-io-test").toFile();
164    tmpDir.deleteOnExit();
165    File tmpMeta = new File(tmpDir, Archive.META_FN);
166    System.out.println("Writing temporary test meta file to " + tmpMeta.getAbsolutePath());
167    MetaDescriptorWriter.writeMetaFile(tmpMeta, arch);
168    return tmpDir;
169  }
170
171  @Test
172  public void testRoundtripQuotes() {
173    try {
174      // read archive
175      Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("xml-entity-meta").toPath());
176      assertNotNull(arch);
177      assertNotNull(arch.getCore());
178      assertNotNull(arch.getCore().getId());
179      assertTrue(arch.getCore().hasTerm(DwcTerm.scientificName));
180      assertEquals(1, arch.getExtensions().size());
181
182      // write meta.xml
183      File tmpDwca = createTmpMeta(arch);
184      Files.createFile(tmpDwca.toPath().resolve("test"));
185      Files.createFile(tmpDwca.toPath().resolve("test2"));
186      Archive arch2 = DwcFiles.fromLocation(tmpDwca.toPath());
187
188      // core props
189      ArchiveFile core = arch2.getCore();
190      assertNotNull(core);
191      assertNotNull(core.getId());
192      assertTrue(core.hasTerm(DwcTerm.scientificName));
193      assertEquals("test", core.getFirstLocation());
194      for (ArchiveField f : arch.getCore().getFields().values()) {
195        assertTrue(core.hasTerm(f.getTerm().qualifiedName()));
196        assertEquals(core.getField(f.getTerm().qualifiedName()).getIndex(), f.getIndex());
197      }
198
199      // extensions props
200      assertEquals(1, arch2.getExtensions().size());
201      ArchiveFile ext = arch2.getExtensions().iterator().next();
202      assertEquals("test2", ext.getFirstLocation());
203      assertEquals(2, ext.getFields().size());
204
205    } catch (Exception e) {
206      e.printStackTrace();
207      fail();
208    }
209  }
210
211  /**
212   * Test the reading of a static meta.xml file.
213   */
214  @Test
215  public void testMetaDescriptorReading() throws Exception {
216    // we can read only a meta.xml file as an Archive
217    Archive arch = DwcMetaFiles.fromMetaDescriptor(new FileInputStream(FileUtils.getClasspathFile("meta/meta.xml")));
218
219    //validate archive ID field
220    ArchiveField af = arch.getCore().getId();
221    assertEquals(Integer.valueOf(1), af.getIndex());
222    //not specified, should be set to the default value
223    assertEquals(ArchiveFile.DEFAULT_FIELDS_ENCLOSED_BY, arch.getCore().getFieldsEnclosedBy());
224
225    //validate default
226    af = arch.getCore().getField(DwcTerm.kingdom);
227    assertEquals("Animalia", af.getDefaultValue());
228
229    // validate vocabulary
230    af = arch.getCore().getField(DwcTerm.nomenclaturalCode);
231    assertEquals(NOMENCLATURAL_CODE_VOCABULARY, af.getVocabulary());
232
233    //explicitly set to empty string which means we should not use a fieldsEnclosedBy (value == null)
234    assertNull(arch.getExtension(TERM_FACTORY
235            .findTerm("http://rs.tdwg.org/invented/Links")).getFieldsEnclosedBy());
236  }
237}