001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.dwc; 015 016import org.gbif.dwc.meta.DwcMetaFiles; 017import org.gbif.dwc.terms.DcTerm; 018import org.gbif.dwc.terms.DwcTerm; 019import org.gbif.dwc.terms.GbifTerm; 020import org.gbif.dwc.terms.TermFactory; 021import org.gbif.utils.file.FileUtils; 022 023import java.io.File; 024import java.io.FileInputStream; 025import java.io.IOException; 026import java.net.URI; 027import java.nio.file.Files; 028import java.util.ArrayList; 029import java.util.HashSet; 030import java.util.List; 031import java.util.Set; 032 033import javax.xml.parsers.SAXParser; 034import javax.xml.parsers.SAXParserFactory; 035 036import org.junit.jupiter.api.Test; 037import org.xml.sax.Attributes; 038import org.xml.sax.InputSource; 039import org.xml.sax.XMLReader; 040import org.xml.sax.ext.DefaultHandler2; 041 042import static org.junit.jupiter.api.Assertions.assertEquals; 043import static org.junit.jupiter.api.Assertions.assertNotNull; 044import static org.junit.jupiter.api.Assertions.assertNull; 045import static org.junit.jupiter.api.Assertions.assertTrue; 046import static org.junit.jupiter.api.Assertions.fail; 047 048/** 049 * Integration tests related to the MetaDescriptor operations. 050 */ 051public class MetaDescriptorTest { 052 public static TermFactory TERM_FACTORY = TermFactory.instance(); 053 054 private static final String NOMENCLATURAL_CODE_VOCABULARY = "http://rs.gbif.org/vocabulary/gbif/nomenclatural_code.xml"; 055 //for testing only, language vocabulary doesn't exist at rs.gbif.org 056 private static final String LANGUAGE_VOCABULARY = "http://rs.gbif.org/vocabulary/gbif/language.xml"; 057 058 public class SAXExtractTerms extends DefaultHandler2 { 059 private final List<String> terms; 060 public SAXExtractTerms(List<String> terms) { 061 this.terms = terms; 062 } 063 064 @Override 065 public void startElement(String uri, String localName, String qName, Attributes atts) { 066 List<String> list = new ArrayList<>(); 067 list.add("rowType"); 068 list.add("term"); 069 for (String attName : list) { 070 if (atts.getValue(attName) != null) { 071 terms.add(atts.getValue(attName)); 072 } 073 } 074 } 075 } 076 077 @Test 078 public void testXml() throws Exception { 079 // read archive 080 Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath()); 081 082 // write meta.xml 083 File tmpMeta = File.createTempFile("meta", ".xml"); 084 System.out.println("Writing temporary test meta file to " + tmpMeta.getAbsolutePath()); 085 MetaDescriptorWriter.writeMetaFile(tmpMeta, arch); 086 087 // verify rowType & terms are URIs 088 List<String> terms = new ArrayList<>(); 089 SAXParserFactory spf = SAXParserFactory.newInstance(); 090 spf.setNamespaceAware(true); 091 SAXParser saxParser = spf.newSAXParser(); 092 XMLReader xmlReader = saxParser.getXMLReader(); 093 xmlReader.setContentHandler(new SAXExtractTerms(terms)); 094 xmlReader.parse(new InputSource(new FileInputStream(tmpMeta))); 095 096 assertEquals(18, terms.size()); 097 for (String term : terms) { 098 URI uri = URI.create(term); 099 assertNotNull(uri.getScheme(), uri + " is no full URI term"); 100 assertNotNull(uri.getAuthority(), uri + " is no full URI term"); 101 assertNotNull(uri.getPath(), uri + " is no full URI term"); 102 } 103 } 104 105 @Test 106 public void testRoundtrip() { 107 try { 108 // read archive 109 Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath()); 110 assertNotNull(arch); 111 assertNotNull(arch.getCore()); 112 assertEquals(0, arch.getCore().getId().getIndex()); 113 assertTrue(arch.getCore().hasTerm(DwcTerm.scientificName)); 114 assertEquals(2, arch.getExtensions().size()); 115 assertEquals("\t", arch.getCore().getFieldsTerminatedBy()); 116 assertNull(arch.getCore().getField(DwcTerm.scientificName).getDelimitedBy()); 117 assertEquals(";", arch.getCore().getField(DwcTerm.nomenclaturalStatus).getDelimitedBy()); 118 assertEquals(NOMENCLATURAL_CODE_VOCABULARY, arch.getCore().getField(DwcTerm.nomenclaturalCode).getVocabulary()); 119 assertEquals(LANGUAGE_VOCABULARY, arch.getExtension(GbifTerm.VernacularName).getField(DcTerm.language).getVocabulary()); 120 121 // write meta.xml 122 File tmpDwca = createTmpMeta(arch); 123 Files.createFile(tmpDwca.toPath().resolve("DarwinCore.txt")); 124 Files.createFile(tmpDwca.toPath().resolve("VernacularName.txt")); 125 Files.createFile(tmpDwca.toPath().resolve("media.txt")); 126 127 Archive arch2 = DwcFiles.fromLocation(tmpDwca.toPath()); 128 // core props 129 ArchiveFile core = arch2.getCore(); 130 assertNotNull(core); 131 assertNotNull(core.getId()); 132 assertTrue(core.hasTerm(DwcTerm.scientificName)); 133 assertEquals("DarwinCore.txt", core.getFirstLocation()); 134 assertEquals("\t", core.getFieldsTerminatedBy()); 135 assertNull(core.getField(DwcTerm.scientificName).getDelimitedBy()); 136 assertEquals(";", core.getField(DwcTerm.nomenclaturalStatus).getDelimitedBy()); 137 assertEquals(NOMENCLATURAL_CODE_VOCABULARY, core.getField(DwcTerm.nomenclaturalCode).getVocabulary()); 138 139 for (ArchiveField f : arch.getCore().getFields().values()) { 140 assertTrue(core.hasTerm(f.getTerm().qualifiedName())); 141 assertEquals(core.getField(f.getTerm().qualifiedName()).getIndex(), f.getIndex()); 142 } 143 144 // extensions props 145 assertEquals(2, arch2.getExtensions().size()); 146 Set<String> filenames = new HashSet<>(); 147 filenames.add("VernacularName.txt"); 148 filenames.add("media.txt"); 149 150 for (ArchiveFile ext : arch2.getExtensions()) { 151 assertTrue(filenames.contains(ext.getFirstLocation())); 152 filenames.remove(ext.getFirstLocation()); 153 } 154 assertTrue(filenames.isEmpty()); 155 156 } catch (Exception e) { 157 e.printStackTrace(); 158 fail(); 159 } 160 } 161 162 private File createTmpMeta(Archive arch) throws IOException { 163 File tmpDir = Files.createTempDirectory("dwca-io-test").toFile(); 164 tmpDir.deleteOnExit(); 165 File tmpMeta = new File(tmpDir, Archive.META_FN); 166 System.out.println("Writing temporary test meta file to " + tmpMeta.getAbsolutePath()); 167 MetaDescriptorWriter.writeMetaFile(tmpMeta, arch); 168 return tmpDir; 169 } 170 171 @Test 172 public void testRoundtripQuotes() { 173 try { 174 // read archive 175 Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("xml-entity-meta").toPath()); 176 assertNotNull(arch); 177 assertNotNull(arch.getCore()); 178 assertNotNull(arch.getCore().getId()); 179 assertTrue(arch.getCore().hasTerm(DwcTerm.scientificName)); 180 assertEquals(1, arch.getExtensions().size()); 181 182 // write meta.xml 183 File tmpDwca = createTmpMeta(arch); 184 Files.createFile(tmpDwca.toPath().resolve("test")); 185 Files.createFile(tmpDwca.toPath().resolve("test2")); 186 Archive arch2 = DwcFiles.fromLocation(tmpDwca.toPath()); 187 188 // core props 189 ArchiveFile core = arch2.getCore(); 190 assertNotNull(core); 191 assertNotNull(core.getId()); 192 assertTrue(core.hasTerm(DwcTerm.scientificName)); 193 assertEquals("test", core.getFirstLocation()); 194 for (ArchiveField f : arch.getCore().getFields().values()) { 195 assertTrue(core.hasTerm(f.getTerm().qualifiedName())); 196 assertEquals(core.getField(f.getTerm().qualifiedName()).getIndex(), f.getIndex()); 197 } 198 199 // extensions props 200 assertEquals(1, arch2.getExtensions().size()); 201 ArchiveFile ext = arch2.getExtensions().iterator().next(); 202 assertEquals("test2", ext.getFirstLocation()); 203 assertEquals(2, ext.getFields().size()); 204 205 } catch (Exception e) { 206 e.printStackTrace(); 207 fail(); 208 } 209 } 210 211 /** 212 * Test the reading of a static meta.xml file. 213 */ 214 @Test 215 public void testMetaDescriptorReading() throws Exception { 216 // we can read only a meta.xml file as an Archive 217 Archive arch = DwcMetaFiles.fromMetaDescriptor(new FileInputStream(FileUtils.getClasspathFile("meta/meta.xml"))); 218 219 //validate archive ID field 220 ArchiveField af = arch.getCore().getId(); 221 assertEquals(Integer.valueOf(1), af.getIndex()); 222 //not specified, should be set to the default value 223 assertEquals(ArchiveFile.DEFAULT_FIELDS_ENCLOSED_BY, arch.getCore().getFieldsEnclosedBy()); 224 225 //validate default 226 af = arch.getCore().getField(DwcTerm.kingdom); 227 assertEquals("Animalia", af.getDefaultValue()); 228 229 // validate vocabulary 230 af = arch.getCore().getField(DwcTerm.nomenclaturalCode); 231 assertEquals(NOMENCLATURAL_CODE_VOCABULARY, af.getVocabulary()); 232 233 //explicitly set to empty string which means we should not use a fieldsEnclosedBy (value == null) 234 assertNull(arch.getExtension(TERM_FACTORY 235 .findTerm("http://rs.tdwg.org/invented/Links")).getFieldsEnclosedBy()); 236 } 237}