001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.dwc; 015 016import org.gbif.dwc.record.Record; 017import org.gbif.dwc.record.StarRecord; 018import org.gbif.dwc.terms.DcTerm; 019import org.gbif.dwc.terms.DwcTerm; 020import org.gbif.dwc.terms.GbifTerm; 021import org.gbif.dwc.terms.Term; 022import org.gbif.utils.file.FileUtils; 023 024import java.io.BufferedReader; 025import java.io.File; 026import java.io.IOException; 027import java.util.HashMap; 028import java.util.Iterator; 029import java.util.Map; 030 031import org.junit.jupiter.api.Test; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import static org.junit.jupiter.api.Assertions.assertEquals; 036import static org.junit.jupiter.api.Assertions.assertThrows; 037import static org.junit.jupiter.api.Assertions.fail; 038 039public class DwcaWriterTest { 040 041 private static final Logger LOG = LoggerFactory.getLogger(DwcaWriterTest.class); 042 043 @Test 044 public void testAddingCoreIdTermTwice() throws Exception { 045 File dwcaDir = FileUtils.createTempDir(); 046 dwcaDir.deleteOnExit(); 047 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true); 048 writer.newRecord("dummy1"); 049 assertThrows(IllegalStateException.class, () -> writer.addCoreColumn(DwcTerm.taxonID, "dummy1")); 050 } 051 052 @Test 053 public void testHeaders1() throws Exception { 054 File dwcaDir = FileUtils.createTempDir(); 055 dwcaDir.deleteOnExit(); 056 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true); 057 058 writer.newRecord("dummy1"); 059 writer.addCoreColumn(DwcTerm.parentNameUsageID); 060 writer.addCoreColumn(DwcTerm.acceptedNameUsageID); 061 writer.newRecord("dummy2"); 062 writer.addCoreColumn(DwcTerm.parentNameUsageID); 063 writer.addCoreColumn(DwcTerm.acceptedNameUsageID); 064 } 065 066 @Test 067 public void testHeaders2() throws Exception { 068 File dwcaDir = FileUtils.createTempDir(); 069 dwcaDir.deleteOnExit(); 070 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true); 071 072 writer.newRecord("dummy1"); 073 writer.addCoreColumn(DwcTerm.parentNameUsageID); 074 writer.addCoreColumn(DwcTerm.acceptedNameUsageID); 075 writer.newRecord("dummy2"); 076 assertThrows(IllegalStateException.class, () -> writer.addCoreColumn(DwcTerm.scientificName)); 077 } 078 079 @Test 080 public void testHeaders3() throws Exception { 081 File dwcaDir = FileUtils.createTempDir(); 082 dwcaDir.deleteOnExit(); 083 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true); 084 085 writer.newRecord("dummy1"); 086 writer.addCoreColumn(DwcTerm.parentNameUsageID); 087 writer.addCoreColumn(DwcTerm.acceptedNameUsageID); 088 089 // define extension columns 090 Map<Term, String> eData = new HashMap<>(); 091 eData.put(DwcTerm.locality, "locality1"); 092 eData.put(DwcTerm.occurrenceStatus, "present"); 093 writer.addExtensionRecord(GbifTerm.Distribution, eData); 094 095 eData.put(DwcTerm.establishmentMeans, "alien"); 096 assertThrows(IllegalStateException.class, () -> writer.addExtensionRecord(GbifTerm.Distribution, eData)); 097 } 098 099 100 @Test 101 public void testHeaderWriting() throws Exception { 102 File dwcaDir = FileUtils.createTempDir(); 103 dwcaDir.deleteOnExit(); 104 LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath()); 105 106 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true); 107 108 writer.newRecord("dummy1"); 109 writer.addCoreColumn(DwcTerm.parentNameUsageID); 110 writer.addCoreColumn(DwcTerm.acceptedNameUsageID); 111 writer.addCoreColumn(DwcTerm.scientificName); 112 writer.addCoreColumn(GbifTerm.canonicalName); 113 writer.addCoreColumn(DwcTerm.taxonRank, "species"); 114 writer.addCoreColumn(DwcTerm.taxonomicStatus); 115 writer.addCoreColumn(DwcTerm.kingdom); 116 writer.addCoreColumn(DwcTerm.phylum); 117 writer.addCoreColumn(DwcTerm.class_); 118 writer.addCoreColumn(DwcTerm.order); 119 writer.addCoreColumn(DwcTerm.family); 120 writer.addCoreColumn(GbifTerm.depth); 121 writer.addCoreColumn(GbifTerm.depthAccuracy); 122 123 writer.newRecord("dummy2"); 124 writer.addCoreColumn(DwcTerm.kingdom, "Plantae"); 125 writer.addCoreColumn(DwcTerm.phylum); 126 writer.addCoreColumn(DwcTerm.class_); 127 writer.addCoreColumn(DwcTerm.order); 128 writer.addCoreColumn(DwcTerm.family, "Asteraceae"); 129 130 writer.newRecord("dummy3"); 131 writer.addCoreColumn(GbifTerm.depth, "2"); 132 writer.addCoreColumn(GbifTerm.depthAccuracy, "1"); 133 134 // define extension columns 135 Map<Term, String> eData = new HashMap<>(); 136 // distributions 137 eData.put(DwcTerm.locality, "locality1"); 138 eData.put(DwcTerm.occurrenceStatus, "present"); 139 eData.put(DwcTerm.establishmentMeans, "alien"); 140 writer.addExtensionRecord(GbifTerm.Distribution, eData); 141 142 eData.put(DwcTerm.locality, "locality2"); 143 writer.addExtensionRecord(GbifTerm.Distribution, eData); 144 145 writer.close(); 146 147 File cf = new File(dwcaDir, writer.getDataFiles().get(DwcTerm.Taxon)); 148 File df = new File(dwcaDir, writer.getDataFiles().get(GbifTerm.Distribution)); 149 150 // check if taxon file contains headers 151 String[] headers = getFirstRow(cf); 152 LOG.debug(String.join("; ", headers)); 153 assertEquals(14, headers.length); 154 assertEquals("taxonID", headers[0]); 155 assertEquals("parentNameUsageID", headers[1]); 156 assertEquals("kingdom", headers[7]); 157 158 // check if extension file contains headers 159 headers = getFirstRow(df); 160 LOG.debug(String.join("; ", headers)); 161 assertEquals(4, headers.length); 162 assertEquals("taxonID", headers[0]); 163 } 164 165 private String[] getFirstRow(File f) throws IOException { 166 BufferedReader r = FileUtils.getUtf8Reader(f); 167 String firstRow = r.readLine(); 168 return firstRow.split("\t"); 169 } 170 171 @Test 172 public void testRoundtrip() { 173 try { 174 // read taxon archive 175 Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath()); 176 assertEquals(2, arch.getExtensions().size()); 177 int coreRecords = 0; 178 int allRecords = 0; 179 180 // write taxon archive 181 File tempArch = FileUtils.createTempDir(); 182 tempArch.deleteOnExit(); 183 System.out.println("Writing temporary test archive to " + tempArch.getAbsolutePath()); 184 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, tempArch); 185 for (StarRecord rec : arch) { 186 // core 187 coreRecords++; 188 allRecords += rec.size(); 189 190 writer.newRecord(rec.core().id()); 191 for (Term term : arch.getCore().getTerms()) { 192 writer.addCoreColumn(term, rec.core().value(term)); 193 } 194 // extensions 195 for (Term rt : rec.extensions().keySet()) { 196 ArchiveFile af = arch.getExtension(rt); 197 // iterate over records for one extension 198 for (Record row : rec.extension(rt)) { 199 writer.addExtensionRecord(rt, DwcaWriter.recordToMap(row, af)); 200 } 201 } 202 } 203 writer.close(); 204 205 // reread and compare 206 Archive arch2 = DwcFiles.fromLocation(tempArch.toPath()); 207 208 int coreRecords2 = 0; 209 int allRecords2 = 0; 210 for (StarRecord rec : arch2) { 211 // core 212 coreRecords2++; 213 allRecords2 += rec.size(); 214 } 215 216 // compare 217 assertEquals(coreRecords, coreRecords2); 218 assertEquals(allRecords, allRecords2); 219 220 221 } catch (Exception e) { 222 e.printStackTrace(); 223 fail(); 224 } 225 } 226 227 @Test 228 public void testWriterUsingCoreIdTerm() throws Exception { 229 File dwcaDir = FileUtils.createTempDir(); 230 dwcaDir.deleteOnExit(); 231 LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath()); 232 233 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true); 234 235 writer.newRecord("dummy1"); 236 writer.addCoreColumn(DwcTerm.parentNameUsageID); 237 writer.addCoreColumn(DwcTerm.acceptedNameUsageID); 238 writer.close(); 239 240 Archive arch = DwcFiles.fromLocation(dwcaDir.toPath()); 241 Iterator<Record> recIt = arch.getCore().iterator(); 242 Record firstRecord = recIt.next(); 243 assertEquals("dummy1", firstRecord.id()); 244 assertEquals("dummy1", firstRecord.value(DwcTerm.taxonID)); 245 } 246 247 /** 248 * Test the writing of an archive that includes some default values in the core and in one extension. 249 */ 250 @Test 251 public void testWriterUsingDefaultValues() throws Exception { 252 File dwcaDir = FileUtils.createTempDir(); 253 dwcaDir.deleteOnExit(); 254 LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath()); 255 256 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true); 257 258 writer.newRecord("dummy1"); 259 writer.addCoreColumn(DwcTerm.parentNameUsageID, "1"); 260 writer.addCoreColumn(DwcTerm.acceptedNameUsageID, "2"); 261 writer.addCoreColumn(DwcTerm.countryCode); 262 263 // add a VernacularName extension record 264 Map<Term,String> extensionRecord = new HashMap<>(); 265 extensionRecord.put(DwcTerm.vernacularName, "Komodo Dragon"); 266 extensionRecord.put(DcTerm.language, null); 267 writer.addExtensionRecord(GbifTerm.VernacularName, extensionRecord); 268 269 writer.addCoreDefaultValue(DwcTerm.collectionCode, "A2Z"); 270 writer.addCoreDefaultValue(DwcTerm.countryCode, "CA"); 271 writer.addDefaultValue(GbifTerm.VernacularName, DcTerm.language, "en"); 272 273 // add a second records and overwrite the default value 274 writer.newRecord("dummy2"); 275 writer.addCoreColumn(DwcTerm.parentNameUsageID, "1"); 276 writer.addCoreColumn(DwcTerm.acceptedNameUsageID, "2"); 277 writer.addCoreColumn(DwcTerm.countryCode, "ID"); 278 279 // add a VernacularName extension record 280 extensionRecord = new HashMap<>(); 281 extensionRecord.put(DwcTerm.vernacularName, "Varano De Komodo"); 282 extensionRecord.put(DcTerm.language, "es"); 283 writer.addExtensionRecord(GbifTerm.VernacularName, extensionRecord); 284 285 writer.close(); 286 287 // validate core content 288 Archive arch = DwcFiles.fromLocation(dwcaDir.toPath()); 289 Iterator<Record> recIt = arch.getCore().iterator(); 290 Record firstRecord = recIt.next(); 291 assertEquals("dummy1", firstRecord.id()); 292 assertEquals("dummy1", firstRecord.value(DwcTerm.taxonID)); 293 assertEquals("A2Z", firstRecord.value(DwcTerm.collectionCode)); 294 assertEquals("CA", firstRecord.value(DwcTerm.countryCode)); 295 assertEquals("A2Z", arch.getCore().getField(DwcTerm.collectionCode).getDefaultValue()); 296 assertEquals("CA", arch.getCore().getField(DwcTerm.countryCode).getDefaultValue()); 297 298 Record secondRecord = recIt.next(); 299 assertEquals("dummy2", secondRecord.id()); 300 assertEquals("dummy2", secondRecord.value(DwcTerm.taxonID)); 301 assertEquals("A2Z", secondRecord.value(DwcTerm.collectionCode)); 302 assertEquals("ID", secondRecord.value(DwcTerm.countryCode)); 303 304 // validate extension content 305 Iterator<Record> extRecIt = arch.getExtension(GbifTerm.VernacularName).iterator(); 306 assertEquals("en", arch.getExtension(GbifTerm.VernacularName).getField(DcTerm.language).getDefaultValue()); 307 firstRecord = extRecIt.next(); 308 assertEquals("dummy1", firstRecord.id()); 309 assertEquals("en", firstRecord.value(DcTerm.language)); 310 311 secondRecord = extRecIt.next(); 312 assertEquals("dummy2", secondRecord.id()); 313 assertEquals("es", secondRecord.value(DcTerm.language)); 314 } 315 316 @Test 317 public void testWriteMetadata() throws Exception { 318 File dwcaDir = FileUtils.createTempDir(); 319 dwcaDir.deleteOnExit(); 320 LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath()); 321 322 DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true); 323 324 writer.setMetadata("<eml/>", "eml.xml"); 325 writer.close(); 326 327 Archive arch = DwcFiles.fromLocation(dwcaDir.toPath()); 328 assertEquals("eml.xml", arch.getMetadataLocation()); 329 assertEquals("<eml/>", arch.getMetadata()); 330 } 331}