001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.dwc;
015
016import org.gbif.dwc.record.Record;
017import org.gbif.dwc.record.StarRecord;
018import org.gbif.dwc.terms.DcTerm;
019import org.gbif.dwc.terms.DwcTerm;
020import org.gbif.dwc.terms.GbifTerm;
021import org.gbif.dwc.terms.Term;
022import org.gbif.utils.file.FileUtils;
023
024import java.io.BufferedReader;
025import java.io.File;
026import java.io.IOException;
027import java.util.HashMap;
028import java.util.Iterator;
029import java.util.Map;
030
031import org.junit.jupiter.api.Test;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import static org.junit.jupiter.api.Assertions.assertEquals;
036import static org.junit.jupiter.api.Assertions.assertThrows;
037import static org.junit.jupiter.api.Assertions.fail;
038
039public class DwcaWriterTest {
040
041  private static final Logger LOG = LoggerFactory.getLogger(DwcaWriterTest.class);
042  
043  @Test
044  public void testAddingCoreIdTermTwice() throws Exception {
045    File dwcaDir = FileUtils.createTempDir();
046    dwcaDir.deleteOnExit();
047    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true);
048    writer.newRecord("dummy1");
049    assertThrows(IllegalStateException.class, () -> writer.addCoreColumn(DwcTerm.taxonID, "dummy1"));
050  }
051  
052  @Test
053  public void testHeaders1() throws Exception {
054    File dwcaDir = FileUtils.createTempDir();
055    dwcaDir.deleteOnExit();
056    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true);
057
058    writer.newRecord("dummy1");
059    writer.addCoreColumn(DwcTerm.parentNameUsageID);
060    writer.addCoreColumn(DwcTerm.acceptedNameUsageID);
061    writer.newRecord("dummy2");
062    writer.addCoreColumn(DwcTerm.parentNameUsageID);
063    writer.addCoreColumn(DwcTerm.acceptedNameUsageID);
064  }
065
066  @Test
067  public void testHeaders2() throws Exception {
068    File dwcaDir = FileUtils.createTempDir();
069    dwcaDir.deleteOnExit();
070    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true);
071
072    writer.newRecord("dummy1");
073    writer.addCoreColumn(DwcTerm.parentNameUsageID);
074    writer.addCoreColumn(DwcTerm.acceptedNameUsageID);
075    writer.newRecord("dummy2");
076    assertThrows(IllegalStateException.class, () -> writer.addCoreColumn(DwcTerm.scientificName));
077  }
078
079  @Test
080  public void testHeaders3() throws Exception {
081    File dwcaDir = FileUtils.createTempDir();
082    dwcaDir.deleteOnExit();
083    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true);
084
085    writer.newRecord("dummy1");
086    writer.addCoreColumn(DwcTerm.parentNameUsageID);
087    writer.addCoreColumn(DwcTerm.acceptedNameUsageID);
088
089    // define extension columns
090    Map<Term, String> eData = new HashMap<>();
091    eData.put(DwcTerm.locality, "locality1");
092    eData.put(DwcTerm.occurrenceStatus, "present");
093    writer.addExtensionRecord(GbifTerm.Distribution, eData);
094
095    eData.put(DwcTerm.establishmentMeans, "alien");
096    assertThrows(IllegalStateException.class, () -> writer.addExtensionRecord(GbifTerm.Distribution, eData));
097  }
098
099
100  @Test
101  public void testHeaderWriting() throws Exception {
102    File dwcaDir = FileUtils.createTempDir();
103    dwcaDir.deleteOnExit();
104    LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath());
105
106    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, dwcaDir, true);
107
108    writer.newRecord("dummy1");
109    writer.addCoreColumn(DwcTerm.parentNameUsageID);
110    writer.addCoreColumn(DwcTerm.acceptedNameUsageID);
111    writer.addCoreColumn(DwcTerm.scientificName);
112    writer.addCoreColumn(GbifTerm.canonicalName);
113    writer.addCoreColumn(DwcTerm.taxonRank, "species");
114    writer.addCoreColumn(DwcTerm.taxonomicStatus);
115    writer.addCoreColumn(DwcTerm.kingdom);
116    writer.addCoreColumn(DwcTerm.phylum);
117    writer.addCoreColumn(DwcTerm.class_);
118    writer.addCoreColumn(DwcTerm.order);
119    writer.addCoreColumn(DwcTerm.family);
120    writer.addCoreColumn(GbifTerm.depth);
121    writer.addCoreColumn(GbifTerm.depthAccuracy);
122
123    writer.newRecord("dummy2");
124    writer.addCoreColumn(DwcTerm.kingdom, "Plantae");
125    writer.addCoreColumn(DwcTerm.phylum);
126    writer.addCoreColumn(DwcTerm.class_);
127    writer.addCoreColumn(DwcTerm.order);
128    writer.addCoreColumn(DwcTerm.family, "Asteraceae");
129
130    writer.newRecord("dummy3");
131    writer.addCoreColumn(GbifTerm.depth, "2");
132    writer.addCoreColumn(GbifTerm.depthAccuracy, "1");
133
134    // define extension columns
135    Map<Term, String> eData = new HashMap<>();
136    // distributions
137    eData.put(DwcTerm.locality, "locality1");
138    eData.put(DwcTerm.occurrenceStatus, "present");
139    eData.put(DwcTerm.establishmentMeans, "alien");
140    writer.addExtensionRecord(GbifTerm.Distribution, eData);
141
142    eData.put(DwcTerm.locality, "locality2");
143    writer.addExtensionRecord(GbifTerm.Distribution, eData);
144
145    writer.close();
146
147    File cf = new File(dwcaDir, writer.getDataFiles().get(DwcTerm.Taxon));
148    File df = new File(dwcaDir, writer.getDataFiles().get(GbifTerm.Distribution));
149
150    // check if taxon file contains headers
151    String[] headers = getFirstRow(cf);
152    LOG.debug(String.join("; ", headers));
153    assertEquals(14, headers.length);
154    assertEquals("taxonID", headers[0]);
155    assertEquals("parentNameUsageID", headers[1]);
156    assertEquals("kingdom", headers[7]);
157
158    // check if extension file contains headers
159    headers = getFirstRow(df);
160    LOG.debug(String.join("; ", headers));
161    assertEquals(4, headers.length);
162    assertEquals("taxonID", headers[0]);
163  }
164
165  private String[] getFirstRow(File f) throws IOException {
166    BufferedReader r = FileUtils.getUtf8Reader(f);
167    String firstRow = r.readLine();
168    return firstRow.split("\t");
169  }
170
171  @Test
172  public void testRoundtrip() {
173    try {
174      // read taxon archive
175      Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath());
176      assertEquals(2, arch.getExtensions().size());
177      int coreRecords = 0;
178      int allRecords = 0;
179
180      // write taxon archive
181      File tempArch = FileUtils.createTempDir();
182      tempArch.deleteOnExit();
183      System.out.println("Writing temporary test archive to " + tempArch.getAbsolutePath());
184      DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, tempArch);
185      for (StarRecord rec : arch) {
186        // core
187        coreRecords++;
188        allRecords += rec.size();
189
190        writer.newRecord(rec.core().id());
191        for (Term term : arch.getCore().getTerms()) {
192          writer.addCoreColumn(term, rec.core().value(term));
193        }
194        // extensions
195        for (Term rt : rec.extensions().keySet()) {
196          ArchiveFile af = arch.getExtension(rt);
197          // iterate over records for one extension
198          for (Record row : rec.extension(rt)) {
199            writer.addExtensionRecord(rt, DwcaWriter.recordToMap(row, af));
200          }
201        }
202      }
203      writer.close();
204
205      // reread and compare
206      Archive arch2 = DwcFiles.fromLocation(tempArch.toPath());
207
208      int coreRecords2 = 0;
209      int allRecords2 = 0;
210      for (StarRecord rec : arch2) {
211        // core
212        coreRecords2++;
213        allRecords2 += rec.size();
214      }
215
216      // compare
217      assertEquals(coreRecords, coreRecords2);
218      assertEquals(allRecords, allRecords2);
219
220
221    } catch (Exception e) {
222      e.printStackTrace();
223      fail();
224    }
225  }
226  
227  @Test
228  public void testWriterUsingCoreIdTerm() throws Exception {
229    File dwcaDir = FileUtils.createTempDir();
230    dwcaDir.deleteOnExit();
231    LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath());
232
233    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true);
234
235    writer.newRecord("dummy1");
236    writer.addCoreColumn(DwcTerm.parentNameUsageID);
237    writer.addCoreColumn(DwcTerm.acceptedNameUsageID);
238    writer.close();
239
240    Archive arch = DwcFiles.fromLocation(dwcaDir.toPath());
241    Iterator<Record> recIt = arch.getCore().iterator();
242    Record firstRecord = recIt.next();
243    assertEquals("dummy1", firstRecord.id());
244    assertEquals("dummy1", firstRecord.value(DwcTerm.taxonID));
245  }
246  
247  /**
248   * Test the writing of an archive that includes some default values in the core and in one extension.
249   */
250  @Test
251  public void testWriterUsingDefaultValues() throws Exception {
252    File dwcaDir = FileUtils.createTempDir();
253    dwcaDir.deleteOnExit();
254    LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath());
255
256    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true);
257
258    writer.newRecord("dummy1");
259    writer.addCoreColumn(DwcTerm.parentNameUsageID, "1");
260    writer.addCoreColumn(DwcTerm.acceptedNameUsageID, "2");
261    writer.addCoreColumn(DwcTerm.countryCode);
262    
263    // add a VernacularName extension record
264    Map<Term,String> extensionRecord = new HashMap<>();
265    extensionRecord.put(DwcTerm.vernacularName, "Komodo Dragon");
266    extensionRecord.put(DcTerm.language, null);
267    writer.addExtensionRecord(GbifTerm.VernacularName, extensionRecord);
268    
269    writer.addCoreDefaultValue(DwcTerm.collectionCode, "A2Z");
270    writer.addCoreDefaultValue(DwcTerm.countryCode, "CA");
271    writer.addDefaultValue(GbifTerm.VernacularName, DcTerm.language, "en");
272    
273    // add a second records and overwrite the default value
274    writer.newRecord("dummy2");
275    writer.addCoreColumn(DwcTerm.parentNameUsageID, "1");
276    writer.addCoreColumn(DwcTerm.acceptedNameUsageID, "2");
277    writer.addCoreColumn(DwcTerm.countryCode, "ID");
278    
279    // add a VernacularName extension record
280    extensionRecord = new HashMap<>();
281    extensionRecord.put(DwcTerm.vernacularName, "Varano De Komodo");
282    extensionRecord.put(DcTerm.language, "es");
283    writer.addExtensionRecord(GbifTerm.VernacularName, extensionRecord);
284    
285    writer.close();
286
287    // validate core content
288    Archive arch = DwcFiles.fromLocation(dwcaDir.toPath());
289    Iterator<Record> recIt = arch.getCore().iterator();
290    Record firstRecord = recIt.next();
291    assertEquals("dummy1", firstRecord.id());
292    assertEquals("dummy1", firstRecord.value(DwcTerm.taxonID));
293    assertEquals("A2Z", firstRecord.value(DwcTerm.collectionCode));
294    assertEquals("CA", firstRecord.value(DwcTerm.countryCode));
295    assertEquals("A2Z", arch.getCore().getField(DwcTerm.collectionCode).getDefaultValue());
296    assertEquals("CA", arch.getCore().getField(DwcTerm.countryCode).getDefaultValue());
297    
298    Record secondRecord = recIt.next();
299    assertEquals("dummy2", secondRecord.id());
300    assertEquals("dummy2", secondRecord.value(DwcTerm.taxonID));
301    assertEquals("A2Z", secondRecord.value(DwcTerm.collectionCode));
302    assertEquals("ID", secondRecord.value(DwcTerm.countryCode));
303    
304    // validate extension content
305    Iterator<Record> extRecIt = arch.getExtension(GbifTerm.VernacularName).iterator();
306    assertEquals("en", arch.getExtension(GbifTerm.VernacularName).getField(DcTerm.language).getDefaultValue());
307    firstRecord = extRecIt.next();
308    assertEquals("dummy1", firstRecord.id());
309    assertEquals("en", firstRecord.value(DcTerm.language));
310    
311    secondRecord = extRecIt.next();
312    assertEquals("dummy2", secondRecord.id());
313    assertEquals("es", secondRecord.value(DcTerm.language));
314  }
315
316  @Test
317  public void testWriteMetadata() throws Exception {
318    File dwcaDir = FileUtils.createTempDir();
319    dwcaDir.deleteOnExit();
320    LOG.info("Test archive writer in {}", dwcaDir.getAbsolutePath());
321
322    DwcaWriter writer = new DwcaWriter(DwcTerm.Taxon, DwcTerm.taxonID, dwcaDir, true);
323
324    writer.setMetadata("<eml/>", "eml.xml");
325    writer.close();
326
327    Archive arch = DwcFiles.fromLocation(dwcaDir.toPath());
328    assertEquals("eml.xml", arch.getMetadataLocation());
329    assertEquals("<eml/>", arch.getMetadata());
330  }
331}