001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.dwc;
015
016import org.gbif.dwc.record.StarRecord;
017import org.gbif.dwc.terms.DcTerm;
018import org.gbif.dwc.terms.DwcTerm;
019import org.gbif.dwc.terms.GbifTerm;
020import org.gbif.dwc.terms.Term;
021import org.gbif.utils.file.FileUtils;
022
023import java.io.File;
024import java.util.HashMap;
025import java.util.Map;
026
027import org.junit.jupiter.api.Test;
028
029import static org.junit.jupiter.api.Assertions.assertEquals;
030
031public class ArchiveTest {
032
033  /**
034   * Check we can handle the simplest of archives.
035   */
036  @Test
037  public void testIteratorDwc() throws Exception {
038    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("simplest-archive").toPath());
039
040    int count = 0;
041    for (StarRecord rec : arch) {
042      assertEquals("Quercus alba", rec.core().value(DwcTerm.scientificName));
043      assertEquals("Species", rec.core().value(DwcTerm.taxonRank));
044      count++;
045    }
046    assertEquals(2, count);
047  }
048
049  /**
050   * Check we can handle an archive file with multiple header lines
051   */
052  @Test
053  public void testMultilineHeader() throws Exception {
054    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("multiline-header").toPath());
055
056    int count = 0;
057    for (StarRecord rec : arch) {
058      count++;
059      assertEquals(String.valueOf(count), rec.core().id());
060      assertEquals("Quercus alba", rec.core().value(DwcTerm.scientificName));
061      assertEquals("Species", rec.core().value(DwcTerm.taxonRank));
062    }
063    assertEquals(2, count);
064  }
065
066  /**
067   * Check we can handle an archive file with multiple |SV files in the core and extension.
068   */
069  @Test
070  public void testMultifile() throws Exception {
071    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("multifile-psv").toPath());
072
073    int count = 0;
074    for (StarRecord rec : arch) {
075      count++;
076      assertEquals(String.valueOf(count), rec.core().id());
077      assertEquals("Quercus alba", rec.core().value(DwcTerm.scientificName));
078      assertEquals("Species", rec.core().value(DwcTerm.taxonRank));
079      assertEquals("id"+count, rec.extension(GbifTerm.Multimedia).get(0).value(DcTerm.identifier));
080    }
081    assertEquals(6, count);
082  }
083
084  /**
085   * Check we can handle an archive file with multiple files each with multiple header lines
086   */
087  @Test
088  public void testMultifileMultilineHeader() throws Exception {
089    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("multifile-multiline-header").toPath());
090
091    int count = 0;
092    for (StarRecord rec : arch) {
093      count++;
094      assertEquals(String.valueOf(count), rec.core().id());
095      assertEquals("Quercus alba", rec.core().value(DwcTerm.scientificName));
096      assertEquals("Species", rec.core().value(DwcTerm.taxonRank));
097    }
098    assertEquals(6, count);
099  }
100
101  @Test
102  public void testConstituents() {
103    File dir = FileUtils.getClasspathFile("constituentsdwca");
104
105    Archive arch = new Archive();
106    arch.setLocation(dir);
107    arch.setMetadataLocation("eml.xml");
108    ArchiveField id = new ArchiveField(0, null, null, null);
109    ArchiveField datasetId = new ArchiveField(1, DwcTerm.datasetID, null, null);
110    ArchiveField sciname = new ArchiveField(2, DwcTerm.scientificName, null, null);
111
112    Map<Term, ArchiveField> fields = new HashMap<>();
113    fields.put(DwcTerm.taxonomicStatus, sciname);
114    fields.put(DwcTerm.datasetID, datasetId);
115
116    Map<String, File> cons = arch.getConstituentMetadata();
117    assertEquals(6, cons.size());
118    for (Map.Entry<String, File> c : cons.entrySet()) {
119      final String name = c.getKey();
120      final File file = c.getValue();
121      assertEquals(name, file.getName().split("\\.")[0]);
122    }
123  }
124}