001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.dwc;
015
016import org.gbif.dwc.record.Record;
017import org.gbif.dwc.record.StarRecord;
018import org.gbif.dwc.terms.DcTerm;
019import org.gbif.dwc.terms.DwcTerm;
020import org.gbif.utils.file.ClosableIterator;
021import org.gbif.utils.file.FileUtils;
022
023import java.io.File;
024import java.io.IOException;
025
026import org.junit.jupiter.api.Test;
027
028import static org.junit.jupiter.api.Assertions.assertEquals;
029import static org.junit.jupiter.api.Assertions.assertFalse;
030import static org.junit.jupiter.api.Assertions.assertNotNull;
031import static org.junit.jupiter.api.Assertions.assertNull;
032import static org.junit.jupiter.api.Assertions.assertThrows;
033import static org.junit.jupiter.api.Assertions.assertTrue;
034import static org.junit.jupiter.api.Assertions.fail;
035
036/**
037 * Tests related to {@link DwcFiles}.
038 * For methods where the call is delegated to {@link InternalDwcFileFactory}, the corresponding test is in
039 * {@link InternalDwcFileFactoryTest}.
040 */
041public class DwcFilesTest {
042
043  @Test
044  public void testNormalizeIfRequired() throws Exception {
045    ArchiveFile testArchiveFile = new ArchiveFile();
046    testArchiveFile.setFieldsEnclosedBy(null);
047    assertNull(testArchiveFile.normalizeIfRequired());
048  }
049
050  @Test
051  public void testDwcRecordIterator() throws IOException {
052    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath());
053    int count=0;
054    try(ClosableIterator<Record> it = arch.getCore().iterator()) {
055      while (it.hasNext()) {
056        it.next();
057        count++;
058      }
059    }
060    catch (Exception e) {
061      e.printStackTrace();
062      fail();
063    }
064    assertEquals(3248, count);
065  }
066
067  @Test
068  public void testStarRecordIterator() throws IOException {
069
070    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath());
071
072    try (ClosableIterator<StarRecord> it = arch.iterator(false, false)) {
073      assertNotNull(arch.getCore());
074      assertEquals(2, arch.getExtensions().size());
075      int found = 0;
076      int extensionRecords = 0;
077      while (it.hasNext()) {
078        StarRecord rec = it.next();
079        //System.out.println(rec.core().id() + " → " + rec.size());
080        // count all extension records
081        extensionRecords += rec.size();
082
083        if (rec.core().id().equals("544382")) {
084          found++;
085          assertEquals("Tursiops truncatus truncatus Montagu", rec.core().value(DwcTerm.scientificName));
086          // test extension iter
087          int i = 0;
088          for (Record er : rec) {
089            i++;
090          }
091          assertEquals(27, i);
092        } else if (rec.core().id().equals("105833")) {
093          found++;
094//        105833  Chinese river dolphin English
095//        105833  Chinese lake dolphin  English
096//        105833  Pei c’hi      Chinese
097//        105833  White flag dolphin  English
098          int i = 0;
099          for (Record er : rec) {
100            i++;
101            if ("Chinese river dolphin".equals(er.value(DwcTerm.vernacularName))) {
102              assertEquals("English", er.value(DcTerm.language));
103              assertEquals("English", er.value(DcTerm.language));
104            } else if ("Chinese lake dolphin".equals(er.value(DwcTerm.vernacularName))) {
105              assertEquals("English", er.value(DcTerm.language));
106            } else if ("Pei c’hi".equals(er.value(DwcTerm.vernacularName))) {
107              assertEquals("Chinese", er.value(DcTerm.language));
108            } else {
109              assertEquals("White flag dolphin", er.value(DwcTerm.vernacularName));
110              assertEquals("English", er.value(DcTerm.language));
111            }
112          }
113          assertEquals(4, i);
114        } else if (rec.core().id().equals("105838")) {
115          found++;
116//      105838  Delfin de La Plata  Spanish
117//      105838  Franciscana Spanish
118//      105838  Franciscano Portuguese
119//      105838  La Plata dolphin  English
120//      105838  Tonina  Spanish
121//      105838  Toninha Portuguese
122          int i = 0;
123          for (Record er : rec) {
124            i++;
125            if ("Delfin de La Plata".equals(er.value(DwcTerm.vernacularName))) {
126              assertEquals("Spanish", er.value(DcTerm.language));
127            } else if ("Franciscana".equals(er.value(DwcTerm.vernacularName))) {
128              assertEquals("Spanish", er.value(DcTerm.language));
129            } else if ("Franciscano".equals(er.value(DwcTerm.vernacularName))) {
130              assertEquals("Portuguese", er.value(DcTerm.language));
131            } else if ("La Plata dolphin".equals(er.value(DwcTerm.vernacularName))) {
132              assertEquals("English", er.value(DcTerm.language));
133            } else if ("Tonina".equals(er.value(DwcTerm.vernacularName))) {
134              assertEquals("Spanish", er.value(DcTerm.language));
135            } else {
136              assertEquals("Toninha", er.value(DwcTerm.vernacularName));
137              assertEquals("Portuguese", er.value(DcTerm.language));
138            }
139          }
140          assertEquals(6, i);
141        }
142      }
143      assertEquals(3, found);
144      assertEquals(1057, extensionRecords);
145    } catch (Exception e) {
146      e.printStackTrace();
147      fail();
148    }
149
150  }
151
152  @Test
153  public void testNormalizeAndSort() throws IOException {
154
155    Archive arch = DwcFiles.fromLocation(FileUtils.getClasspathFile("archive-dwc").toPath());
156    ArchiveFile core = arch.getCore();
157    File sortedFile = ArchiveFile.getLocationFileSorted(core.getFirstLocationFile());
158
159    //ensure the sorted file for the core doesn't exist
160    if(sortedFile.exists()) {
161      sortedFile.delete();
162    }
163    assertTrue(arch.getCore().normalizeAndSort());
164    assertTrue(sortedFile.exists());
165
166    //call the method again. Should return false since we already have the sorted file available.
167    assertFalse(arch.getCore().normalizeAndSort());
168  }
169
170  /**
171   * Basic validation of archives, that the declared files exist and have basic, valid structure.
172   */
173  @Test
174  public void testInvalidArchives() throws IOException {
175    System.out.println("Starting on invalids");
176    // Simple archive problems
177    assertThrows(
178        UnsupportedArchiveException.class,
179        () -> DwcFiles.fromLocation(FileUtils.getClasspathFile("invalid/empty").toPath()),
180        "Empty archive should not be opened.");
181
182    assertThrows(
183        UnsupportedArchiveException.class,
184        () -> DwcFiles.fromLocation(FileUtils.getClasspathFile("invalid/meta-file-location-missing").toPath()),
185        "Archive with missing file location in meta.xml should not be opened.");
186
187    // Extension archive problems
188    assertThrows(
189        UnsupportedArchiveException.class,
190        () -> DwcFiles.fromLocation(FileUtils.getClasspathFile("invalid/extension-missing").toPath()),
191        "Archive with missing extension file should not be opened.");
192
193    assertThrows(
194        UnsupportedArchiveException.class,
195        () -> DwcFiles.fromLocation(FileUtils.getClasspathFile("invalid/extension-location-missing").toPath()),
196        "Archive with missing extension file location in meta.xml should not be opened.");
197
198    assertThrows(
199        UnsupportedArchiveException.class,
200        () -> DwcFiles.fromLocation(FileUtils.getClasspathFile("invalid/extension-core-id-missing").toPath()),
201        "Archive with extension lacking coreid in meta.xml should not be opened."
202    );
203
204    assertThrows(
205        UnsupportedArchiveException.class,
206        () -> DwcFiles.fromLocation(FileUtils.getClasspathFile("invalid/extension-id-missing").toPath()),
207        "Archive with extension and core missing id in meta.xml should not be opened.");
208  }
209}