001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.dwc;
015
016import org.gbif.dwc.record.Record;
017import org.gbif.dwc.terms.DcTerm;
018import org.gbif.dwc.terms.DwcTerm;
019import org.gbif.dwc.terms.Term;
020import org.gbif.utils.file.ClosableIterator;
021import org.gbif.utils.file.FileUtils;
022
023import java.io.File;
024import java.io.FileNotFoundException;
025import java.io.IOException;
026import java.nio.file.Path;
027import java.nio.file.Paths;
028import java.util.Arrays;
029import java.util.Optional;
030
031import org.junit.jupiter.api.Test;
032import org.junit.jupiter.api.io.TempDir;
033
034import static org.junit.jupiter.api.Assertions.assertEquals;
035import static org.junit.jupiter.api.Assertions.assertFalse;
036import static org.junit.jupiter.api.Assertions.assertNotNull;
037import static org.junit.jupiter.api.Assertions.assertThrows;
038import static org.junit.jupiter.api.Assertions.assertTrue;
039
040/**
041 * Tests related to {@link InternalDwcFileFactory}.
042 */
043public class InternalDwcFileFactoryTest {
044
045  @TempDir
046  public File folder;
047
048  @Test
049  public void testFromCompressedZip() throws UnsupportedArchiveException, IOException {
050    // test zip with 1 extension file
051    File zip = FileUtils.getClasspathFile("archive-tax.zip");
052    assertIdInCompressed(zip.toPath(), "113775");
053  }
054
055  @Test
056  public void testFromCompressedTarGzip() throws UnsupportedArchiveException, IOException {
057    // test gziped tar file with 1 extension
058    File gzip = FileUtils.getClasspathFile("archive-tax.tar.gz");
059    assertIdInCompressed(gzip.toPath(), "113775");
060  }
061
062  @Test
063  public void testNonExistingFileFromCompressed() {
064    // test zip with 1 extension file
065    Path none = Paths.get("/ping/pong/nuts");
066    // try to open archive
067    assertThrows(FileNotFoundException.class,
068        () -> InternalDwcFileFactory.fromCompressed(none, folder.toPath()));
069  }
070
071  @Test
072  public void testNonExistingFileFromLocation() {
073    File none = new File("/ping/pong/nuts");
074    assertThrows(FileNotFoundException.class, () -> InternalDwcFileFactory.fromLocation(none.toPath()));
075  }
076
077  /**
078   * Givin a compressed file, make sure we can uncompressed it, read the core and find the provided id.
079   */
080  private void assertIdInCompressed(Path compressedFile, String id) throws IOException {
081    File tmpDir = folder;
082
083    // open archive from zip
084    Archive arch = InternalDwcFileFactory.fromCompressed(compressedFile, tmpDir.toPath());
085    assertNotNull(arch.getCore().getId());
086    assertEquals(1, arch.getExtensions().size());
087
088    boolean found = false;
089    try (ClosableIterator<Record> it = arch.getCore().iterator()){
090      while(it.hasNext()){
091        if(id.equals(it.next().id())){
092          found = true;
093          break;
094        }
095      }
096    } catch (Exception e) {
097      e.printStackTrace();
098    }
099    assertTrue(found, "Can find the id " + id + " inside the archive " + compressedFile.getFileName());
100  }
101
102  @Test
103  public void testDetermineRowType() {
104    Optional<Term> rowType = InternalDwcFileFactory
105            .determineRowType(Arrays.asList(DwcTerm.decimalLatitude, DwcTerm.occurrenceID));
106    assertTrue(rowType.isPresent());
107    assertEquals(DwcTerm.Occurrence, rowType.get());
108  }
109
110  @Test
111  public void testDetermineRecordIdentifier() {
112    Optional<Term> id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.decimalLatitude, DwcTerm.occurrenceID));
113    assertTrue(id.isPresent());
114    assertEquals(DwcTerm.occurrenceID, id.get());
115
116    id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.taxonID, DwcTerm.scientificName));
117    assertTrue(id.isPresent());
118    assertEquals(DwcTerm.taxonID, id.get());
119
120    //eventId should be picked even if taxonID is there
121    id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.eventID, DwcTerm.scientificName, DwcTerm.taxonID));
122    assertTrue(id.isPresent());
123    assertEquals(DwcTerm.taxonID, id.get());
124
125    id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.decimalLongitude, DwcTerm.scientificName,
126            DcTerm.identifier));
127    assertTrue(id.isPresent());
128    assertEquals(DcTerm.identifier, id.get());
129
130    //eventId should be picked even if taxonID is there
131    id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.decimalLongitude, DwcTerm.scientificName, DwcTerm.decimalLatitude));
132    assertFalse(id.isPresent());
133  }
134}