001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.dwc; 015 016import org.gbif.dwc.record.Record; 017import org.gbif.dwc.terms.DcTerm; 018import org.gbif.dwc.terms.DwcTerm; 019import org.gbif.dwc.terms.Term; 020import org.gbif.utils.file.ClosableIterator; 021import org.gbif.utils.file.FileUtils; 022 023import java.io.File; 024import java.io.FileNotFoundException; 025import java.io.IOException; 026import java.nio.file.Path; 027import java.nio.file.Paths; 028import java.util.Arrays; 029import java.util.Optional; 030 031import org.junit.jupiter.api.Test; 032import org.junit.jupiter.api.io.TempDir; 033 034import static org.junit.jupiter.api.Assertions.assertEquals; 035import static org.junit.jupiter.api.Assertions.assertFalse; 036import static org.junit.jupiter.api.Assertions.assertNotNull; 037import static org.junit.jupiter.api.Assertions.assertThrows; 038import static org.junit.jupiter.api.Assertions.assertTrue; 039 040/** 041 * Tests related to {@link InternalDwcFileFactory}. 042 */ 043public class InternalDwcFileFactoryTest { 044 045 @TempDir 046 public File folder; 047 048 @Test 049 public void testFromCompressedZip() throws UnsupportedArchiveException, IOException { 050 // test zip with 1 extension file 051 File zip = FileUtils.getClasspathFile("archive-tax.zip"); 052 assertIdInCompressed(zip.toPath(), "113775"); 053 } 054 055 @Test 056 public void testFromCompressedTarGzip() throws UnsupportedArchiveException, IOException { 057 // test gziped tar file with 1 extension 058 File gzip = FileUtils.getClasspathFile("archive-tax.tar.gz"); 059 assertIdInCompressed(gzip.toPath(), "113775"); 060 } 061 062 @Test 063 public void testNonExistingFileFromCompressed() { 064 // test zip with 1 extension file 065 Path none = Paths.get("/ping/pong/nuts"); 066 // try to open archive 067 assertThrows(FileNotFoundException.class, 068 () -> InternalDwcFileFactory.fromCompressed(none, folder.toPath())); 069 } 070 071 @Test 072 public void testNonExistingFileFromLocation() { 073 File none = new File("/ping/pong/nuts"); 074 assertThrows(FileNotFoundException.class, () -> InternalDwcFileFactory.fromLocation(none.toPath())); 075 } 076 077 /** 078 * Givin a compressed file, make sure we can uncompressed it, read the core and find the provided id. 079 */ 080 private void assertIdInCompressed(Path compressedFile, String id) throws IOException { 081 File tmpDir = folder; 082 083 // open archive from zip 084 Archive arch = InternalDwcFileFactory.fromCompressed(compressedFile, tmpDir.toPath()); 085 assertNotNull(arch.getCore().getId()); 086 assertEquals(1, arch.getExtensions().size()); 087 088 boolean found = false; 089 try (ClosableIterator<Record> it = arch.getCore().iterator()){ 090 while(it.hasNext()){ 091 if(id.equals(it.next().id())){ 092 found = true; 093 break; 094 } 095 } 096 } catch (Exception e) { 097 e.printStackTrace(); 098 } 099 assertTrue(found, "Can find the id " + id + " inside the archive " + compressedFile.getFileName()); 100 } 101 102 @Test 103 public void testDetermineRowType() { 104 Optional<Term> rowType = InternalDwcFileFactory 105 .determineRowType(Arrays.asList(DwcTerm.decimalLatitude, DwcTerm.occurrenceID)); 106 assertTrue(rowType.isPresent()); 107 assertEquals(DwcTerm.Occurrence, rowType.get()); 108 } 109 110 @Test 111 public void testDetermineRecordIdentifier() { 112 Optional<Term> id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.decimalLatitude, DwcTerm.occurrenceID)); 113 assertTrue(id.isPresent()); 114 assertEquals(DwcTerm.occurrenceID, id.get()); 115 116 id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.taxonID, DwcTerm.scientificName)); 117 assertTrue(id.isPresent()); 118 assertEquals(DwcTerm.taxonID, id.get()); 119 120 //eventId should be picked even if taxonID is there 121 id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.eventID, DwcTerm.scientificName, DwcTerm.taxonID)); 122 assertTrue(id.isPresent()); 123 assertEquals(DwcTerm.taxonID, id.get()); 124 125 id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.decimalLongitude, DwcTerm.scientificName, 126 DcTerm.identifier)); 127 assertTrue(id.isPresent()); 128 assertEquals(DcTerm.identifier, id.get()); 129 130 //eventId should be picked even if taxonID is there 131 id = InternalDwcFileFactory.determineRecordIdentifier(Arrays.asList(DwcTerm.decimalLongitude, DwcTerm.scientificName, DwcTerm.decimalLatitude)); 132 assertFalse(id.isPresent()); 133 } 134}