001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.file;
015
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.Test;

import com.sun.management.UnixOperatingSystemMXBean;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
034
035public class CompressionUtilTest {
036
037  public static File createTempDirectory() throws IOException {
038
039    final File temp = File.createTempFile("temp", Long.toString(System.nanoTime()));
040
041    if (!temp.delete()) {
042      throw new IOException("Could not delete temp file: " + temp.getAbsolutePath());
043    }
044
045    if (!temp.mkdir()) {
046      throw new IOException("Could not create temp directory: " + temp.getAbsolutePath());
047    }
048    temp.deleteOnExit();
049
050    return temp;
051  }
052
053  private static void assureEqualContent(List<File> result, String metaContent, String dataContent)
054      throws IOException {
055    for (File f : result) {
056      String x = FileUtils.readFileToString(f, "utf-8");
057      if ("meta.xml".equals(f.getName())) {
058        assertEquals(metaContent, x);
059      } else if ("quote_in_quote.csv".equals(f.getName())) {
060        assertEquals(dataContent, x);
061      } else {
062        fail("unexpected file");
063      }
064    }
065  }
066
067  public File classpathFile(String path) {
068    File f = null;
069    // relative path. Use classpath instead
070    URL url = getClass().getClassLoader().getResource(path);
071    if (url != null) {
072      f = new File(url.getFile());
073    }
074    return f;
075  }
076
077  @Test
078  public void testDecompress() throws IOException {
079    // meta.xml
080    File meta = classpathFile("compression/archive/meta.xml");
081    String metaContent = FileUtils.readFileToString(meta, "utf-8");
082    // quote_in_quote.csv
083    File data = classpathFile("compression/archive/quote_in_quote.csv");
084    String dataContent = FileUtils.readFileToString(data, "utf-8");
085
086    File tmpDir = createTempDirectory();
087    File testArchiveFile = classpathFile("compression/archive.zip");
088    List<File> result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
089    assertEquals(2, result.size());
090    assureEqualContent(result, metaContent, dataContent);
091
092    FileUtils.cleanDirectory(tmpDir);
093    testArchiveFile = classpathFile("compression/archive.tgz");
094    result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
095    assertEquals(2, result.size());
096    assureEqualContent(result, metaContent, dataContent);
097
098    FileUtils.cleanDirectory(tmpDir);
099    testArchiveFile = classpathFile("compression/archive-zip.dat");
100    result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
101    assertEquals(2, result.size());
102    assureEqualContent(result, metaContent, dataContent);
103
104    FileUtils.cleanDirectory(tmpDir);
105    testArchiveFile = classpathFile("compression/archive-tgz.dat");
106    result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
107    assertEquals(2, result.size());
108    assureEqualContent(result, metaContent, dataContent);
109
110    FileUtils.cleanDirectory(tmpDir);
111    testArchiveFile = classpathFile("compression/archive.tar");
112    result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
113    assertEquals(2, result.size());
114    assureEqualContent(result, metaContent, dataContent);
115
116    FileUtils.cleanDirectory(tmpDir);
117    testArchiveFile = classpathFile("compression/cate.zip");
118    result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
119    assertEquals(3, result.size());
120  }
121
122  @Test
123  public void testUnableToDecompress() throws IOException {
124    File tmpDir = createTempDirectory();
125    File testArchiveFile = classpathFile("compression/test.txt.gz");
126    List<File> result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
127    assertEquals(0, result.size());
128
129    testArchiveFile = classpathFile("compression/empty-file");
130    result = CompressionUtil.decompressFile(tmpDir, testArchiveFile);
131    assertEquals(0, result.size());
132  }
133
134  @Test
135  public void testGunzipWithTar() throws IOException {
136    File tmpDir = createTempDirectory();
137    FileUtils.cleanDirectory(tmpDir);
138    File testArchiveFile = classpathFile("compression/archive-tgz.dat");
139    List<File> result = CompressionUtil.ungzipFile(tmpDir, testArchiveFile, true);
140    assertEquals(2, result.size());
141  }
142
143  @Test
144  public void testGunzipNoTar() throws IOException {
145    File tmpDir = createTempDirectory();
146    FileUtils.cleanDirectory(tmpDir);
147    File testArchiveFile = classpathFile("compression/test.txt.gz");
148    List<File> result = CompressionUtil.ungzipFile(tmpDir, testArchiveFile, false);
149    assertEquals(1, result.size());
150    assertEquals("test.txt", result.get(0).getName());
151  }
152
153  /**
154   * Test unzipping a folder, while NOT preserving subdirectories.
155   */
156  @Test
157  public void testUnzipFolderDoNotKeepSubdirectoriesOrHiddenFiles() throws IOException {
158    File tmpDir = createTempDirectory();
159    File testZippedFolder = classpathFile("compression/withSubdirsAndHiddenFiles.zip");
160    List<File> files = CompressionUtil.unzipFile(tmpDir, testZippedFolder, false);
161    assertEquals(9, files.size()); // 9 files, 0 directories
162    assertTrue(new File(tmpDir, "dwca.zip").exists());
163    assertTrue(new File(tmpDir, "eml.xml").exists());
164    assertTrue(new File(tmpDir, "publication.log").exists());
165    assertTrue(new File(tmpDir, "resource.xml").exists());
166    assertTrue(new File(tmpDir, "test4.rtf").exists());
167    assertTrue(new File(tmpDir, "taxon.log").exists());
168    assertTrue(new File(tmpDir, "taxon.txt").exists());
169    assertTrue(new File(tmpDir, "taxonshort.log").exists());
170    assertTrue(new File(tmpDir, "taxonshort.txt").exists());
171    // assert subdirectory is removed
172    File sourceDir = new File(tmpDir, "sources");
173    assertFalse(sourceDir.exists());
174    // assert wrapping root directory is removed
175    File rootDir = new File(tmpDir, "withSubdirsAndHiddenFiles");
176    assertFalse(rootDir.exists());
177    // assert hidden files are removed
178    assertFalse(new File(tmpDir, ".hidden1").exists());
179    assertFalse(new File(tmpDir, "/sources/.hidden2").exists());
180    // assert .DS_Store removed
181    assertFalse(new File(tmpDir, ".DS_Store").exists());
182    // assert __MACOSX removed
183    assertFalse(new File(tmpDir, "__MACOSX").exists());
184  }
185
186  /**
187   * Test unzipping a folder, while preserving subdirectories.
188   */
189  @Test
190  public void testUnzipFolderKeepSubdirectoriesButNoHiddenFile() throws IOException {
191    File tmpDir = createTempDirectory();
192    File testZippedFolder = classpathFile("compression/withSubdirsAndHiddenFiles.zip");
193    List<File> files = CompressionUtil.unzipFile(tmpDir, testZippedFolder, true);
194    assertEquals(6, files.size()); // 5 files, 1 directory having 4 files inside
195    assertTrue(new File(tmpDir, "dwca.zip").exists());
196    assertTrue(new File(tmpDir, "eml.xml").exists());
197    assertTrue(new File(tmpDir, "publication.log").exists());
198    assertTrue(new File(tmpDir, "resource.xml").exists());
199    assertTrue(new File(tmpDir, "test4.rtf").exists());
200    // assert subdirectory was preserved
201    File sourceDir = new File(tmpDir, "sources");
202    assertTrue(sourceDir.isDirectory());
203    assertTrue(sourceDir.exists());
204    assertTrue(new File(sourceDir, "taxon.log").exists());
205    assertTrue(new File(sourceDir, "taxon.txt").exists());
206    assertTrue(new File(sourceDir, "taxonshort.log").exists());
207    assertTrue(new File(sourceDir, "taxonshort.txt").exists());
208    // assert wrapping root directory is removed
209    File rootDir = new File(tmpDir, "withSubdirsAndHiddenFiles");
210    assertFalse(rootDir.exists());
211    // assert hidden files are removed
212    assertFalse(new File(tmpDir, ".hidden1").exists());
213    assertFalse(new File(tmpDir, "/sources/.hidden2").exists());
214    // assert .DS_Store removed
215    assertFalse(new File(tmpDir, ".DS_Store").exists());
216    assertFalse(new File(sourceDir, ".DS_Store").exists());
217    // assert __MACOSX removed
218    assertFalse(new File(tmpDir, "__MACOSX").exists());
219  }
220
221  /**
222   * Test unzipping a folder, while preserving subdirectories, but making sure the .svn directories and their subfiles
223   * and subdirectories are not extracted.
224   */
225  @Test
226  public void testUnzipFolderKeepSubdirectoriesButNoHiddenDirectories() throws IOException {
227    File tmpDir = createTempDirectory();
228    File testZippedFolder = classpathFile("compression/with_dot_svn.zip");
229    List<File> files = CompressionUtil.unzipFile(tmpDir, testZippedFolder, true);
230    assertEquals(7, files.size()); // 5 files, 2 directories
231    // assert wrapping root directory is removed
232    File rootDir = new File(tmpDir, "res1");
233    assertFalse(rootDir.exists());
234    assertTrue(new File(tmpDir, "dwca.zip").exists());
235    assertTrue(new File(tmpDir, "eml.xml").exists());
236    assertTrue(new File(tmpDir, "publication.log").exists());
237    assertTrue(new File(tmpDir, "resource.xml").exists());
238    assertTrue(new File(tmpDir, "test4.rtf").exists());
239
240    // assert subdirectory sources was preserved
241    File sourceDir = new File(tmpDir, "sources");
242    assertTrue(sourceDir.isDirectory());
243    assertTrue(sourceDir.exists());
244    assertEquals(1, sourceDir.listFiles().length);
245    assertTrue(new File(sourceDir, "occurrence.txt").exists());
246
247    // assert subdirectory dwca was preserved
248    File dwcaDir = new File(tmpDir, "dwca");
249    assertTrue(dwcaDir.isDirectory());
250    assertTrue(dwcaDir.exists());
251    assertEquals(4, dwcaDir.listFiles().length);
252    assertTrue(new File(dwcaDir, "occurrence.txt").exists());
253    assertTrue(new File(dwcaDir, "image.txt").exists());
254    assertTrue(new File(dwcaDir, "meta.xml").exists());
255    assertTrue(new File(dwcaDir, "eml.xml").exists());
256
257    // assert hidden files and directories are removed
258    assertFalse(new File(tmpDir, ".svn").exists());
259    assertFalse(new File(tmpDir, "/sources/.svn").exists());
260    // assert .DS_Store removed
261    assertFalse(new File(tmpDir, ".DS_Store").exists());
262    // assert __MACOSX removed
263    assertFalse(new File(tmpDir, "__MACOSX").exists());
264  }
265
266  @Test
267  public void testDecompressZippedFolderWithNoSubdirectories() throws IOException {
268    File tmpDir = createTempDirectory();
269    File testZippedFolder = classpathFile("compression/archive.zip");
270    List<File> files = CompressionUtil.unzipFile(tmpDir, testZippedFolder);
271    assertEquals(2, files.size());
272    File meta = new File(tmpDir, "meta.xml");
273    assertTrue(meta.exists());
274    File csv = new File(tmpDir, "quote_in_quote.csv");
275    assertTrue(csv.exists());
276  }
277
278  @Test
279  public void testZipFolder() throws IOException {
280    File zipWithDirs = File.createTempFile("aha", ".zip");
281    System.out.println(zipWithDirs.getAbsolutePath());
282
283    File zipWithoutDirs = File.createTempFile("aha", ".zip");
284    System.out.println(zipWithoutDirs.getAbsolutePath());
285    // tmp.deleteOnExit();
286    File testFolder = classpathFile("charsets");
287    // remember how many files we have in the root folder, exlcuding files in subdirectories
288    final int rootFileNum = testFolder.listFiles().length;
289
290    CompressionUtil.zipDir(testFolder, zipWithDirs, true);
291    CompressionUtil.zipDir(testFolder, zipWithoutDirs, false);
292
293    assertNotEquals(zipWithDirs.length(), zipWithoutDirs.length());
294
295    // now decompress the subdir zip and make sure we get the same amount of root files
296    File tmpDir = org.gbif.utils.file.FileUtils.createTempDir();
297    tmpDir.deleteOnExit();
298    CompressionUtil.unzipFile(tmpDir, zipWithDirs, true);
299    int decompressedRootFileNum = tmpDir.listFiles().length;
300    assertEquals(rootFileNum, decompressedRootFileNum);
301  }
302
303  /**
304   * Check we can unpack ZIP64 archives.
305   *
306   * infozip64.zip was created with <code>echo 'hello | zip infozip.zip -</code>, following comments on
307   * https://bugs.openjdk.java.net/browse/JDK-8186464
308   */
309  @Test
310  public void testDecompressZippedFolderWithNoSubdirectoriesx() throws IOException {
311    File tmpDir = createTempDirectory();
312    File testZippedFolder = classpathFile("compression/infozip64.zip");
313
314    List<File> files = CompressionUtil.unzipFile(tmpDir, testZippedFolder);
315    assertEquals(1, files.size());
316    File dash = new File(tmpDir, "-");
317    assertTrue(dash.exists());
318  }
319
320  /**
321   * Check that files are closed after use.
322   */
323  @Test
324  public void testFilesClosedCorrectly() throws Exception {
325    File tmpDir = createTempDirectory();
326    File testZippedFolder = classpathFile("compression/infozip64.zip");
327
328    // Unzip first, to use the code path.  (Various things like /dev/random are opened.)
329    CompressionUtil.unzipFile(tmpDir, testZippedFolder);
330
331    OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean();
332    long openFiles = 0;
333    if (os instanceof UnixOperatingSystemMXBean) {
334      openFiles = ((UnixOperatingSystemMXBean) os).getOpenFileDescriptorCount();
335    }
336
337    // From all the other tests.
338    List<String> files =
339        Arrays.asList(
340            "compression/archive/meta.xml",
341            "compression/archive/quote_in_quote.csv",
342            "compression/archive.tgz",
343            "compression/archive-tgz.dat",
344            "compression/archive.zip",
345            "compression/archive-zip.dat",
346            "compression/cate.zip",
347            "compression/infozip64.zip",
348            "compression/test.txt.gz",
349            "compression/with_dot_svn.zip",
350            "compression/withSubdirsAndHiddenFiles.zip");
351
352    for (String file : files) {
353      tmpDir = createTempDirectory();
354      CompressionUtil.decompressFile(tmpDir, classpathFile(file));
355    }
356
357    if (os instanceof UnixOperatingSystemMXBean) {
358      assertEquals(openFiles, ((UnixOperatingSystemMXBean) os).getOpenFileDescriptorCount());
359    } else {
360      System.err.println("Cannot check files are closed except on Unix.");
361    }
362
363    // Try ls -l /proc/`pgrep -f java -n`/fd
364    // Thread.sleep(30000);
365  }
366}