001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.file.tabular;
015
016import org.gbif.utils.file.FileUtils;
017
018import java.io.File;
019import java.io.IOException;
020import java.nio.charset.StandardCharsets;
021import java.util.List;
022
023import org.junit.jupiter.api.Test;
024import org.junit.jupiter.api.io.TempDir;
025
026import static org.junit.jupiter.api.Assertions.assertEquals;
027
028/**
029 * Unit test related to {@link TabularFileNormalizer}
030 */
031public class TabularFileNormalizerTest {
032
033  @TempDir File tempDir;
034
035  @Test
036  public void testTabularFileNormalizer() throws IOException {
037    // this file includes a null character (\0) that is expected to be removed
038    File csvFile = FileUtils.getClasspathFile("tabular/test_normalize.csv");
039    File normalizedFile = new File(tempDir, "newFile.csv");
040
041    int numberOfLine =
042        TabularFileNormalizer.normalizeFile(
043            csvFile.toPath(), normalizedFile.toPath(), StandardCharsets.UTF_8, ',', "\n", '\"');
044
045    List<String> rows =
046        org.apache.commons.io.FileUtils.readLines(normalizedFile, StandardCharsets.UTF_8);
047    assertEquals("1,\"a,\",b", rows.get(0), "Quoted delimiter");
048    assertEquals("2,c,d", rows.get(1), "Trailing newline");
049    assertEquals("3,é,f", rows.get(2), "Quoted non-ASCII and null character");
050    assertEquals(
051        "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986,\"Pi, Pi, Pi, Pi, Pi, Pi, Pi, Pi\",ππππππππππππππππππππππππππππππππππππππππππππππ",
052        rows.get(3),
053        "Long values");
054    assertEquals(4, numberOfLine);
055  }
056}