001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.utils.file.tabular; 015 016import org.gbif.utils.file.FileUtils; 017 018import java.io.File; 019import java.io.IOException; 020import java.nio.charset.StandardCharsets; 021import java.util.List; 022 023import org.junit.jupiter.api.Test; 024import org.junit.jupiter.api.io.TempDir; 025 026import static org.junit.jupiter.api.Assertions.assertEquals; 027 028/** 029 * Unit test related to {@link TabularFileNormalizer} 030 */ 031public class TabularFileNormalizerTest { 032 033 @TempDir File tempDir; 034 035 @Test 036 public void testTabularFileNormalizer() throws IOException { 037 // this file includes a null character (\0) that is expected to be removed 038 File csvFile = FileUtils.getClasspathFile("tabular/test_normalize.csv"); 039 File normalizedFile = new File(tempDir, "newFile.csv"); 040 041 int numberOfLine = 042 TabularFileNormalizer.normalizeFile( 043 csvFile.toPath(), normalizedFile.toPath(), StandardCharsets.UTF_8, ',', "\n", '\"'); 044 045 List<String> rows = 046 org.apache.commons.io.FileUtils.readLines(normalizedFile, StandardCharsets.UTF_8); 047 assertEquals("1,\"a,\",b", rows.get(0), "Quoted delimiter"); 048 assertEquals("2,c,d", rows.get(1), "Trailing newline"); 049 assertEquals("3,é,f", rows.get(2), "Quoted non-ASCII and null character"); 050 assertEquals( 051 "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986,\"Pi, Pi, Pi, Pi, Pi, Pi, Pi, Pi\",ππππππππππππππππππππππππππππππππππππππππππππππ", 052 rows.get(3), 053 "Long values"); 054 assertEquals(4, numberOfLine); 055 } 056}