/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.utils.file.csv;

import org.gbif.utils.file.FileUtils;

import java.io.File;
import java.io.IOException;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Checks the delimiter, quote character and header row count that
 * {@link CSVReaderFactory#build(File)} reports for a set of sample files
 * loaded from the test classpath.
 */
public class CSVReaderFactoryTest {

  /** Quote character expected for quoted files; boxed to match {@code reader.quoteChar}. */
  private static final Character DOUBLE_QUOTE = '"';

  @Test
  public void detectCsvAlwaysQuoted() throws IOException {
    assertDetected(",", DOUBLE_QUOTE, "csv/csv_always_quoted.csv");
  }

  /**
   * Expects an unquoted comma separated file to be reported without a quote character.
   *
   * <p>Intentionally NOT annotated with {@code @Test}: we don't want unquoted CSVs, and the same
   * file is asserted to use the {@code "} quote character in {@link #detectCsvOptionallyQuoted()}.
   */
  public void detectCsvUnquoted() throws IOException {
    assertDetected(",", null, "csv/csv_unquoted.txt");
  }

  @Test
  public void detectPipe() throws IOException {
    assertDetected("|", null, "csv/pipe_separator.txt");
  }

  @Test
  public void detectSemicolon() throws IOException {
    assertDetected(";", null, "csv/semicolon_separator.csv");
  }

  /**
   * CSV files with rare optional quotes are hard to detect but cause problems, so we prefer to
   * default to the {@code "} quotation whenever comma separated files are used.
   * This is why {@link #detectCsvUnquoted()} is disabled right now.
   */
  @Test
  public void detectCsvOptionallyQuoted() throws IOException {
    assertDetected(
        ",",
        DOUBLE_QUOTE,
        "csv/csv_optional_quotes_puma.csv",
        "csv/csv_optional_quotes_excel2008.csv",
        "csv/csv_incl_single_quotes.csv",
        "csv/iucn100.csv",
        "csv/csv_unquoted.txt",
        "csv/csv_unquoted_coordinates.txt");
  }

  @Test
  public void detectTab() throws IOException {
    assertDetected(
        "\t",
        null,
        "csv/ipni.tab.txt",
        "csv/tab_separated_generic.txt",
        "csv/iucn100.tab.txt",
        "csv/ebird.tab.txt",
        "csv/irmng.tail",
        "csv/MOBOT.tab.csv");
  }

  @Test
  public void detectTabQuoted() throws IOException {
    assertDetected(
        "\t",
        DOUBLE_QUOTE,
        "csv/eol/my_darwincore_tab_separated_quoted.txt",
        "csv/eol/my_dataobject_tab_separated_quoted.txt",
        "csv/borza_tab_separated_quoted.txt");
  }

  /**
   * Builds a reader for each classpath file and asserts the detected settings.
   *
   * <p>Every file is expected to report exactly one header row. Each assertion message names the
   * file being checked so a failure inside the loop can be attributed to a specific resource.
   *
   * @param expectedDelimiter delimiter the reader must report
   * @param expectedQuote quote character the reader must report, or {@code null} if none is expected
   * @param files classpath-relative paths of the sample files to check
   * @throws IOException declared so failures raised while building the reader propagate to the test
   */
  private static void assertDetected(
      String expectedDelimiter, Character expectedQuote, String... files) throws IOException {
    for (String fn : files) {
      String context = "Check " + fn;
      File source = FileUtils.getClasspathFile(fn);
      assertTrue(source.exists(), "Missing test resource " + fn);

      CSVReader reader = CSVReaderFactory.build(source);
      // try/finally so the reader is closed even when one of the assertions fails.
      try {
        assertEquals(expectedDelimiter, reader.delimiter, context);
        if (expectedQuote == null) {
          assertNull(reader.quoteChar, context);
        } else {
          // Boxed comparison: a null quoteChar is reported as a mismatch instead of an NPE.
          assertEquals(expectedQuote, reader.quoteChar, context);
        }
        assertEquals(1, reader.headerRows, context);
      } finally {
        reader.close();
      }
    }
  }
}