001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.file.csv;
015
016import org.gbif.utils.file.FileUtils;
017
018import java.io.File;
019import java.io.IOException;
020
021import org.junit.jupiter.api.Test;
022
023import static org.junit.jupiter.api.Assertions.assertEquals;
024import static org.junit.jupiter.api.Assertions.assertNull;
025import static org.junit.jupiter.api.Assertions.assertTrue;
026
027/**
028 *
029 */
030public class CSVReaderFactoryTest {
031
032  @Test
033  public void detectCsvAlwaysQuoted() throws IOException {
034    File source = FileUtils.getClasspathFile("csv/csv_always_quoted.csv");
035    CSVReader reader = CSVReaderFactory.build(source);
036    assertEquals(",", reader.delimiter);
037    assertEquals(new Character('"'), reader.quoteChar);
038    assertEquals(1, reader.headerRows);
039    reader.close();
040  }
041
042  /**
043   * We dont want unquoted CSVs, See detectCsvOptionallyQuoted()
044   */
045  public void detectCsvUnquoted() throws IOException {
046    String[] files = {"csv/csv_unquoted.txt"};
047    for (String fn : files) {
048      File source = FileUtils.getClasspathFile(fn);
049      CSVReader reader = CSVReaderFactory.build(source);
050      assertEquals(",", reader.delimiter);
051      assertNull(reader.quoteChar);
052      assertEquals(1, reader.headerRows);
053      reader.close();
054    }
055  }
056
057  @Test
058  public void detectPipe() throws IOException {
059    String[] files = new String[] {"csv/pipe_separator.txt"};
060    for (String fn : files) {
061      File source = FileUtils.getClasspathFile(fn);
062      CSVReader reader = CSVReaderFactory.build(source);
063      assertEquals("|", reader.delimiter);
064      assertNull(reader.quoteChar);
065      assertEquals(1, reader.headerRows);
066      reader.close();
067    }
068  }
069
070  @Test
071  public void detectSemicolon() throws IOException {
072    String[] files = {"csv/semicolon_separator.csv"};
073    for (String fn : files) {
074      File source = FileUtils.getClasspathFile(fn);
075      CSVReader reader = CSVReaderFactory.build(source);
076      assertEquals(";", reader.delimiter);
077      assertNull(reader.quoteChar);
078      assertEquals(1, reader.headerRows);
079      reader.close();
080    }
081  }
082
083  /**
084   * As CSV files with rare optional quotes are hard to detect but cause problems
085   * we prefer to default to the " quotation in case comma seperated files are used.
086   * This is why test detectCsvUnquoted() is outcommented right now!
087   */
088  @Test
089  public void detectCsvOptionallyQuoted() throws IOException {
090    String[] files = {
091      "csv/csv_optional_quotes_puma.csv",
092      "csv/csv_optional_quotes_excel2008.csv",
093      "csv/csv_incl_single_quotes.csv",
094      "csv/iucn100.csv",
095      "csv/csv_unquoted.txt",
096      "csv/csv_unquoted_coordinates.txt"
097    };
098    for (String fn : files) {
099      File source = FileUtils.getClasspathFile(fn);
100      CSVReader reader = CSVReaderFactory.build(source);
101      assertEquals(",", reader.delimiter);
102      assertEquals(new Character('"'), reader.quoteChar);
103      assertEquals(1, reader.headerRows);
104      reader.close();
105    }
106  }
107
108  @Test
109  public void detectTab() throws IOException {
110    String[] files = {
111      "csv/ipni.tab.txt",
112      "csv/tab_separated_generic.txt",
113      "csv/iucn100.tab.txt",
114      "csv/ebird.tab.txt",
115      "csv/irmng.tail",
116      "csv/MOBOT.tab.csv"
117    };
118    for (String fn : files) {
119      File source = FileUtils.getClasspathFile(fn);
120      CSVReader reader = CSVReaderFactory.build(source);
121      assertEquals("\t", reader.delimiter, "Check " + fn);
122      assertNull(reader.quoteChar);
123      assertEquals(1, reader.headerRows);
124      reader.close();
125    }
126  }
127
128  @Test
129  public void detectTabQuoted() throws IOException {
130    String[] files = {
131      "csv/eol/my_darwincore_tab_separated_quoted.txt",
132      "csv/eol/my_dataobject_tab_separated_quoted.txt",
133      "csv/borza_tab_separated_quoted.txt"
134    };
135    for (String fn : files) {
136      File source = FileUtils.getClasspathFile(fn);
137      CSVReader reader = CSVReaderFactory.build(source);
138      assertEquals("\t", reader.delimiter);
139      assertTrue(reader.quoteChar == '"');
140      assertEquals(1, reader.headerRows);
141      reader.close();
142    }
143  }
144}