001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.file.tabular;
015
016import java.io.IOException;
017import java.io.Reader;
018import java.util.List;
019import java.util.Objects;
020
021import com.fasterxml.jackson.dataformat.csv.CsvSchema;
022
023/**
024 * Static utility methods related to {@link TabularDataFileReader} instances.
025 */
026public class TabularFiles {
027
028  /**
029   * Get a new TabularDataFileReader.
030   *
031   * @param reader
032   * @param delimiterChar
033   * @param endOfLineSymbols
034   * @param quoteChar nullable
035   * @param headerLine
036   * @return
037   */
038  public static TabularDataFileReader<List<String>> newTabularFileReader(
039      Reader reader,
040      char delimiterChar,
041      String endOfLineSymbols,
042      Character quoteChar,
043      boolean headerLine)
044      throws IOException {
045    return newTabularFileReader(
046        reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, null);
047  }
048
049  /**
050   * Get a new TabularDataFileReader.
051   *
052   * @param reader
053   * @param delimiterChar
054   * @param endOfLineSymbols
055   * @param quoteChar Nullable
056   * @param headerLine do we expect the first line before the data to be a header line
057   * @param lineToSkipBeforeHeader Nullable. How many line(s) is required to skip in the file before reading the header or the data.
058   *                               This can be used to skip a comment block but if there is a header line, the comment block shall be before the header.
059   * @return
060   */
061  public static TabularDataFileReader<List<String>> newTabularFileReader(
062      Reader reader,
063      char delimiterChar,
064      String endOfLineSymbols,
065      Character quoteChar,
066      boolean headerLine,
067      Integer lineToSkipBeforeHeader)
068      throws IOException {
069
070    Objects.requireNonNull(reader, "A Reader must be provided");
071    Objects.requireNonNull(endOfLineSymbols, "A endOfLineSymbols must be provided");
072    return new JacksonCsvFileReader(
073        reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, lineToSkipBeforeHeader);
074  }
075
076  /**
077   * Get a new TabularDataFileReader using default quote char (") and default endOfLineSymbols (\n).
078   * Usage:
079   * <pre>
080   * {@code
081   * try (TabularDataFileReader<List<String>> reader = TabularFiles.newTabularFileReader(
082   * Files.newBufferedReader(Paths.get("/tmp/test.csv"), StandardCharsets.UTF_8), ',', true)) {
083   * ...
084   * }
085   * }
086   * </pre>
087   *
088   *
089   * @param reader
090   * @param delimiterChar
091   * @param headerLine
092   */
093  public static TabularDataFileReader<List<String>> newTabularFileReader(
094      Reader reader, char delimiterChar, boolean headerLine) throws IOException {
095    return new JacksonCsvFileReader(
096        reader,
097        delimiterChar,
098        new String(CsvSchema.DEFAULT_LINEFEED),
099        CsvSchema.DEFAULT_QUOTE_CHAR,
100        headerLine);
101  }
102}