Source code

001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.utils.file.tabular;
017
018import java.io.IOException;
019import java.io.Reader;
020import java.util.List;
021import java.util.Objects;
022
023import com.fasterxml.jackson.dataformat.csv.CsvSchema;
024
025/**
026 * Static utility methods related to {@link TabularDataFileReader} instances.
027 */
028public class TabularFiles {
029
030  /**
031   * Get a new TabularDataFileReader.
032   *
033   * @param reader
034   * @param delimiterChar
035   * @param endOfLineSymbols
036   * @param quoteChar nullable
037   * @param headerLine
038   * @return
039   */
040  public static TabularDataFileReader<List<String>> newTabularFileReader(Reader reader, char delimiterChar,
041                                                           String endOfLineSymbols, Character quoteChar, boolean headerLine) throws IOException {
042    return newTabularFileReader(reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, null);
043  }
044
045  /**
046   * Get a new TabularDataFileReader.
047   *
048   * @param reader
049   * @param delimiterChar
050   * @param endOfLineSymbols
051   * @param quoteChar Nullable
052   * @param headerLine do we expect the first line before the data to be a header line
053   * @param lineToSkipBeforeHeader Nullable. How many line(s) is required to skip in the file before reading the header or the data.
054   *                               This can be used to skip a comment block but if there is a header line, the comment block shall be before the header.
055   * @return
056   */
057  public static TabularDataFileReader<List<String>> newTabularFileReader(Reader reader, char delimiterChar,
058                                                                         String endOfLineSymbols, Character quoteChar, boolean headerLine,
059                                                                         Integer lineToSkipBeforeHeader) throws IOException {
060
061    Objects.requireNonNull(reader, "A Reader must be provided");
062    Objects.requireNonNull(endOfLineSymbols, "A endOfLineSymbols must be provided");
063    return new JacksonCsvFileReader(reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, lineToSkipBeforeHeader);
064  }
065
066  /**
067   * Get a new TabularDataFileReader using default quote char (") and default endOfLineSymbols (\n).
068   * Usage:
069   * <pre>
070   * {@code
071   * try (TabularDataFileReader<List<String>> reader = TabularFiles.newTabularFileReader(
072  Files.newBufferedReader(Paths.get("/tmp/test.csv"), StandardCharsets.UTF_8), ',', true)) {
073  ...
074  }
075   * }
076   * </pre>
077   *
078   *
079   * @param reader
080   * @param delimiterChar
081   * @param headerLine
082   */
083  public static TabularDataFileReader<List<String>> newTabularFileReader(Reader reader, char delimiterChar,
084                                                                         boolean headerLine) throws IOException {
085    return new JacksonCsvFileReader(reader, delimiterChar, new String(CsvSchema.DEFAULT_LINEFEED),
086            CsvSchema.DEFAULT_QUOTE_CHAR, headerLine);
087  }
088
089
090}