/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.utils.file.tabular;

import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Objects;

import com.fasterxml.jackson.dataformat.csv.CsvSchema;

/**
 * Static factory methods producing {@link TabularDataFileReader} instances.
 * <p>
 * Every factory returns a reader that yields each record as a {@code List<String>}.
 */
public class TabularFiles {

  /**
   * Get a new TabularDataFileReader without skipping any line before the header/data.
   * <p>
   * Delegates to
   * {@link #newTabularFileReader(Reader, char, String, Character, boolean, Integer)}
   * with a {@code null} {@code lineToSkipBeforeHeader}.
   *
   * @param reader source of the tabular content, must not be null
   * @param delimiterChar character separating the columns
   * @param endOfLineSymbols symbol(s) terminating a line, must not be null
   * @param quoteChar character used to quote values, nullable
   * @param headerLine do we expect the first line before the data to be a header line
   * @return a new {@link TabularDataFileReader} returning each record as a list of Strings
   * @throws IOException declared by the creation of the underlying reader
   * @throws NullPointerException if {@code reader} or {@code endOfLineSymbols} is null
   */
  public static TabularDataFileReader<List<String>> newTabularFileReader(Reader reader, char delimiterChar,
                                                                         String endOfLineSymbols, Character quoteChar,
                                                                         boolean headerLine) throws IOException {
    return newTabularFileReader(reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, null);
  }

  /**
   * Get a new TabularDataFileReader.
   *
   * @param reader source of the tabular content, must not be null
   * @param delimiterChar character separating the columns
   * @param endOfLineSymbols symbol(s) terminating a line, must not be null
   * @param quoteChar character used to quote values, nullable
   * @param headerLine do we expect the first line before the data to be a header line
   * @param lineToSkipBeforeHeader Nullable. How many line(s) must be skipped in the file before reading the header
   *                               or the data. This can be used to skip a comment block, but if there is a header
   *                               line, the comment block shall be before the header.
   * @return a new {@link TabularDataFileReader} returning each record as a list of Strings
   * @throws IOException declared by the creation of the underlying reader
   * @throws NullPointerException if {@code reader} or {@code endOfLineSymbols} is null
   */
  public static TabularDataFileReader<List<String>> newTabularFileReader(Reader reader, char delimiterChar,
                                                                         String endOfLineSymbols, Character quoteChar,
                                                                         boolean headerLine,
                                                                         Integer lineToSkipBeforeHeader) throws IOException {

    // Fail fast with an explicit message instead of an anonymous failure further down.
    Objects.requireNonNull(reader, "A Reader must be provided");
    Objects.requireNonNull(endOfLineSymbols, "A endOfLineSymbols must be provided");
    return new JacksonCsvFileReader(reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine,
                                    lineToSkipBeforeHeader);
  }

  /**
   * Get a new TabularDataFileReader using the default quote char ({@code "}) and the default
   * endOfLineSymbols ({@code \n}), both taken from {@link CsvSchema}.
   * <p>
   * Usage:
   * <pre>
   * {@code
   * try (TabularDataFileReader<List<String>> reader = TabularFiles.newTabularFileReader(
   *        Files.newBufferedReader(Paths.get("/tmp/test.csv"), StandardCharsets.UTF_8), ',', true)) {
   *   ...
   * }
   * }
   * </pre>
   *
   * @param reader source of the tabular content, must not be null
   * @param delimiterChar character separating the columns
   * @param headerLine do we expect the first line before the data to be a header line
   * @return a new {@link TabularDataFileReader} returning each record as a list of Strings
   * @throws IOException declared by the creation of the underlying reader
   * @throws NullPointerException if {@code reader} is null
   */
  public static TabularDataFileReader<List<String>> newTabularFileReader(Reader reader, char delimiterChar,
                                                                         boolean headerLine) throws IOException {
    // Same guard (and message) as the fully-parameterized factory, so all entry points agree.
    Objects.requireNonNull(reader, "A Reader must be provided");
    // CsvSchema.DEFAULT_LINEFEED is a char[]; the reader constructor takes a String.
    return new JacksonCsvFileReader(reader, delimiterChar, new String(CsvSchema.DEFAULT_LINEFEED),
                                    CsvSchema.DEFAULT_QUOTE_CHAR, headerLine);
  }

}