001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.utils.file.tabular; 015 016import java.io.IOException; 017import java.io.Reader; 018import java.util.List; 019import java.util.Objects; 020 021import com.fasterxml.jackson.dataformat.csv.CsvSchema; 022 023/** 024 * Static utility methods related to {@link TabularDataFileReader} instances. 025 */ 026public class TabularFiles { 027 028 /** 029 * Get a new TabularDataFileReader. 030 * 031 * @param reader 032 * @param delimiterChar 033 * @param endOfLineSymbols 034 * @param quoteChar nullable 035 * @param headerLine 036 * @return 037 */ 038 public static TabularDataFileReader<List<String>> newTabularFileReader( 039 Reader reader, 040 char delimiterChar, 041 String endOfLineSymbols, 042 Character quoteChar, 043 boolean headerLine) 044 throws IOException { 045 return newTabularFileReader( 046 reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, null); 047 } 048 049 /** 050 * Get a new TabularDataFileReader. 051 * 052 * @param reader 053 * @param delimiterChar 054 * @param endOfLineSymbols 055 * @param quoteChar Nullable 056 * @param headerLine do we expect the first line before the data to be a header line 057 * @param lineToSkipBeforeHeader Nullable. How many line(s) is required to skip in the file before reading the header or the data. 058 * This can be used to skip a comment block but if there is a header line, the comment block shall be before the header. 059 * @return 060 */ 061 public static TabularDataFileReader<List<String>> newTabularFileReader( 062 Reader reader, 063 char delimiterChar, 064 String endOfLineSymbols, 065 Character quoteChar, 066 boolean headerLine, 067 Integer lineToSkipBeforeHeader) 068 throws IOException { 069 070 Objects.requireNonNull(reader, "A Reader must be provided"); 071 Objects.requireNonNull(endOfLineSymbols, "A endOfLineSymbols must be provided"); 072 return new JacksonCsvFileReader( 073 reader, delimiterChar, endOfLineSymbols, quoteChar, headerLine, lineToSkipBeforeHeader); 074 } 075 076 /** 077 * Get a new TabularDataFileReader using default quote char (") and default endOfLineSymbols (\n). 078 * Usage: 079 * <pre> 080 * {@code 081 * try (TabularDataFileReader<List<String>> reader = TabularFiles.newTabularFileReader( 082 * Files.newBufferedReader(Paths.get("/tmp/test.csv"), StandardCharsets.UTF_8), ',', true)) { 083 * ... 084 * } 085 * } 086 * </pre> 087 * 088 * 089 * @param reader 090 * @param delimiterChar 091 * @param headerLine 092 */ 093 public static TabularDataFileReader<List<String>> newTabularFileReader( 094 Reader reader, char delimiterChar, boolean headerLine) throws IOException { 095 return new JacksonCsvFileReader( 096 reader, 097 delimiterChar, 098 new String(CsvSchema.DEFAULT_LINEFEED), 099 CsvSchema.DEFAULT_QUOTE_CHAR, 100 headerLine); 101 } 102}