001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.utils.file.tabular; 015 016import java.io.Closeable; 017import java.io.IOException; 018import java.text.ParseException; 019import java.util.List; 020 021/** 022 * Interface defining a reader for tabular data file (e.g. CSV) 023 */ 024public interface TabularDataFileReader<T> extends Closeable { 025 026 /** 027 * Get the header line of the tabular data file (if possible). 028 * 029 * @return headers or null 030 */ 031 List<String> getHeaderLine() throws IOException; 032 033 /** 034 * Read a non-empty line of the tabular data file. 035 * An empty line represents a line with no printable characters. A line with only the defined separators is 036 * expected to be returned. 037 * 038 * @return the next line of the tabular data file or null if the end of the file is reached. 039 * 040 * @throws IOException 041 * @throws ParseException 042 */ 043 T read() throws IOException, ParseException; 044 045 /** 046 * The line number of where the last record returned by {@link #read()} starts. 047 * If no records have been returned yet this method is expected to return 0. 048 * Once {@link #read()} returned null, this methods will return the number of the last line in the file. 049 * Note that if the very last line is an empty line with only an endline character it will not be counted. 050 * Line number includes header line and empty lines (if applicable). 051 * 052 * @return line number of where the last record starts 053 */ 054 long getLastRecordLineNumber(); 055 056 /** 057 * Represents the number of record returned by the {@link #read()} method. 058 * If no records have been returned yet this method is expected to return 0. 059 * A record can span over multiple line. 060 * 061 * @return 062 */ 063 long getLastRecordNumber(); 064}