001/* 002 * Copyright 2021 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.utils.file.tabular; 017 018import java.io.Closeable; 019import java.io.IOException; 020import java.text.ParseException; 021import java.util.List; 022 023/** 024 * Interface defining a reader for tabular data file (e.g. CSV) 025 */ 026public interface TabularDataFileReader<T> extends Closeable { 027 028 /** 029 * Get the header line of the tabular data file (if possible). 030 * 031 * @return headers or null 032 */ 033 List<String> getHeaderLine() throws IOException; 034 035 /** 036 * Read a non-empty line of the tabular data file. 037 * An empty line represents a line with no printable characters. A line with only the defined separators is 038 * expected to be returned. 039 * 040 * @return the next line of the tabular data file or null if the end of the file is reached. 041 * 042 * @throws IOException 043 * @throws ParseException 044 */ 045 T read() throws IOException, ParseException; 046 047 048 /** 049 * The line number of where the last record returned by {@link #read()} starts. 050 * If no records have been returned yet this method is expected to return 0. 051 * Once {@link #read()} returned null, this methods will return the number of the last line in the file. 052 * Note that if the very last line is an empty line with only an endline character it will not be counted. 053 * Line number includes header line and empty lines (if applicable). 054 * 055 * @return line number of where the last record starts 056 */ 057 long getLastRecordLineNumber(); 058 059 /** 060 * Represents the number of record returned by the {@link #read()} method. 061 * If no records have been returned yet this method is expected to return 0. 062 * A record can span over multiple line. 063 * 064 * @return 065 */ 066 long getLastRecordNumber(); 067 068}