001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.file.tabular;
015
016import java.io.Closeable;
017import java.io.IOException;
018import java.text.ParseException;
019import java.util.List;
020
021/**
022 * Contract for a reader of a tabular data file (e.g. CSV); {@link #read()} yields its lines as {@code T}.
023 */
024public interface TabularDataFileReader<T> extends Closeable {
025
026  /**
027   * Returns the header line of the tabular data file (if possible).
028   * @return the headers, or {@code null} when no header line can be provided
029   * @throws IOException if an I/O error occurs while obtaining the header line
030   */
031  List<String> getHeaderLine() throws IOException;
032
033  /**
034   * Reads the next non-empty line of the tabular data file.
035   * An empty line is a line with no printable characters; a line made only of the defined separators is
036   * not treated as empty and is expected to be returned.
037   *
038   * @return the next line of the tabular data file, or {@code null} once the end of the file is reached
039   *
040   * @throws IOException if an I/O error occurs while reading
041   * @throws ParseException if the line cannot be parsed
042   */
043  T read() throws IOException, ParseException;
044
045  /**
046   * Returns the line number at which the last record returned by {@link #read()} starts.
047   * If no records have been returned yet, this method is expected to return 0.
048   * Once {@link #read()} has returned {@code null}, this method returns the number of the last line in the file.
049   * Note that a very last line that is empty and holds only an end-of-line character is not counted.
050   * Line numbers include the header line and empty lines (if applicable).
051   *
052   * @return the line number at which the last record starts
053   */
054  long getLastRecordLineNumber();
055
056  /**
057   * Returns the number of records returned so far by the {@link #read()} method.
058   * If no records have been returned yet, this method is expected to return 0.
059   * A record can span multiple lines.
060   *
061   * @return the number of records returned by {@link #read()}
062   */
063  long getLastRecordNumber();
064}