001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.utils.file.tabular;
017
018import java.io.Closeable;
019import java.io.IOException;
020import java.text.ParseException;
021import java.util.List;
022
023/**
024 * Interface defining a reader for tabular data files (e.g. CSV); {@code T} is the type returned by {@link #read()}.
025 */
026public interface TabularDataFileReader<T> extends Closeable {
027
028  /**
029   * Get the header line of the tabular data file (if possible).
030   * @return the headers, or null when they are not available
031   * @throws IOException declared by the signature; the exact failure conditions are implementation-specific
032   */
033  List<String> getHeaderLine() throws IOException;
034
035  /**
036   * Read the next non-empty line of the tabular data file.
037   * An empty line is a line with no printable characters. A line with only the defined separators is
038   * still expected to be returned.
039   *
040   * @return the next line of the tabular data file, or null if the end of the file is reached.
041   *
042   * @throws IOException declared by the signature; the exact failure conditions are implementation-specific
043   * @throws ParseException presumably when a line cannot be parsed -- confirm against the implementations
044   */
045  T read() throws IOException, ParseException;
046
047
048  /**
049   * The line number where the last record returned by {@link #read()} starts.
050   * If no records have been returned yet, this method is expected to return 0.
051   * Once {@link #read()} has returned null, this method will return the number of the last line in the file.
052   * Note that if the very last line is an empty line with only an endline character, it will not be counted.
053   * The line number includes the header line and empty lines (if applicable).
054   *
055   * @return line number where the last record starts
056   */
057  long getLastRecordLineNumber();
058
059  /**
060   * Represents the number of records returned by the {@link #read()} method.
061   * If no records have been returned yet, this method is expected to return 0.
062   * A record can span multiple lines, so this value may differ from {@link #getLastRecordLineNumber()}.
063   *
064   * @return the number of records returned by {@link #read()}, or 0 if none has been returned yet
065   */
066  long getLastRecordNumber();
067
068}