001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.text;
015
016import java.util.Comparator;
017
018import org.apache.commons.lang3.text.StrTokenizer;
019
020/**
021 * A comparator for delimited lines that compares the content of a given column number for all rows.
022 * This allows to sort for example tab delimited files by any column and not only the first one.
023 * <p/>
024 * If no explicit comparator is given a string comparison is done for the actual column content.
025 */
026public class LineComparator implements Comparator<String> {
027
028  private final StrTokenizer tokenizer;
029  private final int column;
030  private final Comparator<String> comp;
031
032  public LineComparator(int column, String columnDelimiter) {
033    this(column, columnDelimiter, null, null);
034  }
035
036  public LineComparator(int column, String columnDelimiter, Character quoteChar) {
037    this(column, columnDelimiter, quoteChar, null);
038  }
039
040  public LineComparator(
041      int column,
042      String columnDelimiter,
043      Character quoteChar,
044      Comparator<String> columnComparator) {
045    this.column = column;
046    this.comp =
047        columnComparator == null
048            ? Comparator.nullsFirst(Comparator.naturalOrder())
049            : columnComparator;
050    tokenizer = new StrTokenizer();
051    tokenizer.setEmptyTokenAsNull(true);
052    tokenizer.setIgnoreEmptyTokens(false);
053    if (quoteChar != null) {
054      tokenizer.setQuoteChar(quoteChar);
055    }
056    tokenizer.setDelimiterString(columnDelimiter);
057  }
058
059  public LineComparator(int column, String columnDelimiter, Comparator<String> columnComparator) {
060    this(column, columnDelimiter, null, columnComparator);
061  }
062
063  @Override
064  public int compare(String o1, String o2) {
065    if (o1 == null || o2 == null) {
066      if (o1 == null && o2 == null) {
067        return 0;
068      } else if (o1 == null) {
069        return 1;
070      } else {
071        return -1;
072      }
073    } else {
074      tokenizer.reset(o1);
075      String[] parts = tokenizer.getTokenArray();
076      String s1 = null;
077      if (parts != null && parts.length > column) {
078        s1 = parts[column];
079      }
080      tokenizer.reset(o2);
081      parts = tokenizer.getTokenArray();
082      String s2 = null;
083      if (parts != null && parts.length > column) {
084        s2 = parts[column];
085      }
086
087      if (s1 == null && s2 == null) {
088        return 0;
089      } else if (s1 == null) {
090        return 1;
091      } else if (s2 == null) {
092        return -1;
093      } else {
094        return comp.compare(s1, s2);
095      }
096    }
097  }
098
099  public Comparator<String> getColumnComparator() {
100    return comp;
101  }
102}