001package org.gbif.utils.text;
002
003import java.util.Comparator;
004
005import com.google.common.collect.Ordering;
006import org.apache.commons.lang3.text.StrTokenizer;
007
008/**
009 * A comparator for delimited lines that compares the content of a given column number for all rows.
010 * This allows to sort for example tab delimited files by any column and not only the first one.
011 * <p/>
012 * If no explicit comparator is given a string comparison is done for the actual column content.
013 */
014public class LineComparator implements Comparator<String> {
015
016  private final StrTokenizer tokenizer;
017  private final int column;
018  private final Comparator<String> comp;
019
020  public LineComparator(int column, String columnDelimiter) {
021    this(column, columnDelimiter, null, null);
022  }
023
024  public LineComparator(int column, String columnDelimiter, Character quoteChar) {
025    this(column, columnDelimiter, quoteChar, null);
026  }
027
028  public LineComparator(int column, String columnDelimiter, Character quoteChar, Comparator<String> columnComparator) {
029    this.column = column;
030    this.comp = columnComparator == null ? Ordering.<String>natural().nullsFirst() : columnComparator;
031    tokenizer = new StrTokenizer();
032    tokenizer.setEmptyTokenAsNull(true);
033    tokenizer.setIgnoreEmptyTokens(false);
034    if (quoteChar != null) {
035      tokenizer.setQuoteChar(quoteChar);
036    }
037    tokenizer.setDelimiterString(columnDelimiter);
038  }
039
040  public LineComparator(int column, String columnDelimiter, Comparator<String> columnComparator) {
041    this(column, columnDelimiter, null, null);
042  }
043
044  public int compare(String o1, String o2) {
045    if (o1 == null || o2 == null) {
046      if (o1 == null && o2 == null) {
047        return 0;
048      } else if (o1 == null) {
049        return 1;
050      } else {
051        return -1;
052      }
053    } else {
054      tokenizer.reset(o1);
055      String[] parts = tokenizer.getTokenArray();
056      String s1 = null;
057      if (parts != null && parts.length > column) {
058        s1 = parts[column];
059      }
060      tokenizer.reset(o2);
061      parts = tokenizer.getTokenArray();
062      String s2 = null;
063      if (parts != null && parts.length > column) {
064        s2 = parts[column];
065      }
066
067      if (s1 == null && s2 == null) {
068        return 0;
069      } else if (s1 == null) {
070        return 1;
071      } else if (s2 == null)  {
072        return -1;
073      } else {
074        return comp.compare(s1, s2);
075      }
076
077    }
078  }
079
080  public Comparator<String> getColumnComparator() {
081    return comp;
082  }
083
084}