001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.utils.text;
017
018import java.util.Comparator;
019
020import org.apache.commons.lang3.text.StrTokenizer;
021
022/**
023 * A comparator for delimited lines that compares the content of a given column number for all rows.
024 * This allows to sort for example tab delimited files by any column and not only the first one.
025 * <p/>
026 * If no explicit comparator is given a string comparison is done for the actual column content.
027 */
028public class LineComparator implements Comparator<String> {
029
030  private final StrTokenizer tokenizer;
031  private final int column;
032  private final Comparator<String> comp;
033
034  public LineComparator(int column, String columnDelimiter) {
035    this(column, columnDelimiter, null, null);
036  }
037
038  public LineComparator(int column, String columnDelimiter, Character quoteChar) {
039    this(column, columnDelimiter, quoteChar, null);
040  }
041
042  public LineComparator(int column, String columnDelimiter, Character quoteChar, Comparator<String> columnComparator) {
043    this.column = column;
044    this.comp = columnComparator == null ? Comparator.nullsFirst(Comparator.naturalOrder()) : columnComparator;
045    tokenizer = new StrTokenizer();
046    tokenizer.setEmptyTokenAsNull(true);
047    tokenizer.setIgnoreEmptyTokens(false);
048    if (quoteChar != null) {
049      tokenizer.setQuoteChar(quoteChar);
050    }
051    tokenizer.setDelimiterString(columnDelimiter);
052  }
053
054  public LineComparator(int column, String columnDelimiter, Comparator<String> columnComparator) {
055    this(column, columnDelimiter, null, columnComparator);
056  }
057
058  @Override
059  public int compare(String o1, String o2) {
060    if (o1 == null || o2 == null) {
061      if (o1 == null && o2 == null) {
062        return 0;
063      } else if (o1 == null) {
064        return 1;
065      } else {
066        return -1;
067      }
068    } else {
069      tokenizer.reset(o1);
070      String[] parts = tokenizer.getTokenArray();
071      String s1 = null;
072      if (parts != null && parts.length > column) {
073        s1 = parts[column];
074      }
075      tokenizer.reset(o2);
076      parts = tokenizer.getTokenArray();
077      String s2 = null;
078      if (parts != null && parts.length > column) {
079        s2 = parts[column];
080      }
081
082      if (s1 == null && s2 == null) {
083        return 0;
084      } else if (s1 == null) {
085        return 1;
086      } else if (s2 == null)  {
087        return -1;
088      } else {
089        return comp.compare(s1, s2);
090      }
091
092    }
093  }
094
095  public Comparator<String> getColumnComparator() {
096    return comp;
097  }
098
099}