/*
 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.utils.text;

import java.util.Comparator;

import org.apache.commons.lang3.text.StrTokenizer;

/**
 * A comparator for delimited lines that compares the content of a given column number for all rows.
 * This allows to sort for example tab delimited files by any column and not only the first one.
 * <p>
 * If no explicit comparator is given a string comparison is done for the actual column content.
 * <p>
 * Missing values always sort last: a {@code null} line is ordered after any non-null line, and a line
 * that has no value in the configured column (too few columns, or an empty column, which the tokenizer
 * is configured to report as {@code null}) is ordered after any line that has one. The column
 * comparator is only consulted when both lines have a value in the column.
 * <p>
 * Instances are not safe for concurrent use: {@link #compare(String, String)} resets and reuses one
 * tokenizer held by the instance, so a single comparator must not be shared between threads that
 * compare at the same time (for example in a parallel sort).
 */
public class LineComparator implements Comparator<String> {

  private final StrTokenizer tokenizer;
  private final int column;
  private final Comparator<String> comp;

  /**
   * Creates a comparator on an unquoted column using natural string ordering.
   *
   * @param column zero-based index of the column to compare
   * @param columnDelimiter string separating the columns of a line
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter) {
    this(column, columnDelimiter, null, null);
  }

  /**
   * Creates a comparator on a possibly quoted column using natural string ordering.
   *
   * @param column zero-based index of the column to compare
   * @param columnDelimiter string separating the columns of a line
   * @param quoteChar quote character passed to the tokenizer, or {@code null} to configure none
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter, Character quoteChar) {
    this(column, columnDelimiter, quoteChar, null);
  }

  /**
   * Creates a fully configured comparator.
   *
   * @param column zero-based index of the column to compare
   * @param columnDelimiter string separating the columns of a line
   * @param quoteChar quote character passed to the tokenizer, or {@code null} to configure none
   * @param columnComparator comparator applied to the column values, or {@code null} to use natural
   *        string ordering
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter, Character quoteChar, Comparator<String> columnComparator) {
    // Fail fast: a negative index would otherwise pass the length check in compare() and only
    // surface as an ArrayIndexOutOfBoundsException in the middle of a sort.
    if (column < 0) {
      throw new IllegalArgumentException("column must not be negative, but was " + column);
    }
    this.column = column;
    // The nullsFirst wrapper only matters to callers of getColumnComparator(); compare() handles
    // null column values itself before delegating.
    this.comp = columnComparator == null ? Comparator.nullsFirst(Comparator.naturalOrder()) : columnComparator;
    tokenizer = new StrTokenizer();
    // Keep empty columns in place (so indexes stay aligned) but report them as null.
    tokenizer.setEmptyTokenAsNull(true);
    tokenizer.setIgnoreEmptyTokens(false);
    if (quoteChar != null) {
      tokenizer.setQuoteChar(quoteChar);
    }
    tokenizer.setDelimiterString(columnDelimiter);
  }

  /**
   * Creates a comparator on an unquoted column using a custom column comparator.
   *
   * @param column zero-based index of the column to compare
   * @param columnDelimiter string separating the columns of a line
   * @param columnComparator comparator applied to the column values, or {@code null} to use natural
   *        string ordering
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter, Comparator<String> columnComparator) {
    this(column, columnDelimiter, null, columnComparator);
  }

  /**
   * Compares two delimited lines by the value found in the configured column.
   *
   * @param o1 first line, may be {@code null}
   * @param o2 second line, may be {@code null}
   * @return {@code 0} if both lines, or both column values, are missing; a positive number if only the
   *         first is missing; a negative number if only the second is missing; otherwise the result
   *         of the column comparator
   */
  @Override
  public int compare(String o1, String o2) {
    // Null lines sort last.
    if (o1 == null) {
      return o2 == null ? 0 : 1;
    }
    if (o2 == null) {
      return -1;
    }

    String s1 = columnValue(o1);
    String s2 = columnValue(o2);

    // Lines without a value in the column sort last as well.
    if (s1 == null) {
      return s2 == null ? 0 : 1;
    }
    if (s2 == null) {
      return -1;
    }
    return comp.compare(s1, s2);
  }

  /**
   * Tokenizes a line with the shared tokenizer and returns the value of the configured column,
   * or {@code null} if the line has too few columns or the column is empty.
   */
  private String columnValue(String line) {
    tokenizer.reset(line);
    String[] tokens = tokenizer.getTokenArray();
    if (tokens != null && tokens.length > column) {
      return tokens[column];
    }
    return null;
  }

  /**
   * @return the comparator applied to column values; the natural-order default if none was supplied
   */
  public Comparator<String> getColumnComparator() {
    return comp;
  }

}