/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.utils.text;

import java.util.Comparator;

import org.apache.commons.lang3.text.StrTokenizer;

/**
 * A comparator for delimited lines that compares the content of a given column number for all rows.
 * This allows to sort for example tab delimited files by any column and not only the first one.
 *
 * <p>If no explicit comparator is given a string comparison is done for the actual column content.
 *
 * <p>Ordering of absent values: a {@code null} line sorts after any non-null line, and a line whose
 * requested column is missing (too few columns) or yields a {@code null} token sorts after any line
 * that has a value in that column. Two absent values compare as equal. The column comparator is
 * only ever invoked with two non-null column values.
 *
 * <p>Every call to {@link #compare(String, String)} resets and reuses the single tokenizer held by
 * this instance, so one comparator instance should not be shared between threads that compare
 * concurrently.
 */
public class LineComparator implements Comparator<String> {

  private final StrTokenizer tokenizer;
  private final int column;
  private final Comparator<String> comp;

  /**
   * Creates a comparator on an unquoted column using plain string ordering.
   *
   * @param column zero-based index of the column to compare, must not be negative
   * @param columnDelimiter the string separating columns within a line
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter) {
    this(column, columnDelimiter, null, null);
  }

  /**
   * Creates a comparator on a possibly quoted column using plain string ordering.
   *
   * @param column zero-based index of the column to compare, must not be negative
   * @param columnDelimiter the string separating columns within a line
   * @param quoteChar the quote character for column values, or {@code null} to leave the
   *     tokenizer's quote handling at its default
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter, Character quoteChar) {
    this(column, columnDelimiter, quoteChar, null);
  }

  /**
   * Creates a comparator on a possibly quoted column using a custom ordering for the column values.
   *
   * @param column zero-based index of the column to compare, must not be negative
   * @param columnDelimiter the string separating columns within a line
   * @param quoteChar the quote character for column values, or {@code null} to leave the
   *     tokenizer's quote handling at its default
   * @param columnComparator ordering applied to two non-null column values, or {@code null} to use
   *     natural string ordering
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(
      int column,
      String columnDelimiter,
      Character quoteChar,
      Comparator<String> columnComparator) {
    // Fail fast: a negative index would otherwise surface later as an
    // ArrayIndexOutOfBoundsException from inside a sort.
    if (column < 0) {
      throw new IllegalArgumentException("column must not be negative: " + column);
    }
    this.column = column;
    this.comp =
        columnComparator == null
            ? Comparator.nullsFirst(Comparator.naturalOrder())
            : columnComparator;
    tokenizer = new StrTokenizer();
    // Keep empty columns in the token array (so indexes stay aligned with the file's columns)
    // but report them as null.
    tokenizer.setEmptyTokenAsNull(true);
    tokenizer.setIgnoreEmptyTokens(false);
    if (quoteChar != null) {
      tokenizer.setQuoteChar(quoteChar);
    }
    tokenizer.setDelimiterString(columnDelimiter);
  }

  /**
   * Creates a comparator on an unquoted column using a custom ordering for the column values.
   *
   * @param column zero-based index of the column to compare, must not be negative
   * @param columnDelimiter the string separating columns within a line
   * @param columnComparator ordering applied to two non-null column values, or {@code null} to use
   *     natural string ordering
   * @throws IllegalArgumentException if {@code column} is negative
   */
  public LineComparator(int column, String columnDelimiter, Comparator<String> columnComparator) {
    this(column, columnDelimiter, null, columnComparator);
  }

  /**
   * Compares two delimited lines by the content of the configured column.
   *
   * @param o1 the first line, may be {@code null}
   * @param o2 the second line, may be {@code null}
   * @return a negative integer, zero, or a positive integer as the first line sorts before, equal
   *     to, or after the second; absent lines or column values sort last
   */
  @Override
  public int compare(String o1, String o2) {
    if (o1 == null || o2 == null) {
      return compareAbsentLast(o1, o2);
    }

    String s1 = columnValue(o1);
    String s2 = columnValue(o2);
    if (s1 == null || s2 == null) {
      return compareAbsentLast(s1, s2);
    }
    return comp.compare(s1, s2);
  }

  /**
   * Returns the comparator applied to the column values.
   *
   * @return the column comparator given at construction, or the default string comparator
   */
  public Comparator<String> getColumnComparator() {
    return comp;
  }

  /** Tokenizes the line and returns the configured column, or null if the line has no such column. */
  private String columnValue(String line) {
    tokenizer.reset(line);
    String[] parts = tokenizer.getTokenArray();
    if (parts != null && parts.length > column) {
      return parts[column];
    }
    return null;
  }

  /** Orders two values of which at least one is null: both null are equal, a lone null is last. */
  private static int compareAbsentLast(String a, String b) {
    if (a == null && b == null) {
      return 0;
    }
    return a == null ? 1 : -1;
  }
}