/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gbif.utils.text;

import org.gbif.utils.file.FileUtils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;

import org.junit.jupiter.api.Test;

/**
 * Manual performance comparison of the string comparators used for sorting text files.
 *
 * <p>The tests only print timings to standard out; they make no assertions.
 *
 * @author markus
 */
public class ComparatorPerformance {

  /** Character encoding passed to the file sorting calls. */
  private static final String ENCODING = "UTF-8";

  /** Divisor used to convert {@link System#nanoTime()} differences to milliseconds. */
  private static final long NANOS_PER_MILLI = 1_000_000L;

  /**
   * Comparing performance for various file sorting methods.
   *
   * <p>Executed on a MacPro with 9GB, 8-core 3GHz and 1TB disk
   * sorting a 207MB large text file made from concatenation of irmng.tail:
   *
   * <pre>
   * Sorting with unix sort took 5817 ms
   * Sorting with org.gbif.utils.text.StringComparator and 10k lines in memory (200 parts) took 48968 ms
   * Sorting with org.gbif.utils.text.CCollationComparator and 10k lines in memory (200 parts) took 49858 ms
   * Sorting with org.gbif.utils.text.StringComparator and 100k lines in memory (20 parts) took 17962 ms
   * Sorting with org.gbif.utils.text.CCollationComparator and 100k lines in memory (20 parts) took 14046 ms
   * Sorting with org.gbif.utils.text.StringComparator and 1000k lines in memory (2 parts) took 15492 ms
   * Sorting with org.gbif.utils.text.CCollationComparator and 1000k lines in memory (2 parts) took 14317 ms
   * </pre>
   *
   * @throws IOException if the temporary output file cannot be created or a sort call fails
   */
  @Test
  public void testFileSorting() throws IOException {
    // 10MB text file, big file used in results above was concatenated from this one
    File source = FileUtils.getClasspathFile("sorting/irmng.tail");
    File sorted = File.createTempFile("gbif-common-file-sort2", "sorted.txt");
    sorted.deleteOnExit();
    FileUtils futils = new FileUtils();

    // test unix sort
    long start = System.nanoTime();
    futils.sort(source, sorted, ENCODING, 0, "\t", null, "\n", 0);
    System.out.printf("Sorting with unix sort took %s ms%n", elapsedMillis(start));

    // sort with comparator to test
    List<Comparator<String>> comparators = availableComparators();

    // single-element list on purpose: add further sizes here to compare chunk sizes
    for (int linesInMemory : Collections.singletonList(100000)) {
      FileUtils.setLinesPerMemorySort(linesInMemory);
      for (Comparator<String> comp : comparators) {
        start = System.nanoTime();
        futils.sortInJava(source, sorted, ENCODING, comp, 0);
        long tookMillis = elapsedMillis(start);

        System.out.printf(
            "Sorting with %s and %s lines in memory took %s ms%n",
            comp.getClass().getName(), linesInMemory, tookMillis);
      }
    }
  }

  /**
   * Builds the list of comparators whose performance is measured.
   *
   * @return a new mutable list holding a {@link StringComparator} and a null-first natural order
   *     comparator
   */
  private List<Comparator<String>> availableComparators() {
    List<Comparator<String>> comparators = new ArrayList<>();
    comparators.add(new StringComparator());
    comparators.add(Comparator.nullsFirst(Comparator.naturalOrder()));
    return comparators;
  }

  /**
   * Times an in-memory sort of the lines of the test file with each available comparator.
   *
   * @throws IOException if the test file cannot be read
   */
  @Test
  public void testVariousComparators() throws IOException {
    // sort with comparator to test
    List<Comparator<String>> comparators = availableComparators();
    for (Comparator<String> comp : comparators) {
      // reload inside the loop so every comparator starts from the same, not yet sorted, input
      LinkedList<String> source =
          FileUtils.streamToList(FileUtils.classpathStream("sorting/irmng.tail"));
      long start = System.nanoTime();
      source.sort(comp);
      long tookMillis = elapsedMillis(start);
      System.out.println(
          "Sorting with " + comp.getClass().getName() + " took " + tookMillis + " ms");
    }
  }

  /**
   * Returns the whole milliseconds elapsed since the given {@link System#nanoTime()} reading.
   *
   * <p>{@code nanoTime} is used instead of {@code currentTimeMillis} because it is meant for
   * measuring elapsed time and is not affected by wall-clock adjustments.
   *
   * @param startNanos an earlier value obtained from {@link System#nanoTime()}
   * @return elapsed time in milliseconds, truncated
   */
  private static long elapsedMillis(long startNanos) {
    return (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
  }
}