001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.text;
015
import org.gbif.utils.file.FileUtils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;

import org.junit.jupiter.api.Test;
027
028/**
029 * @author markus
030 */
031public class ComparatorPerformance {
032
033  private final String ENCODING = "UTF-8";
034
035  /**
036   * Comparing performance for various file soring methods.
037   * <p/>
038   * Executed on a MacPro with 9GB, 8-core 3GHz and 1TB disk
039   * sorting a 207MB large text file made from concatenation of irmng.tail:
040   * <p/>
041   * Sorting with unix sort took 5817 ms
042   * Sorting with org.gbif.utils.text.StringComparator and 10k lines in memory (200 parts) took 48968 ms
043   * Sorting with org.gbif.utils.text.CCollationComparator and 10k lines in memory (200 parts) took 49858 ms
044   * Sorting with org.gbif.utils.text.StringComparator and 100k lines in memory (20 parts) took 17962 ms
045   * Sorting with org.gbif.utils.text.CCollationComparator and 100k lines in memory (20 parts) took 14046 ms
046   * Sorting with org.gbif.utils.text.StringComparator and 1000k lines in memory (2 parts) took 15492 ms
047   * Sorting with org.gbif.utils.text.CCollationComparator and 1000k lines in memory (2 parts) took 14317 ms
048   */
049  @Test
050  public void testFileSorting() throws IOException {
051    // 10MB text file, big file used in results above was concatenated from this one
052    File source = FileUtils.getClasspathFile("sorting/irmng.tail");
053    File sorted = File.createTempFile("gbif-common-file-sort2", "sorted.txt");
054    sorted.deleteOnExit();
055    FileUtils futils = new FileUtils();
056
057    // test unix sort
058    long start = System.currentTimeMillis();
059    futils.sort(source, sorted, ENCODING, 0, "\t", null, "\n", 0);
060    long end = System.currentTimeMillis();
061    System.out.printf("Sorting with unix sort took %s ms%n", (end - start));
062
063    // sort with comparator to test
064    List<Comparator<String>> comparators = availableComparators();
065
066    for (Integer linesInMen : Collections.singletonList(100000)) {
067      FileUtils.setLinesPerMemorySort(linesInMen);
068      for (Comparator<String> comp : comparators) {
069
070        start = System.currentTimeMillis();
071        futils.sortInJava(source, sorted, ENCODING, comp, 0);
072        end = System.currentTimeMillis();
073
074        System.out.printf(
075            "Sorting with %s and %s lines in memory took %s ms%n",
076            comp.getClass().getName(), linesInMen, (end - start));
077      }
078    }
079  }
080
081  private List<Comparator<String>> availableComparators() {
082    List<Comparator<String>> comparators = new ArrayList<>();
083    comparators.add(new StringComparator());
084    comparators.add(Comparator.nullsFirst(Comparator.naturalOrder()));
085    return comparators;
086  }
087
088  @Test
089  public void testVariousComparators() throws IOException {
090    // sort with comparator to test
091    List<Comparator<String>> comparators = availableComparators();
092    for (Comparator<String> comp : comparators) {
093      LinkedList<String> source =
094          FileUtils.streamToList(FileUtils.classpathStream("sorting/irmng.tail"));
095      long start = System.currentTimeMillis();
096      source.sort(comp);
097      long end = System.currentTimeMillis();
098      System.out.println(
099          "Sorting with " + comp.getClass().getName() + " took " + (end - start) + " ms");
100    }
101  }
102}