001/*
002 * Copyright 2021 Global Biodiversity Information Facility (GBIF)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.gbif.utils.collection;
017
018import java.lang.reflect.Array;
019import java.util.AbstractSet;
020import java.util.Collection;
021import java.util.ConcurrentModificationException;
022import java.util.Iterator;
023import java.util.NoSuchElementException;
024
025/**
026 * A HashSet implementation taken from Ontopia
027 * which is both faster and more compact than java.util.HashSet
028 * <p/>
029 * INTERNAL: Implements the Set interface more compactly than
030 * java.util.HashSet by using a closed hashtable.
031 *
032 * @see <a href="http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/">Ontopia blog</a>
033 * @see <a href="http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ontopia/utils/">Ontopia
034 *      source</a>
035 */
036public class CompactHashSet<T> extends AbstractSet<T> {
037
038  private class CompactHashIterator implements Iterator<T> {
039
040    private int index;
041    private int lastReturned = -1;
042
043    /**
044     * The modCount value that the iterator believes that the backing
045     * CompactHashSet should have. If this expectation is violated,
046     * the iterator has detected concurrent modification.
047     */
048    private int expectedModCount;
049
050    CompactHashIterator() {
051      for (index = 0; index < objects.length && (objects[index] == null || objects[index] == deletedObject); index++) {
052      }
053      expectedModCount = modCount;
054    }
055
056    @Override
057    public boolean hasNext() {
058      return index < objects.length;
059    }
060
061    @Override
062    public T next() {
063      if (modCount != expectedModCount) {
064        throw new ConcurrentModificationException();
065      }
066      int length = objects.length;
067      if (index >= length) {
068        lastReturned = -2;
069        throw new NoSuchElementException();
070      }
071
072      lastReturned = index;
073      for (index += 1; index < length && (objects[index] == null || objects[index] == deletedObject); index++) {
074      }
075      if (objects[lastReturned] == nullObject) {
076        return null;
077      } else {
078        return (T) objects[lastReturned];
079      }
080    }
081
082    @Override
083    public void remove() {
084      if (modCount != expectedModCount) {
085        throw new ConcurrentModificationException();
086      }
087      if (lastReturned == -1 || lastReturned == -2) {
088        throw new IllegalStateException();
089      }
090      // delete object
091      if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) {
092        objects[lastReturned] = deletedObject;
093        elements--;
094        modCount++;
095        expectedModCount = modCount; // this is expected; we made the change
096      }
097    }
098  }
099
100  protected static final int INITIAL_SIZE = 3;
101
102  protected static final double LOAD_FACTOR = 0.75;
103  /**
104   * This object is used to represent null, should clients add that to the set.
105   */
106  protected static final Object nullObject = new Object();
107  /**
108   * When an object is deleted this object is put into the hashtable
109   * in its place, so that other objects with the same key
110   * (collisions) further down the hashtable are not lost after we
111   * delete an object in the collision chain.
112   */
113  protected static final Object deletedObject = new Object();
114  protected int elements;
115  protected int freecells;
116  protected Object[] objects;
117
118  protected int modCount;
119
120  /**
121   * Constructs a new, empty set.
122   */
123  public CompactHashSet() {
124    objects = new Object[INITIAL_SIZE];
125    elements = 0;
126    freecells = objects.length;
127    modCount = 0;
128  }
129
130  /**
131   * Constructs a new set containing the elements in the specified
132   * collection.
133   *
134   * @param c the collection whose elements are to be placed into this set.
135   */
136  public CompactHashSet(Collection c) {
137    this(c.size());
138    addAll(c);
139  }
140
141  // ===== SET IMPLEMENTATION =============================================
142
143  /**
144   * Constructs a new, empty set.
145   */
146  public CompactHashSet(int size) {
147    // NOTE: If array size is 0, we get a
148    // "java.lang.ArithmeticException: / by zero" in add(Object).
149    objects = new Object[size == 0 ? 1 : size];
150    elements = 0;
151    freecells = objects.length;
152    modCount = 0;
153  }
154
155  /**
156   * Adds the specified element to this set if it is not already
157   * present.
158   *
159   * @param x element to be added to this set.
160   *
161   * @return <tt>true</tt> if the set did not already contain the specified
162   *         element.
163   */
164  @Override
165  public boolean add(T x) {
166    Object o = x;
167    if (o == null) {
168      o = nullObject;
169    }
170
171    int hash = o.hashCode();
172    int index = (hash & 0x7FFFFFFF) % objects.length;
173    int offset = 1;
174    int deletedix = -1;
175
176    // search for the object (continue while !null and !this object)
177    while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
178
179      // if there's a deleted object here we can put this object here,
180      // provided it's not in here somewhere else already
181      if (objects[index] == deletedObject) {
182        deletedix = index;
183      }
184
185      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
186      offset = offset * 2 + 1;
187
188      if (offset == -1) {
189        offset = 2;
190      }
191    }
192
193    if (objects[index] == null) { // wasn't present already
194      if (deletedix != -1) {
195        index = deletedix;
196      } else {
197        freecells--;
198      }
199
200      modCount++;
201      elements++;
202      objects[index] = o;
203
204      // rehash with same capacity
205      if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
206        rehash(objects.length);
207        // rehash with increased capacity
208        if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
209          rehash(objects.length * 2 + 1);
210        }
211      }
212      return true;
213    } else {
214      return false;
215    }
216  }
217
218  /**
219   * Removes all of the elements from this set.
220   */
221  @Override
222  public void clear() {
223    elements = 0;
224    for (int ix = 0; ix < objects.length; ix++) {
225      objects[ix] = null;
226    }
227    freecells = objects.length;
228    modCount++;
229  }
230
231  /**
232   * Returns <tt>true</tt> if this set contains the specified element.
233   *
234   * @param o element whose presence in this set is to be tested.
235   *
236   * @return <tt>true</tt> if this set contains the specified element.
237   */
238  @Override
239  public boolean contains(Object o) {
240    if (o == null) {
241      o = nullObject;
242    }
243
244    int hash = o.hashCode();
245    int index = (hash & 0x7FFFFFFF) % objects.length;
246    int offset = 1;
247
248    // search for the object (continue while !null and !this object)
249    while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
250      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
251      offset = offset * 2 + 1;
252
253      if (offset == -1) {
254        offset = 2;
255      }
256    }
257
258    return objects[index] != null;
259  }
260
261  /**
262   * INTERNAL: Used for debugging only.
263   */
264  public void dump() {
265    System.out.println("Size: " + objects.length);
266    System.out.println("Elements: " + elements);
267    System.out.println("Free cells: " + freecells);
268    System.out.println();
269    for (int ix = 0; ix < objects.length; ix++) {
270      System.out.println("[" + ix + "]: " + objects[ix]);
271    }
272  }
273
274  /**
275   * Returns <tt>true</tt> if this set contains no elements.
276   */
277  @Override
278  public boolean isEmpty() {
279    return elements == 0;
280  }
281
282  /**
283   * Returns an iterator over the elements in this set. The elements
284   * are returned in no particular order.
285   *
286   * @return an Iterator over the elements in this set.
287   *
288   * @see ConcurrentModificationException
289   */
290  @Override
291  public Iterator iterator() {
292    return new CompactHashIterator();
293  }
294
295  /**
296   * INTERNAL: Rehashes the hashset to a bigger size.
297   */
298  protected void rehash(int newCapacity) {
299    int oldCapacity = objects.length;
300    Object[] newObjects = new Object[newCapacity];
301
302    for (int ix = 0; ix < oldCapacity; ix++) {
303      Object o = objects[ix];
304      if (o == null || o == deletedObject) {
305        continue;
306      }
307
308      int hash = o.hashCode();
309      int index = (hash & 0x7FFFFFFF) % newCapacity;
310      int offset = 1;
311
312      // search for the object
313      while (newObjects[index] != null) { // no need to test for duplicates
314        index = ((index + offset) & 0x7FFFFFFF) % newCapacity;
315        offset = offset * 2 + 1;
316
317        if (offset == -1) {
318          offset = 2;
319        }
320      }
321
322      newObjects[index] = o;
323    }
324
325    objects = newObjects;
326    freecells = objects.length - elements;
327  }
328
329  /**
330   * Removes the specified element from the set.
331   */
332  @Override
333  public boolean remove(Object o) {
334    if (o == null) {
335      o = nullObject;
336    }
337
338    int hash = o.hashCode();
339    int index = (hash & 0x7FFFFFFF) % objects.length;
340    int offset = 1;
341
342    // search for the object (continue while !null and !this object)
343    while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
344      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
345      offset = offset * 2 + 1;
346
347      if (offset == -1) {
348        offset = 2;
349      }
350    }
351
352    // we found the right position, now do the removal
353    if (objects[index] != null) {
354      // we found the object
355      objects[index] = deletedObject;
356      modCount++;
357      elements--;
358      return true;
359    } else {
360      // we did not find the object
361      return false;
362    }
363  }
364
365  // ===== INTERNAL METHODS ===============================================
366
367  /**
368   * Returns the number of elements in this set (its cardinality).
369   */
370  @Override
371  public int size() {
372    return elements;
373  }
374
375  @Override
376  public Object[] toArray() {
377    Object[] result = new Object[elements];
378    Object[] objects = this.objects;
379    int pos = 0;
380    for (Object object : objects) {
381      if (object != null && object != deletedObject) {
382        result[pos++] = object == nullObject ? null : object;
383      }
384    }
385    return result;
386  }
387
388  // ===== ITERATOR IMPLEMENTATON =========================================
389
390  @Override
391  public Object[] toArray(Object[] a) {
392    int size = elements;
393    if (a.length < size) {
394      a = (Object[]) Array.newInstance(a.getClass().getComponentType(), size);
395    }
396    Object[] objects = this.objects;
397    int pos = 0;
398    for (Object object : objects) {
399      if (object != null && object != deletedObject) {
400        a[pos++] = object == nullObject ? null : object;
401      }
402    }
403    return a;
404  }
405
406}