001package org.gbif.utils.collection;
002
003import java.lang.reflect.Array;
004import java.util.AbstractSet;
005import java.util.Collection;
006import java.util.ConcurrentModificationException;
007import java.util.Iterator;
008import java.util.NoSuchElementException;
009
010/**
011 * A HashSet implementation taken from Ontopia
012 * which is both faster and more compact than java.util.HashSet
013 * <p/>
014 * INTERNAL: Implements the Set interface more compactly than
015 * java.util.HashSet by using a closed hashtable.
016 *
017 * @see <a href="http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/">Ontopia blog</a>
018 * @see <a href="http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ontopia/utils/">Ontopia
019 *      source</a>
020 */
021public class CompactHashSet<T> extends AbstractSet<T> {
022
023  private class CompactHashIterator implements Iterator<T> {
024
025    private int index;
026    private int lastReturned = -1;
027
028    /**
029     * The modCount value that the iterator believes that the backing
030     * CompactHashSet should have. If this expectation is violated,
031     * the iterator has detected concurrent modification.
032     */
033    private int expectedModCount;
034
035    CompactHashIterator() {
036      for (index = 0; index < objects.length && (objects[index] == null || objects[index] == deletedObject); index++) {
037      }
038      expectedModCount = modCount;
039    }
040
041    public boolean hasNext() {
042      return index < objects.length;
043    }
044
045    public T next() {
046      if (modCount != expectedModCount) {
047        throw new ConcurrentModificationException();
048      }
049      int length = objects.length;
050      if (index >= length) {
051        lastReturned = -2;
052        throw new NoSuchElementException();
053      }
054
055      lastReturned = index;
056      for (index += 1; index < length && (objects[index] == null || objects[index] == deletedObject); index++) {
057      }
058      if (objects[lastReturned] == nullObject) {
059        return null;
060      } else {
061        return (T) objects[lastReturned];
062      }
063    }
064
065    public void remove() {
066      if (modCount != expectedModCount) {
067        throw new ConcurrentModificationException();
068      }
069      if (lastReturned == -1 || lastReturned == -2) {
070        throw new IllegalStateException();
071      }
072      // delete object
073      if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) {
074        objects[lastReturned] = deletedObject;
075        elements--;
076        modCount++;
077        expectedModCount = modCount; // this is expected; we made the change
078      }
079    }
080  }
081
082  protected static final int INITIAL_SIZE = 3;
083
084  protected static final double LOAD_FACTOR = 0.75;
085  /**
086   * This object is used to represent null, should clients add that to the set.
087   */
088  protected static final Object nullObject = new Object();
089  /**
090   * When an object is deleted this object is put into the hashtable
091   * in its place, so that other objects with the same key
092   * (collisions) further down the hashtable are not lost after we
093   * delete an object in the collision chain.
094   */
095  protected static final Object deletedObject = new Object();
096  protected int elements;
097  protected int freecells;
098  protected Object[] objects;
099
100  protected int modCount;
101
102  /**
103   * Constructs a new, empty set.
104   */
105  public CompactHashSet() {
106    objects = new Object[INITIAL_SIZE];
107    elements = 0;
108    freecells = objects.length;
109    modCount = 0;
110  }
111
112  /**
113   * Constructs a new set containing the elements in the specified
114   * collection.
115   *
116   * @param c the collection whose elements are to be placed into this set.
117   */
118  public CompactHashSet(Collection c) {
119    this(c.size());
120    addAll(c);
121  }
122
123  // ===== SET IMPLEMENTATION =============================================
124
125  /**
126   * Constructs a new, empty set.
127   */
128  public CompactHashSet(int size) {
129    // NOTE: If array size is 0, we get a
130    // "java.lang.ArithmeticException: / by zero" in add(Object).
131    objects = new Object[size == 0 ? 1 : size];
132    elements = 0;
133    freecells = objects.length;
134    modCount = 0;
135  }
136
137  /**
138   * Adds the specified element to this set if it is not already
139   * present.
140   *
141   * @param x element to be added to this set.
142   *
143   * @return <tt>true</tt> if the set did not already contain the specified
144   *         element.
145   */
146  @Override
147  public boolean add(T x) {
148    Object o = x;
149    if (o == null) {
150      o = nullObject;
151    }
152
153    int hash = o.hashCode();
154    int index = (hash & 0x7FFFFFFF) % objects.length;
155    int offset = 1;
156    int deletedix = -1;
157
158    // search for the object (continue while !null and !this object)
159    while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
160
161      // if there's a deleted object here we can put this object here,
162      // provided it's not in here somewhere else already
163      if (objects[index] == deletedObject) {
164        deletedix = index;
165      }
166
167      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
168      offset = offset * 2 + 1;
169
170      if (offset == -1) {
171        offset = 2;
172      }
173    }
174
175    if (objects[index] == null) { // wasn't present already
176      if (deletedix != -1) {
177        index = deletedix;
178      } else {
179        freecells--;
180      }
181
182      modCount++;
183      elements++;
184      objects[index] = o;
185
186      // rehash with same capacity
187      if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
188        rehash(objects.length);
189        // rehash with increased capacity
190        if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
191          rehash(objects.length * 2 + 1);
192        }
193      }
194      return true;
195    } else {
196      return false;
197    }
198  }
199
200  /**
201   * Removes all of the elements from this set.
202   */
203  @Override
204  public void clear() {
205    elements = 0;
206    for (int ix = 0; ix < objects.length; ix++) {
207      objects[ix] = null;
208    }
209    freecells = objects.length;
210    modCount++;
211  }
212
213  /**
214   * Returns <tt>true</tt> if this set contains the specified element.
215   *
216   * @param o element whose presence in this set is to be tested.
217   *
218   * @return <tt>true</tt> if this set contains the specified element.
219   */
220  @Override
221  public boolean contains(Object o) {
222    if (o == null) {
223      o = nullObject;
224    }
225
226    int hash = o.hashCode();
227    int index = (hash & 0x7FFFFFFF) % objects.length;
228    int offset = 1;
229
230    // search for the object (continue while !null and !this object)
231    while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
232      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
233      offset = offset * 2 + 1;
234
235      if (offset == -1) {
236        offset = 2;
237      }
238    }
239
240    return objects[index] != null;
241  }
242
243  /**
244   * INTERNAL: Used for debugging only.
245   */
246  public void dump() {
247    System.out.println("Size: " + objects.length);
248    System.out.println("Elements: " + elements);
249    System.out.println("Free cells: " + freecells);
250    System.out.println();
251    for (int ix = 0; ix < objects.length; ix++) {
252      System.out.println("[" + ix + "]: " + objects[ix]);
253    }
254  }
255
256  /**
257   * Returns <tt>true</tt> if this set contains no elements.
258   */
259  @Override
260  public boolean isEmpty() {
261    return elements == 0;
262  }
263
264  /**
265   * Returns an iterator over the elements in this set. The elements
266   * are returned in no particular order.
267   *
268   * @return an Iterator over the elements in this set.
269   *
270   * @see ConcurrentModificationException
271   */
272  @Override
273  public Iterator iterator() {
274    return new CompactHashIterator();
275  }
276
277  /**
278   * INTERNAL: Rehashes the hashset to a bigger size.
279   */
280  protected void rehash(int newCapacity) {
281    int oldCapacity = objects.length;
282    Object[] newObjects = new Object[newCapacity];
283
284    for (int ix = 0; ix < oldCapacity; ix++) {
285      Object o = objects[ix];
286      if (o == null || o == deletedObject) {
287        continue;
288      }
289
290      int hash = o.hashCode();
291      int index = (hash & 0x7FFFFFFF) % newCapacity;
292      int offset = 1;
293
294      // search for the object
295      while (newObjects[index] != null) { // no need to test for duplicates
296        index = ((index + offset) & 0x7FFFFFFF) % newCapacity;
297        offset = offset * 2 + 1;
298
299        if (offset == -1) {
300          offset = 2;
301        }
302      }
303
304      newObjects[index] = o;
305    }
306
307    objects = newObjects;
308    freecells = objects.length - elements;
309  }
310
311  /**
312   * Removes the specified element from the set.
313   */
314  @Override
315  public boolean remove(Object o) {
316    if (o == null) {
317      o = nullObject;
318    }
319
320    int hash = o.hashCode();
321    int index = (hash & 0x7FFFFFFF) % objects.length;
322    int offset = 1;
323
324    // search for the object (continue while !null and !this object)
325    while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
326      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
327      offset = offset * 2 + 1;
328
329      if (offset == -1) {
330        offset = 2;
331      }
332    }
333
334    // we found the right position, now do the removal
335    if (objects[index] != null) {
336      // we found the object
337      objects[index] = deletedObject;
338      modCount++;
339      elements--;
340      return true;
341    } else {
342      // we did not find the object
343      return false;
344    }
345  }
346
347  // ===== INTERNAL METHODS ===============================================
348
349  /**
350   * Returns the number of elements in this set (its cardinality).
351   */
352  @Override
353  public int size() {
354    return elements;
355  }
356
357  @Override
358  public Object[] toArray() {
359    Object[] result = new Object[elements];
360    Object[] objects = this.objects;
361    int pos = 0;
362    for (Object object : objects) {
363      if (object != null && object != deletedObject) {
364        result[pos++] = object == nullObject ? null : object;
365      }
366    }
367    return result;
368  }
369
370  // ===== ITERATOR IMPLEMENTATON =========================================
371
372  @Override
373  public Object[] toArray(Object[] a) {
374    int size = elements;
375    if (a.length < size) {
376      a = (Object[]) Array.newInstance(a.getClass().getComponentType(), size);
377    }
378    Object[] objects = this.objects;
379    int pos = 0;
380    for (Object object : objects) {
381      if (object != null && object != deletedObject) {
382        a[pos++] = object == nullObject ? null : object;
383      }
384    }
385    return a;
386  }
387
388}