001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.utils.collection;
015
016import java.lang.reflect.Array;
017import java.util.AbstractSet;
018import java.util.Collection;
019import java.util.ConcurrentModificationException;
020import java.util.Iterator;
021import java.util.NoSuchElementException;
022
023/**
024 * A HashSet implementation taken from Ontopia
025 * which is both faster and more compact than java.util.HashSet
026 * <p/>
027 * INTERNAL: Implements the Set interface more compactly than
028 * java.util.HashSet by using a closed hashtable.
029 *
030 * @see <a href="http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/">Ontopia blog</a>
031 * @see <a href="http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ontopia/utils/">Ontopia
032 *      source</a>
033 */
034public class CompactHashSet<T> extends AbstractSet<T> {
035
036  private class CompactHashIterator implements Iterator<T> {
037
038    private int index;
039    private int lastReturned = -1;
040
041    /**
042     * The modCount value that the iterator believes that the backing
043     * CompactHashSet should have. If this expectation is violated,
044     * the iterator has detected concurrent modification.
045     */
046    private int expectedModCount;
047
048    CompactHashIterator() {
049      for (index = 0;
050          index < objects.length && (objects[index] == null || objects[index] == deletedObject);
051          index++) {}
052      expectedModCount = modCount;
053    }
054
055    @Override
056    public boolean hasNext() {
057      return index < objects.length;
058    }
059
060    @Override
061    public T next() {
062      if (modCount != expectedModCount) {
063        throw new ConcurrentModificationException();
064      }
065      int length = objects.length;
066      if (index >= length) {
067        lastReturned = -2;
068        throw new NoSuchElementException();
069      }
070
071      lastReturned = index;
072      for (index += 1;
073          index < length && (objects[index] == null || objects[index] == deletedObject);
074          index++) {}
075      if (objects[lastReturned] == nullObject) {
076        return null;
077      } else {
078        return (T) objects[lastReturned];
079      }
080    }
081
082    @Override
083    public void remove() {
084      if (modCount != expectedModCount) {
085        throw new ConcurrentModificationException();
086      }
087      if (lastReturned == -1 || lastReturned == -2) {
088        throw new IllegalStateException();
089      }
090      // delete object
091      if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) {
092        objects[lastReturned] = deletedObject;
093        elements--;
094        modCount++;
095        expectedModCount = modCount; // this is expected; we made the change
096      }
097    }
098  }
099
100  protected static final int INITIAL_SIZE = 3;
101
102  protected static final double LOAD_FACTOR = 0.75;
103  /**
104   * This object is used to represent null, should clients add that to the set.
105   */
106  protected static final Object nullObject = new Object();
107  /**
108   * When an object is deleted this object is put into the hashtable
109   * in its place, so that other objects with the same key
110   * (collisions) further down the hashtable are not lost after we
111   * delete an object in the collision chain.
112   */
113  protected static final Object deletedObject = new Object();
114
115  protected int elements;
116  protected int freecells;
117  protected Object[] objects;
118
119  protected int modCount;
120
121  /**
122   * Constructs a new, empty set.
123   */
124  public CompactHashSet() {
125    objects = new Object[INITIAL_SIZE];
126    elements = 0;
127    freecells = objects.length;
128    modCount = 0;
129  }
130
131  /**
132   * Constructs a new set containing the elements in the specified
133   * collection.
134   *
135   * @param c the collection whose elements are to be placed into this set.
136   */
137  public CompactHashSet(Collection c) {
138    this(c.size());
139    addAll(c);
140  }
141
142  // ===== SET IMPLEMENTATION =============================================
143
144  /**
145   * Constructs a new, empty set.
146   */
147  public CompactHashSet(int size) {
148    // NOTE: If array size is 0, we get a
149    // "java.lang.ArithmeticException: / by zero" in add(Object).
150    objects = new Object[size == 0 ? 1 : size];
151    elements = 0;
152    freecells = objects.length;
153    modCount = 0;
154  }
155
156  /**
157   * Adds the specified element to this set if it is not already
158   * present.
159   *
160   * @param x element to be added to this set.
161   *
162   * @return <tt>true</tt> if the set did not already contain the specified
163   *         element.
164   */
165  @Override
166  public boolean add(T x) {
167    Object o = x;
168    if (o == null) {
169      o = nullObject;
170    }
171
172    int hash = o.hashCode();
173    int index = (hash & 0x7FFFFFFF) % objects.length;
174    int offset = 1;
175    int deletedix = -1;
176
177    // search for the object (continue while !null and !this object)
178    while (objects[index] != null
179        && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
180
181      // if there's a deleted object here we can put this object here,
182      // provided it's not in here somewhere else already
183      if (objects[index] == deletedObject) {
184        deletedix = index;
185      }
186
187      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
188      offset = offset * 2 + 1;
189
190      if (offset == -1) {
191        offset = 2;
192      }
193    }
194
195    if (objects[index] == null) { // wasn't present already
196      if (deletedix != -1) {
197        index = deletedix;
198      } else {
199        freecells--;
200      }
201
202      modCount++;
203      elements++;
204      objects[index] = o;
205
206      // rehash with same capacity
207      if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
208        rehash(objects.length);
209        // rehash with increased capacity
210        if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
211          rehash(objects.length * 2 + 1);
212        }
213      }
214      return true;
215    } else {
216      return false;
217    }
218  }
219
220  /**
221   * Removes all of the elements from this set.
222   */
223  @Override
224  public void clear() {
225    elements = 0;
226    for (int ix = 0; ix < objects.length; ix++) {
227      objects[ix] = null;
228    }
229    freecells = objects.length;
230    modCount++;
231  }
232
233  /**
234   * Returns <tt>true</tt> if this set contains the specified element.
235   *
236   * @param o element whose presence in this set is to be tested.
237   *
238   * @return <tt>true</tt> if this set contains the specified element.
239   */
240  @Override
241  public boolean contains(Object o) {
242    if (o == null) {
243      o = nullObject;
244    }
245
246    int hash = o.hashCode();
247    int index = (hash & 0x7FFFFFFF) % objects.length;
248    int offset = 1;
249
250    // search for the object (continue while !null and !this object)
251    while (objects[index] != null
252        && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
253      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
254      offset = offset * 2 + 1;
255
256      if (offset == -1) {
257        offset = 2;
258      }
259    }
260
261    return objects[index] != null;
262  }
263
264  /**
265   * INTERNAL: Used for debugging only.
266   */
267  public void dump() {
268    System.out.println("Size: " + objects.length);
269    System.out.println("Elements: " + elements);
270    System.out.println("Free cells: " + freecells);
271    System.out.println();
272    for (int ix = 0; ix < objects.length; ix++) {
273      System.out.println("[" + ix + "]: " + objects[ix]);
274    }
275  }
276
277  /**
278   * Returns <tt>true</tt> if this set contains no elements.
279   */
280  @Override
281  public boolean isEmpty() {
282    return elements == 0;
283  }
284
285  /**
286   * Returns an iterator over the elements in this set. The elements
287   * are returned in no particular order.
288   *
289   * @return an Iterator over the elements in this set.
290   *
291   * @see ConcurrentModificationException
292   */
293  @Override
294  public Iterator iterator() {
295    return new CompactHashIterator();
296  }
297
298  /**
299   * INTERNAL: Rehashes the hashset to a bigger size.
300   */
301  protected void rehash(int newCapacity) {
302    int oldCapacity = objects.length;
303    Object[] newObjects = new Object[newCapacity];
304
305    for (int ix = 0; ix < oldCapacity; ix++) {
306      Object o = objects[ix];
307      if (o == null || o == deletedObject) {
308        continue;
309      }
310
311      int hash = o.hashCode();
312      int index = (hash & 0x7FFFFFFF) % newCapacity;
313      int offset = 1;
314
315      // search for the object
316      while (newObjects[index] != null) { // no need to test for duplicates
317        index = ((index + offset) & 0x7FFFFFFF) % newCapacity;
318        offset = offset * 2 + 1;
319
320        if (offset == -1) {
321          offset = 2;
322        }
323      }
324
325      newObjects[index] = o;
326    }
327
328    objects = newObjects;
329    freecells = objects.length - elements;
330  }
331
332  /**
333   * Removes the specified element from the set.
334   */
335  @Override
336  public boolean remove(Object o) {
337    if (o == null) {
338      o = nullObject;
339    }
340
341    int hash = o.hashCode();
342    int index = (hash & 0x7FFFFFFF) % objects.length;
343    int offset = 1;
344
345    // search for the object (continue while !null and !this object)
346    while (objects[index] != null
347        && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
348      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
349      offset = offset * 2 + 1;
350
351      if (offset == -1) {
352        offset = 2;
353      }
354    }
355
356    // we found the right position, now do the removal
357    if (objects[index] != null) {
358      // we found the object
359      objects[index] = deletedObject;
360      modCount++;
361      elements--;
362      return true;
363    } else {
364      // we did not find the object
365      return false;
366    }
367  }
368
369  // ===== INTERNAL METHODS ===============================================
370
371  /**
372   * Returns the number of elements in this set (its cardinality).
373   */
374  @Override
375  public int size() {
376    return elements;
377  }
378
379  @Override
380  public Object[] toArray() {
381    Object[] result = new Object[elements];
382    Object[] objects = this.objects;
383    int pos = 0;
384    for (Object object : objects) {
385      if (object != null && object != deletedObject) {
386        result[pos++] = object == nullObject ? null : object;
387      }
388    }
389    return result;
390  }
391
392  // ===== ITERATOR IMPLEMENTATON =========================================
393
394  @Override
395  public Object[] toArray(Object[] a) {
396    int size = elements;
397    if (a.length < size) {
398      a = (Object[]) Array.newInstance(a.getClass().getComponentType(), size);
399    }
400    Object[] objects = this.objects;
401    int pos = 0;
402    for (Object object : objects) {
403      if (object != null && object != deletedObject) {
404        a[pos++] = object == nullObject ? null : object;
405      }
406    }
407    return a;
408  }
409}