001/* 002 * Licensed under the Apache License, Version 2.0 (the "License"); 003 * you may not use this file except in compliance with the License. 004 * You may obtain a copy of the License at 005 * 006 * http://www.apache.org/licenses/LICENSE-2.0 007 * 008 * Unless required by applicable law or agreed to in writing, software 009 * distributed under the License is distributed on an "AS IS" BASIS, 010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 011 * See the License for the specific language governing permissions and 012 * limitations under the License. 013 */ 014package org.gbif.utils.collection; 015 016import java.lang.reflect.Array; 017import java.util.AbstractSet; 018import java.util.Collection; 019import java.util.ConcurrentModificationException; 020import java.util.Iterator; 021import java.util.NoSuchElementException; 022 023/** 024 * A HashSet implementation taken from Ontopia 025 * which is both faster and more compact than java.util.HashSet 026 * <p/> 027 * INTERNAL: Implements the Set interface more compactly than 028 * java.util.HashSet by using a closed hashtable. 029 * 030 * @see <a href="http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/">Ontopia blog</a> 031 * @see <a href="http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ontopia/utils/">Ontopia 032 * source</a> 033 */ 034public class CompactHashSet<T> extends AbstractSet<T> { 035 036 private class CompactHashIterator implements Iterator<T> { 037 038 private int index; 039 private int lastReturned = -1; 040 041 /** 042 * The modCount value that the iterator believes that the backing 043 * CompactHashSet should have. If this expectation is violated, 044 * the iterator has detected concurrent modification. 045 */ 046 private int expectedModCount; 047 048 CompactHashIterator() { 049 for (index = 0; 050 index < objects.length && (objects[index] == null || objects[index] == deletedObject); 051 index++) {} 052 expectedModCount = modCount; 053 } 054 055 @Override 056 public boolean hasNext() { 057 return index < objects.length; 058 } 059 060 @Override 061 public T next() { 062 if (modCount != expectedModCount) { 063 throw new ConcurrentModificationException(); 064 } 065 int length = objects.length; 066 if (index >= length) { 067 lastReturned = -2; 068 throw new NoSuchElementException(); 069 } 070 071 lastReturned = index; 072 for (index += 1; 073 index < length && (objects[index] == null || objects[index] == deletedObject); 074 index++) {} 075 if (objects[lastReturned] == nullObject) { 076 return null; 077 } else { 078 return (T) objects[lastReturned]; 079 } 080 } 081 082 @Override 083 public void remove() { 084 if (modCount != expectedModCount) { 085 throw new ConcurrentModificationException(); 086 } 087 if (lastReturned == -1 || lastReturned == -2) { 088 throw new IllegalStateException(); 089 } 090 // delete object 091 if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) { 092 objects[lastReturned] = deletedObject; 093 elements--; 094 modCount++; 095 expectedModCount = modCount; // this is expected; we made the change 096 } 097 } 098 } 099 100 protected static final int INITIAL_SIZE = 3; 101 102 protected static final double LOAD_FACTOR = 0.75; 103 /** 104 * This object is used to represent null, should clients add that to the set. 105 */ 106 protected static final Object nullObject = new Object(); 107 /** 108 * When an object is deleted this object is put into the hashtable 109 * in its place, so that other objects with the same key 110 * (collisions) further down the hashtable are not lost after we 111 * delete an object in the collision chain. 112 */ 113 protected static final Object deletedObject = new Object(); 114 115 protected int elements; 116 protected int freecells; 117 protected Object[] objects; 118 119 protected int modCount; 120 121 /** 122 * Constructs a new, empty set. 123 */ 124 public CompactHashSet() { 125 objects = new Object[INITIAL_SIZE]; 126 elements = 0; 127 freecells = objects.length; 128 modCount = 0; 129 } 130 131 /** 132 * Constructs a new set containing the elements in the specified 133 * collection. 134 * 135 * @param c the collection whose elements are to be placed into this set. 136 */ 137 public CompactHashSet(Collection c) { 138 this(c.size()); 139 addAll(c); 140 } 141 142 // ===== SET IMPLEMENTATION ============================================= 143 144 /** 145 * Constructs a new, empty set. 146 */ 147 public CompactHashSet(int size) { 148 // NOTE: If array size is 0, we get a 149 // "java.lang.ArithmeticException: / by zero" in add(Object). 150 objects = new Object[size == 0 ? 1 : size]; 151 elements = 0; 152 freecells = objects.length; 153 modCount = 0; 154 } 155 156 /** 157 * Adds the specified element to this set if it is not already 158 * present. 159 * 160 * @param x element to be added to this set. 161 * 162 * @return <tt>true</tt> if the set did not already contain the specified 163 * element. 164 */ 165 @Override 166 public boolean add(T x) { 167 Object o = x; 168 if (o == null) { 169 o = nullObject; 170 } 171 172 int hash = o.hashCode(); 173 int index = (hash & 0x7FFFFFFF) % objects.length; 174 int offset = 1; 175 int deletedix = -1; 176 177 // search for the object (continue while !null and !this object) 178 while (objects[index] != null 179 && !(objects[index].hashCode() == hash && objects[index].equals(o))) { 180 181 // if there's a deleted object here we can put this object here, 182 // provided it's not in here somewhere else already 183 if (objects[index] == deletedObject) { 184 deletedix = index; 185 } 186 187 index = ((index + offset) & 0x7FFFFFFF) % objects.length; 188 offset = offset * 2 + 1; 189 190 if (offset == -1) { 191 offset = 2; 192 } 193 } 194 195 if (objects[index] == null) { // wasn't present already 196 if (deletedix != -1) { 197 index = deletedix; 198 } else { 199 freecells--; 200 } 201 202 modCount++; 203 elements++; 204 objects[index] = o; 205 206 // rehash with same capacity 207 if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) { 208 rehash(objects.length); 209 // rehash with increased capacity 210 if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) { 211 rehash(objects.length * 2 + 1); 212 } 213 } 214 return true; 215 } else { 216 return false; 217 } 218 } 219 220 /** 221 * Removes all of the elements from this set. 222 */ 223 @Override 224 public void clear() { 225 elements = 0; 226 for (int ix = 0; ix < objects.length; ix++) { 227 objects[ix] = null; 228 } 229 freecells = objects.length; 230 modCount++; 231 } 232 233 /** 234 * Returns <tt>true</tt> if this set contains the specified element. 235 * 236 * @param o element whose presence in this set is to be tested. 237 * 238 * @return <tt>true</tt> if this set contains the specified element. 239 */ 240 @Override 241 public boolean contains(Object o) { 242 if (o == null) { 243 o = nullObject; 244 } 245 246 int hash = o.hashCode(); 247 int index = (hash & 0x7FFFFFFF) % objects.length; 248 int offset = 1; 249 250 // search for the object (continue while !null and !this object) 251 while (objects[index] != null 252 && !(objects[index].hashCode() == hash && objects[index].equals(o))) { 253 index = ((index + offset) & 0x7FFFFFFF) % objects.length; 254 offset = offset * 2 + 1; 255 256 if (offset == -1) { 257 offset = 2; 258 } 259 } 260 261 return objects[index] != null; 262 } 263 264 /** 265 * INTERNAL: Used for debugging only. 266 */ 267 public void dump() { 268 System.out.println("Size: " + objects.length); 269 System.out.println("Elements: " + elements); 270 System.out.println("Free cells: " + freecells); 271 System.out.println(); 272 for (int ix = 0; ix < objects.length; ix++) { 273 System.out.println("[" + ix + "]: " + objects[ix]); 274 } 275 } 276 277 /** 278 * Returns <tt>true</tt> if this set contains no elements. 279 */ 280 @Override 281 public boolean isEmpty() { 282 return elements == 0; 283 } 284 285 /** 286 * Returns an iterator over the elements in this set. The elements 287 * are returned in no particular order. 288 * 289 * @return an Iterator over the elements in this set. 290 * 291 * @see ConcurrentModificationException 292 */ 293 @Override 294 public Iterator iterator() { 295 return new CompactHashIterator(); 296 } 297 298 /** 299 * INTERNAL: Rehashes the hashset to a bigger size. 300 */ 301 protected void rehash(int newCapacity) { 302 int oldCapacity = objects.length; 303 Object[] newObjects = new Object[newCapacity]; 304 305 for (int ix = 0; ix < oldCapacity; ix++) { 306 Object o = objects[ix]; 307 if (o == null || o == deletedObject) { 308 continue; 309 } 310 311 int hash = o.hashCode(); 312 int index = (hash & 0x7FFFFFFF) % newCapacity; 313 int offset = 1; 314 315 // search for the object 316 while (newObjects[index] != null) { // no need to test for duplicates 317 index = ((index + offset) & 0x7FFFFFFF) % newCapacity; 318 offset = offset * 2 + 1; 319 320 if (offset == -1) { 321 offset = 2; 322 } 323 } 324 325 newObjects[index] = o; 326 } 327 328 objects = newObjects; 329 freecells = objects.length - elements; 330 } 331 332 /** 333 * Removes the specified element from the set. 334 */ 335 @Override 336 public boolean remove(Object o) { 337 if (o == null) { 338 o = nullObject; 339 } 340 341 int hash = o.hashCode(); 342 int index = (hash & 0x7FFFFFFF) % objects.length; 343 int offset = 1; 344 345 // search for the object (continue while !null and !this object) 346 while (objects[index] != null 347 && !(objects[index].hashCode() == hash && objects[index].equals(o))) { 348 index = ((index + offset) & 0x7FFFFFFF) % objects.length; 349 offset = offset * 2 + 1; 350 351 if (offset == -1) { 352 offset = 2; 353 } 354 } 355 356 // we found the right position, now do the removal 357 if (objects[index] != null) { 358 // we found the object 359 objects[index] = deletedObject; 360 modCount++; 361 elements--; 362 return true; 363 } else { 364 // we did not find the object 365 return false; 366 } 367 } 368 369 // ===== INTERNAL METHODS =============================================== 370 371 /** 372 * Returns the number of elements in this set (its cardinality). 373 */ 374 @Override 375 public int size() { 376 return elements; 377 } 378 379 @Override 380 public Object[] toArray() { 381 Object[] result = new Object[elements]; 382 Object[] objects = this.objects; 383 int pos = 0; 384 for (Object object : objects) { 385 if (object != null && object != deletedObject) { 386 result[pos++] = object == nullObject ? null : object; 387 } 388 } 389 return result; 390 } 391 392 // ===== ITERATOR IMPLEMENTATON ========================================= 393 394 @Override 395 public Object[] toArray(Object[] a) { 396 int size = elements; 397 if (a.length < size) { 398 a = (Object[]) Array.newInstance(a.getClass().getComponentType(), size); 399 } 400 Object[] objects = this.objects; 401 int pos = 0; 402 for (Object object : objects) { 403 if (object != null && object != deletedObject) { 404 a[pos++] = object == nullObject ? null : object; 405 } 406 } 407 return a; 408 } 409}