001/* 002 * Copyright 2021 Global Biodiversity Information Facility (GBIF) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.gbif.utils.collection; 017 018import java.lang.reflect.Array; 019import java.util.AbstractSet; 020import java.util.Collection; 021import java.util.ConcurrentModificationException; 022import java.util.Iterator; 023import java.util.NoSuchElementException; 024 025/** 026 * A HashSet implementation taken from Ontopia 027 * which is both faster and more compact than java.util.HashSet 028 * <p/> 029 * INTERNAL: Implements the Set interface more compactly than 030 * java.util.HashSet by using a closed hashtable. 031 * 032 * @see <a href="http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/">Ontopia blog</a> 033 * @see <a href="http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ontopia/utils/">Ontopia 034 * source</a> 035 */ 036public class CompactHashSet<T> extends AbstractSet<T> { 037 038 private class CompactHashIterator implements Iterator<T> { 039 040 private int index; 041 private int lastReturned = -1; 042 043 /** 044 * The modCount value that the iterator believes that the backing 045 * CompactHashSet should have. If this expectation is violated, 046 * the iterator has detected concurrent modification. 047 */ 048 private int expectedModCount; 049 050 CompactHashIterator() { 051 for (index = 0; index < objects.length && (objects[index] == null || objects[index] == deletedObject); index++) { 052 } 053 expectedModCount = modCount; 054 } 055 056 @Override 057 public boolean hasNext() { 058 return index < objects.length; 059 } 060 061 @Override 062 public T next() { 063 if (modCount != expectedModCount) { 064 throw new ConcurrentModificationException(); 065 } 066 int length = objects.length; 067 if (index >= length) { 068 lastReturned = -2; 069 throw new NoSuchElementException(); 070 } 071 072 lastReturned = index; 073 for (index += 1; index < length && (objects[index] == null || objects[index] == deletedObject); index++) { 074 } 075 if (objects[lastReturned] == nullObject) { 076 return null; 077 } else { 078 return (T) objects[lastReturned]; 079 } 080 } 081 082 @Override 083 public void remove() { 084 if (modCount != expectedModCount) { 085 throw new ConcurrentModificationException(); 086 } 087 if (lastReturned == -1 || lastReturned == -2) { 088 throw new IllegalStateException(); 089 } 090 // delete object 091 if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) { 092 objects[lastReturned] = deletedObject; 093 elements--; 094 modCount++; 095 expectedModCount = modCount; // this is expected; we made the change 096 } 097 } 098 } 099 100 protected static final int INITIAL_SIZE = 3; 101 102 protected static final double LOAD_FACTOR = 0.75; 103 /** 104 * This object is used to represent null, should clients add that to the set. 105 */ 106 protected static final Object nullObject = new Object(); 107 /** 108 * When an object is deleted this object is put into the hashtable 109 * in its place, so that other objects with the same key 110 * (collisions) further down the hashtable are not lost after we 111 * delete an object in the collision chain. 112 */ 113 protected static final Object deletedObject = new Object(); 114 protected int elements; 115 protected int freecells; 116 protected Object[] objects; 117 118 protected int modCount; 119 120 /** 121 * Constructs a new, empty set. 122 */ 123 public CompactHashSet() { 124 objects = new Object[INITIAL_SIZE]; 125 elements = 0; 126 freecells = objects.length; 127 modCount = 0; 128 } 129 130 /** 131 * Constructs a new set containing the elements in the specified 132 * collection. 133 * 134 * @param c the collection whose elements are to be placed into this set. 135 */ 136 public CompactHashSet(Collection c) { 137 this(c.size()); 138 addAll(c); 139 } 140 141 // ===== SET IMPLEMENTATION ============================================= 142 143 /** 144 * Constructs a new, empty set. 145 */ 146 public CompactHashSet(int size) { 147 // NOTE: If array size is 0, we get a 148 // "java.lang.ArithmeticException: / by zero" in add(Object). 149 objects = new Object[size == 0 ? 1 : size]; 150 elements = 0; 151 freecells = objects.length; 152 modCount = 0; 153 } 154 155 /** 156 * Adds the specified element to this set if it is not already 157 * present. 158 * 159 * @param x element to be added to this set. 160 * 161 * @return <tt>true</tt> if the set did not already contain the specified 162 * element. 163 */ 164 @Override 165 public boolean add(T x) { 166 Object o = x; 167 if (o == null) { 168 o = nullObject; 169 } 170 171 int hash = o.hashCode(); 172 int index = (hash & 0x7FFFFFFF) % objects.length; 173 int offset = 1; 174 int deletedix = -1; 175 176 // search for the object (continue while !null and !this object) 177 while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) { 178 179 // if there's a deleted object here we can put this object here, 180 // provided it's not in here somewhere else already 181 if (objects[index] == deletedObject) { 182 deletedix = index; 183 } 184 185 index = ((index + offset) & 0x7FFFFFFF) % objects.length; 186 offset = offset * 2 + 1; 187 188 if (offset == -1) { 189 offset = 2; 190 } 191 } 192 193 if (objects[index] == null) { // wasn't present already 194 if (deletedix != -1) { 195 index = deletedix; 196 } else { 197 freecells--; 198 } 199 200 modCount++; 201 elements++; 202 objects[index] = o; 203 204 // rehash with same capacity 205 if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) { 206 rehash(objects.length); 207 // rehash with increased capacity 208 if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) { 209 rehash(objects.length * 2 + 1); 210 } 211 } 212 return true; 213 } else { 214 return false; 215 } 216 } 217 218 /** 219 * Removes all of the elements from this set. 220 */ 221 @Override 222 public void clear() { 223 elements = 0; 224 for (int ix = 0; ix < objects.length; ix++) { 225 objects[ix] = null; 226 } 227 freecells = objects.length; 228 modCount++; 229 } 230 231 /** 232 * Returns <tt>true</tt> if this set contains the specified element. 233 * 234 * @param o element whose presence in this set is to be tested. 235 * 236 * @return <tt>true</tt> if this set contains the specified element. 237 */ 238 @Override 239 public boolean contains(Object o) { 240 if (o == null) { 241 o = nullObject; 242 } 243 244 int hash = o.hashCode(); 245 int index = (hash & 0x7FFFFFFF) % objects.length; 246 int offset = 1; 247 248 // search for the object (continue while !null and !this object) 249 while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) { 250 index = ((index + offset) & 0x7FFFFFFF) % objects.length; 251 offset = offset * 2 + 1; 252 253 if (offset == -1) { 254 offset = 2; 255 } 256 } 257 258 return objects[index] != null; 259 } 260 261 /** 262 * INTERNAL: Used for debugging only. 263 */ 264 public void dump() { 265 System.out.println("Size: " + objects.length); 266 System.out.println("Elements: " + elements); 267 System.out.println("Free cells: " + freecells); 268 System.out.println(); 269 for (int ix = 0; ix < objects.length; ix++) { 270 System.out.println("[" + ix + "]: " + objects[ix]); 271 } 272 } 273 274 /** 275 * Returns <tt>true</tt> if this set contains no elements. 276 */ 277 @Override 278 public boolean isEmpty() { 279 return elements == 0; 280 } 281 282 /** 283 * Returns an iterator over the elements in this set. The elements 284 * are returned in no particular order. 285 * 286 * @return an Iterator over the elements in this set. 287 * 288 * @see ConcurrentModificationException 289 */ 290 @Override 291 public Iterator iterator() { 292 return new CompactHashIterator(); 293 } 294 295 /** 296 * INTERNAL: Rehashes the hashset to a bigger size. 297 */ 298 protected void rehash(int newCapacity) { 299 int oldCapacity = objects.length; 300 Object[] newObjects = new Object[newCapacity]; 301 302 for (int ix = 0; ix < oldCapacity; ix++) { 303 Object o = objects[ix]; 304 if (o == null || o == deletedObject) { 305 continue; 306 } 307 308 int hash = o.hashCode(); 309 int index = (hash & 0x7FFFFFFF) % newCapacity; 310 int offset = 1; 311 312 // search for the object 313 while (newObjects[index] != null) { // no need to test for duplicates 314 index = ((index + offset) & 0x7FFFFFFF) % newCapacity; 315 offset = offset * 2 + 1; 316 317 if (offset == -1) { 318 offset = 2; 319 } 320 } 321 322 newObjects[index] = o; 323 } 324 325 objects = newObjects; 326 freecells = objects.length - elements; 327 } 328 329 /** 330 * Removes the specified element from the set. 331 */ 332 @Override 333 public boolean remove(Object o) { 334 if (o == null) { 335 o = nullObject; 336 } 337 338 int hash = o.hashCode(); 339 int index = (hash & 0x7FFFFFFF) % objects.length; 340 int offset = 1; 341 342 // search for the object (continue while !null and !this object) 343 while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) { 344 index = ((index + offset) & 0x7FFFFFFF) % objects.length; 345 offset = offset * 2 + 1; 346 347 if (offset == -1) { 348 offset = 2; 349 } 350 } 351 352 // we found the right position, now do the removal 353 if (objects[index] != null) { 354 // we found the object 355 objects[index] = deletedObject; 356 modCount++; 357 elements--; 358 return true; 359 } else { 360 // we did not find the object 361 return false; 362 } 363 } 364 365 // ===== INTERNAL METHODS =============================================== 366 367 /** 368 * Returns the number of elements in this set (its cardinality). 369 */ 370 @Override 371 public int size() { 372 return elements; 373 } 374 375 @Override 376 public Object[] toArray() { 377 Object[] result = new Object[elements]; 378 Object[] objects = this.objects; 379 int pos = 0; 380 for (Object object : objects) { 381 if (object != null && object != deletedObject) { 382 result[pos++] = object == nullObject ? null : object; 383 } 384 } 385 return result; 386 } 387 388 // ===== ITERATOR IMPLEMENTATON ========================================= 389 390 @Override 391 public Object[] toArray(Object[] a) { 392 int size = elements; 393 if (a.length < size) { 394 a = (Object[]) Array.newInstance(a.getClass().getComponentType(), size); 395 } 396 Object[] objects = this.objects; 397 int pos = 0; 398 for (Object object : objects) { 399 if (object != null && object != deletedObject) { 400 a[pos++] = object == nullObject ? null : object; 401 } 402 } 403 return a; 404 } 405 406}