/*  $Id: hashtab.c 6135 2003-01-19 01:15:40Z rra $
**
**  Generic hash table implementation.
**
**  Written by Russ Allbery <rra@stanford.edu>
**  This work is hereby placed in the public domain by its author.
**
**  This is a generic hash table implementation with linear probing.  It
**  takes a comparison function and a hashing function and stores void *.
**
**  Included for the use of callers is the hash function LOOKUP2 by Bob
**  Jenkins, taken from <http://burtleburtle.net/bob/hash/>; see that web
**  page for analysis and performance comparisons.  The performance of this
**  hash is slightly worse than the standard sum and modulus hash function
**  seen in many places, but it produces fewer collisions.
*/

/* config.h, clibrary.h, and libinn.h are assumed here for xcalloc,
   xstrdup, strlen, bool, and uint32_t, following the usual INN layout. */
#include "config.h"
#include "clibrary.h"

#include "inn/hashtab.h"
#include "libinn.h"

/* Magic values for empty and deleted hash table slots. */
#define HASH_EMPTY      ((void *) 0)
#define HASH_DELETED    ((void *) 1)

struct hash {
    size_t size;                /* Allocated size. */
    size_t mask;                /* Used to resolve a hash to an index. */
    size_t nelements;           /* Total elements, including deleted. */
    size_t ndeleted;            /* Number of deleted elements. */

    unsigned long searches;     /* Count of lookups (for debugging). */
    unsigned long collisions;   /* Count of collisions (for debugging). */
    unsigned long expansions;   /* Count of hash resizes needed. */

    hash_func hash;             /* Return hash of a key. */
    hash_key_func key;          /* Given an element, returns its key. */
    hash_equal_func equal;      /* Whether a key matches an element. */
    hash_delete_func delete;    /* Called when a hash element is deleted. */

    void **table;               /* The actual elements. */
};

/*
**  Given a target table size, return the nearest power of two that's
**  greater than or equal to that size, with a minimum size of four.  The
**  minimum must be at least four to ensure that there is always at least
**  one empty slot in the table, given hash_find_slot's resizing of the
**  table if it is at least 75% full.  Otherwise, it would be possible for
**  hash_find_slot to go into an infinite loop.
*/
static size_t
hash_size(size_t target)
{
    size_t size;
    unsigned int n;

    /* Count the bits needed to represent target and take the next power of
       two.  That overshoots by one doubling when target is itself a power
       of two, so scale back in that case (reconstructed). */
    size = target;
    for (n = 0; size > 0; n++)
        size >>= 1;
    size = (size_t) 1 << n;
    if (size >= 2 && size / 2 >= target)
        size /= 2;

    /* Enforce the minimum size of four described above. */
    return (size < 4) ? 4 : size;
}
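
/*
**  Illustrative sketch, not part of the original file: the reason for
**  rounding table sizes up to a power of two is that reducing a hash value
**  to a slot index can then be done with a single AND against size - 1,
**  which gives the same result as the (slower) modulus.  The function name
**  below is hypothetical and the block is compiled out.
*/
#if 0
static size_t
hash_size_example(unsigned long h)
{
    size_t size = hash_size(10);        /* Rounded up to 16. */
    size_t mask = size - 1;             /* 0x0f */

    /* For a power-of-two size, these two expressions are equal. */
    return (h & mask) == (h % size) ? (h & mask) : (size_t) -1;
}
#endif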

/*
**  Create a new hash table.  The given size is rounded up to the nearest
**  power of two for speed reasons (it greatly simplifies the use of the
**  mask to map a hash value to a slot).
*/
struct hash *
hash_create(size_t size, hash_func hash_f, hash_key_func key_f,
            hash_equal_func equal_f, hash_delete_func delete_f)
{
    struct hash *hash;

    hash = xcalloc(1, sizeof(struct hash));
    hash->hash = hash_f;
    hash->key = key_f;
    hash->equal = equal_f;
    hash->delete = delete_f;
    hash->size = hash_size(size);
    hash->mask = hash->size - 1;
    hash->table = xcalloc(hash->size, sizeof(void *));
    return hash;
}

/*
**  Free a hash and all resources used by it, and call the delete function
**  on each element remaining in the table.
*/
void
hash_free(struct hash *hash)
{
    size_t i;
    void *entry;

    for (i = 0; i < hash->size; i++) {
        entry = hash->table[i];
        if (entry != HASH_EMPTY && entry != HASH_DELETED)
            (*hash->delete)(entry);
    }
    free(hash->table);
    free(hash);
}

/*
**  Internal search function used by hash_expand.  This is an optimized
**  version of hash_find_slot that returns a pointer to the first empty
**  slot, not trying to call the equality function on non-empty slots and
**  assuming there are no HASH_DELETED slots.
*/
static void **
hash_find_empty(struct hash *hash, const void *key)
{
    size_t slot;

    slot = (*hash->hash)(key) & hash->mask;
    while (1) {
        if (hash->table[slot] == HASH_EMPTY)
            return &hash->table[slot];

        /* Linear probing, wrapping around at the end of the table. */
        slot++;
        if (slot >= hash->size)
            slot = 0;
    }
}

/*
**  Expand the hash table to be approximately 50% empty based on the number
**  of elements in the hash.  This is done by allocating a new table and
**  then calling hash_find_empty for each element in the previous table,
**  recovering the key by calling hash->key on the element.
*/
static void
hash_expand(struct hash *hash)
{
    void **old, **slot;
    size_t size, i;

    old = hash->table;
    size = hash->size;

    /* Allocate a new table large enough to be about 50% empty. */
    hash->size = hash_size((hash->nelements - hash->ndeleted) * 2);
    hash->mask = hash->size - 1;
    hash->table = xcalloc(hash->size, sizeof(void *));

    /* Deleted slots are not carried over, so adjust the counts. */
    hash->nelements -= hash->ndeleted;
    hash->ndeleted = 0;
    hash->expansions++;

    /* Rehash each live element from the old table into the new one. */
    for (i = 0; i < size; i++)
        if (old[i] != HASH_EMPTY && old[i] != HASH_DELETED) {
            slot = hash_find_empty(hash, (*hash->key)(old[i]));
            *slot = old[i];
        }
    free(old);
}

/*
**  Find a slot in the hash for a given key.  This is used both for
**  inserting and deleting elements from the hash, as well as looking up
**  entries.  Returns a pointer to the slot.  If insert is true, return the
**  first empty or deleted slot.  If insert is false, return NULL if the
**  element could not be found.
**
**  This function assumes that there is at least one empty slot in the
**  hash; otherwise, it can loop infinitely.  It attempts to ensure this by
**  always expanding the hash if it is at least 75% full; this will ensure
**  that property for any hash size of 4 or higher.
*/
static void **
hash_find_slot(struct hash *hash, const void *key, bool insert)
{
    void **deleted_slot = NULL;
    void *entry;
    size_t slot;

    hash->searches++;

    /* If inserting and the table is at least 75% full, expand it first.
       (nelements * 4 >= size * 3 is the 75% test in integer math.) */
    if (insert && hash->nelements * 4 >= hash->size * 3)
        hash_expand(hash);

    slot = (*hash->hash)(key) & hash->mask;
    while (1) {
        entry = hash->table[slot];
        if (entry == HASH_EMPTY) {
            if (!insert)
                return NULL;

            /* Prefer reusing a deleted slot seen earlier in the probe
               sequence; clear it and adjust the counts so that the
               caller's bookkeeping stays consistent. */
            if (deleted_slot != NULL) {
                *deleted_slot = HASH_EMPTY;
                hash->ndeleted--;
                hash->nelements--;
                return deleted_slot;
            }
            return &hash->table[slot];
        } else if (entry == HASH_DELETED) {
            if (insert && deleted_slot == NULL)
                deleted_slot = &hash->table[slot];
        } else if ((*hash->equal)(key, entry)) {
            return &hash->table[slot];
        }

        /* Linear probing: count the collision and move to the next slot,
           wrapping around at the end of the table. */
        hash->collisions++;
        slot++;
        if (slot >= hash->size)
            slot = 0;
    }
}

/*
**  Given a key, return the entry corresponding to that key or NULL if that
**  key isn't present in the hash table.
*/
void *
hash_lookup(struct hash *hash, const void *key)
{
    void **slot;

    slot = hash_find_slot(hash, key, false);
    return (slot == NULL) ? NULL : *slot;
}

/*
**  Insert a new key/value pair into the hash, returning true if the
**  insertion was successful and false if there is already a value in the
**  hash with that key.
*/
bool
hash_insert(struct hash *hash, const void *key, void *datum)
{
    void **slot;

    slot = hash_find_slot(hash, key, true);
    if (*slot != HASH_EMPTY)
        return false;
    *slot = datum;
    hash->nelements++;
    return true;
}

/*
**  Replace an existing hash value with a new data value, calling the delete
**  function for the old data.  Returns true if the replacement was
**  successful or false (without changing the hash) if the key whose value
**  should be replaced was not found in the hash.
*/
bool
hash_replace(struct hash *hash, const void *key, void *datum)
{
    void **slot;

    slot = hash_find_slot(hash, key, false);
    if (slot == NULL)
        return false;
    (*hash->delete)(*slot);
    *slot = datum;
    return true;
}

/*
**  Delete a key out of the hash.  Returns true if the deletion was
**  successful, false if the key could not be found in the hash.
**
**  Implemented via hash_replace, storing the HASH_DELETED sentinel in the
**  slot so that later probes keep walking past it.
*/
bool
hash_delete(struct hash *hash, const void *key)
{
    bool result;

    result = hash_replace(hash, key, HASH_DELETED);
    if (result)
        hash->ndeleted++;
    return result;
}

/*
**  For each element in the hash table, call the provided function, passing
**  it the element and the opaque token that's passed to this function.
*/
void
hash_traverse(struct hash *hash, hash_traverse_func callback, void *data)
{
    size_t i;
    void *entry;

    for (i = 0; i < hash->size; i++) {
        entry = hash->table[i];
        if (entry != HASH_EMPTY && entry != HASH_DELETED)
            (*callback)(entry, data);
    }
}
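
/*
**  Illustrative sketch, not part of the original file: a traversal
**  callback that counts elements through the opaque data pointer.  The
**  name count_callback is hypothetical, not part of the inn/hashtab.h API,
**  and the block is compiled out.
*/
#if 0
static void
count_callback(void *element, void *data)
{
    size_t *count = data;

    (void) element;             /* The element itself is not needed. */
    (*count)++;
}

/* Usage:
**     size_t n = 0;
**     hash_traverse(table, count_callback, &n);
*/
#endif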

/*
**  Returns a count of undeleted elements in the hash.
*/
size_t
hash_count(struct hash *hash)
{
    return hash->nelements - hash->ndeleted;
}

/*
**  Accessor functions for the debugging statistics.
*/
unsigned long
hash_searches(struct hash *hash)
{
    return hash->searches;
}

unsigned long
hash_collisions(struct hash *hash)
{
    return hash->collisions;
}

unsigned long
hash_expansions(struct hash *hash)
{
    return hash->expansions;
}

/*
**  Mix three 32-bit values reversibly.  This is the internal mixing
**  function for the hash function.
**
**  For every delta with one or two bits set, and the deltas of all three
**  high bits or all three low bits, whether the original value of a,b,c
**  is almost all zero or is uniformly distributed:
**
**  * If mix() is run forward or backward, at least 32 bits in a,b,c
**    have at least 1/4 probability of changing.
**
**  * If mix() is run forward, every bit of c will change between 1/3 and
**    2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
**
**  mix() takes 36 machine instructions, but only 18 cycles on a superscalar
**  machine (like a Pentium or a Sparc).  No faster mixer seems to work;
**  that's the result of my brute-force search.  There were about 2^68
**  hashes to choose from.  I (Bob Jenkins) only tested about a billion of
**  them.
*/
#define MIX(a, b, c)                                            \
    do {                                                        \
        (a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);             \
        (b) -= (c); (b) -= (a); (b) ^= ((a) << 8);              \
        (c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);             \
        (a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);             \
        (b) -= (c); (b) -= (a); (b) ^= ((a) << 16);             \
        (c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);              \
        (a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);              \
        (b) -= (c); (b) -= (a); (b) ^= ((a) << 10);             \
        (c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);             \
    } while (0)

/*
**  Hash a variable-length key into a 32-bit value.
**
**  Takes the byte sequence to hash and returns a 32-bit value.  A partial
**  result can be passed as the third parameter so that large amounts of
**  data can be hashed by subsequent calls, passing in the result of the
**  previous call each time.  Every bit of the key affects every bit of the
**  return value.  Every 1-bit and 2-bit delta achieves avalanche.  About
**  (36 + 6n) instructions for n bytes.
**
**  The best hash table sizes are powers of 2.  There is no need to mod with
**  a prime (mod is sooo slow!).  If you need less than 32 bits, use a
**  bitmask.  For example, if you need only 10 bits, do:
**
**      h = h & ((1 << 10) - 1);
**
**  In which case, the hash table should have 2^10 elements.
**
**  Based on code by Bob Jenkins <bob_jenkins@burtleburtle.net>, originally
**  written in 1996.  The original license was:
**
**      By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use
**      this code any way you wish, private, educational, or commercial.
**      It's free.
**
**  See <http://burtleburtle.net/bob/hash/evahash.html> for discussion of
**  this hash function.  Use for hash table lookup, or anything where one
**  collision in 2^32 is acceptable.  Do NOT use for cryptographic purposes.
*/
unsigned long
hash_lookup2(const char *key, size_t length, unsigned long partial)
{
    uint32_t a, b, c, len;

    /* Set up the internal state.  a and b are initialized to a golden
       ratio, an arbitrary value intended to avoid mapping all zeroes to
       all zeroes.  c holds the previous hash value (or the seed). */
    len = length;
    a = b = 0x9e3779b9UL;
    c = partial;

    /* S0 through S3 shift a byte into each of the four byte positions of a
       32-bit word. */
#define S0(c) ((uint32_t)(c))
#define S1(c) ((uint32_t)(c) << 8)
#define S2(c) ((uint32_t)(c) << 16)
#define S3(c) ((uint32_t)(c) << 24)

    /* Handle most of the key, twelve bytes at a time. */
    while (len >= 12) {
        a += S0(key[0]) + S1(key[1]) + S2(key[2]) + S3(key[3]);
        b += S0(key[4]) + S1(key[5]) + S2(key[6]) + S3(key[7]);
        c += S0(key[8]) + S1(key[9]) + S2(key[10]) + S3(key[11]);
        MIX(a, b, c);
        key += 12;
        len -= 12;
    }

    /* Handle the last 11 bytes.  All of the cases fall through. */
    c += length;
    switch (len) {
    case 11: c += S3(key[10]);
    case 10: c += S2(key[9]);
    case 9:  c += S1(key[8]);
    /* The first byte of c is reserved for the length. */
    case 8:  b += S3(key[7]);
    case 7:  b += S2(key[6]);
    case 6:  b += S1(key[5]);
    case 5:  b += S0(key[4]);
    case 4:  a += S3(key[3]);
    case 3:  a += S2(key[2]);
    case 2:  a += S1(key[1]);
    case 1:  a += S0(key[0]);
    /* case 0: nothing left to add. */
    }
    MIX(a, b, c);

    return c;
}
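
/*
**  Illustrative sketch, not part of the original file: hashing data fed in
**  two chunks by passing the previous result back as the partial value,
**  then masking the result down to a 10-bit table index as suggested in
**  the comment above.  The function name is hypothetical and the block is
**  compiled out.
*/
#if 0
static unsigned long
hash_lookup2_example(void)
{
    unsigned long h;

    h = hash_lookup2("hello ", 6, 0);
    h = hash_lookup2("world", 5, h);
    return h & ((1 << 10) - 1);
}
#endif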

/*
**  A hash function for nul-terminated strings using hash_lookup2, suitable
**  for passing to hash_create.
*/
unsigned long
hash_string(const void *key)
{
    return hash_lookup2(key, strlen(key), 0);
}
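
/*
**  Illustrative sketch, not part of the original file: how a caller might
**  build a string-keyed table where each element is its own key, using
**  hash_string as the hash function.  The helper names (string_key,
**  string_equal, string_delete, hash_example) are hypothetical, written
**  for this example only; the callback signatures are assumed to match the
**  typedefs in inn/hashtab.h.  The block is compiled out.
*/
#if 0
static const void *
string_key(const void *element)
{
    return element;
}

static bool
string_equal(const void *key, const void *element)
{
    return strcmp((const char *) key, (const char *) element) == 0;
}

static void
string_delete(void *element)
{
    free(element);
}

static void
hash_example(void)
{
    struct hash *table;

    table = hash_create(32, hash_string, string_key, string_equal,
                        string_delete);
    hash_insert(table, "apple", xstrdup("apple"));
    if (hash_lookup(table, "apple") != NULL)
        hash_delete(table, "apple");
    hash_free(table);
}
#endif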