2 Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
12 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
13 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
15 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
18 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
19 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <string.h> /* memcmp,strlen */
28 #include <stddef.h> /* ptrdiff_t */
29 #include <stdlib.h> /* exit() */
31 /* These macros use decltype or the earlier __typeof GNU extension.
32 As decltype is only available in newer compilers (VS2010 or gcc 4.3+
33 when compiling c++ source) this code uses whatever method is needed
34 or, for VS2008 where neither is available, uses casting workarounds. */
35 #if defined(_MSC_VER) /* MS compiler */
36 #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
37 #define DECLTYPE(x) (decltype(x))
38 #else /* VS2008 or older (or VS2010 in C mode) */
42 #elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
45 #else /* GNU, Sun and other compilers */
46 #define DECLTYPE(x) (__typeof(x))
50 #define DECLTYPE_ASSIGN(dst,src) \
52 char **_da_dst = (char**)(&(dst)); \
53 *_da_dst = (char*)(src); \
56 #define DECLTYPE_ASSIGN(dst,src) \
58 (dst) = DECLTYPE(dst)(src); \
62 /* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */
64 #if defined(_MSC_VER) && _MSC_VER >= 1600
66 #elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
69 typedef unsigned int uint32_t;
70 typedef unsigned char uint8_t;
72 #elif defined(__GNUC__) && !defined(__VXWORKS__)
75 typedef unsigned int uint32_t;
76 typedef unsigned char uint8_t;
79 #define UTHASH_VERSION 1.9.9
82 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
85 #define uthash_malloc(sz) malloc(sz) /* malloc fcn */
88 #define uthash_free(ptr,sz) free(ptr) /* free fcn */
91 #ifndef uthash_noexpand_fyi
92 #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
94 #ifndef uthash_expand_fyi
95 #define uthash_expand_fyi(tbl) /* can be defined to log expands */
98 /* initial number of buckets */
99 #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */
100 #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
101 #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */
103 /* calculate the element whose hash handle address is hhe */
104 #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
106 #define HASH_FIND(hh,head,keyptr,keylen,out) \
109 if (head != NULL) { \
110 unsigned _hf_bkt,_hf_hashv; \
111 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
112 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv) != 0) { \
113 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
114 keyptr,keylen,out); \
120 #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM)
121 #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL)
122 #define HASH_BLOOM_MAKE(tbl) \
124 (tbl)->bloom_nbits = HASH_BLOOM; \
125 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
126 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
127 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
128 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
131 #define HASH_BLOOM_FREE(tbl) \
133 uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
136 #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U)))
137 #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U)))
139 #define HASH_BLOOM_ADD(tbl,hashv) \
140 HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
142 #define HASH_BLOOM_TEST(tbl,hashv) \
143 HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
146 #define HASH_BLOOM_MAKE(tbl)
147 #define HASH_BLOOM_FREE(tbl)
148 #define HASH_BLOOM_ADD(tbl,hashv)
149 #define HASH_BLOOM_TEST(tbl,hashv) (1)
150 #define HASH_BLOOM_BYTELEN 0U
153 #define HASH_MAKE_TABLE(hh,head) \
155 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
156 sizeof(UT_hash_table)); \
157 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
158 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
159 (head)->hh.tbl->tail = &((head)->hh); \
160 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
161 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
162 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
163 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
164 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
165 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
166 memset((head)->hh.tbl->buckets, 0, \
167 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
168 HASH_BLOOM_MAKE((head)->hh.tbl); \
169 (head)->hh.tbl->signature = HASH_SIGNATURE; \
172 #define HASH_ADD(hh,head,fieldname,keylen_in,add) \
173 HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
175 #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \
178 HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \
179 if (replaced!=NULL) { \
180 HASH_DELETE(hh,head,replaced); \
182 HASH_ADD(hh,head,fieldname,keylen_in,add); \
185 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
188 (add)->hh.next = NULL; \
189 (add)->hh.key = (char*)(keyptr); \
190 (add)->hh.keylen = (unsigned)(keylen_in); \
193 (head)->hh.prev = NULL; \
194 HASH_MAKE_TABLE(hh,head); \
196 (head)->hh.tbl->tail->next = (add); \
197 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
198 (head)->hh.tbl->tail = &((add)->hh); \
200 (head)->hh.tbl->num_items++; \
201 (add)->hh.tbl = (head)->hh.tbl; \
202 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
203 (add)->hh.hashv, _ha_bkt); \
204 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
205 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
206 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
207 HASH_FSCK(hh,head); \
210 #define HASH_TO_BKT( hashv, num_bkts, bkt ) \
212 bkt = ((hashv) & ((num_bkts) - 1U)); \
215 /* delete "delptr" from the hash table.
216 * "the usual" patch-up process for the app-order doubly-linked-list.
217 * The use of _hd_hh_del below deserves special explanation.
218 * These used to be expressed using (delptr) but that led to a bug
219 * if someone used the same symbol for the head and deletee, like
220 * HASH_DELETE(hh,users,users);
221 * We want that to work, but by changing the head (users) below
222 * we were forfeiting our ability to further refer to the deletee (users)
223 * in the patch-up process. Solution: use scratch space to
224 * copy the deletee pointer, then the latter references are via that
225 * scratch pointer rather than through the repointed (users) symbol.
227 #define HASH_DELETE(hh,head,delptr) \
229 struct UT_hash_handle *_hd_hh_del; \
230 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
231 uthash_free((head)->hh.tbl->buckets, \
232 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
233 HASH_BLOOM_FREE((head)->hh.tbl); \
234 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
238 _hd_hh_del = &((delptr)->hh); \
239 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
240 (head)->hh.tbl->tail = \
241 (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
242 (head)->hh.tbl->hho); \
244 if ((delptr)->hh.prev != NULL) { \
245 ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
246 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
248 DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
250 if (_hd_hh_del->next != NULL) { \
251 ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
252 (head)->hh.tbl->hho))->prev = \
255 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
256 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
257 (head)->hh.tbl->num_items--; \
259 HASH_FSCK(hh,head); \
263 /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
264 #define HASH_FIND_STR(head,findstr,out) \
265 HASH_FIND(hh,head,findstr,(unsigned)strlen(findstr),out)
266 #define HASH_ADD_STR(head,strfield,add) \
267 HASH_ADD(hh,head,strfield[0],(unsigned int)strlen(add->strfield),add)
268 #define HASH_REPLACE_STR(head,strfield,add,replaced) \
269 HASH_REPLACE(hh,head,strfield[0],(unsigned)strlen(add->strfield),add,replaced)
270 #define HASH_FIND_INT(head,findint,out) \
271 HASH_FIND(hh,head,findint,sizeof(int),out)
272 #define HASH_ADD_INT(head,intfield,add) \
273 HASH_ADD(hh,head,intfield,sizeof(int),add)
274 #define HASH_REPLACE_INT(head,intfield,add,replaced) \
275 HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
276 #define HASH_FIND_PTR(head,findptr,out) \
277 HASH_FIND(hh,head,findptr,sizeof(void *),out)
278 #define HASH_ADD_PTR(head,ptrfield,add) \
279 HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
280 #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
281 HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
282 #define HASH_DEL(head,delptr) \
283 HASH_DELETE(hh,head,delptr)
285 /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
286 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
289 #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
290 #define HASH_FSCK(hh,head) \
292 struct UT_hash_handle *_thh; \
298 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
299 unsigned _bkt_count = 0; \
300 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
303 if (_prev != (char*)(_thh->hh_prev)) { \
304 HASH_OOPS("invalid hh_prev %p, actual %p\n", \
305 _thh->hh_prev, _prev ); \
308 _prev = (char*)(_thh); \
309 _thh = _thh->hh_next; \
311 _count += _bkt_count; \
312 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
313 HASH_OOPS("invalid bucket count %u, actual %u\n", \
314 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
317 if (_count != (head)->hh.tbl->num_items) { \
318 HASH_OOPS("invalid hh item count %u, actual %u\n", \
319 (head)->hh.tbl->num_items, _count ); \
321 /* traverse hh in app order; check next/prev integrity, count */ \
324 _thh = &(head)->hh; \
327 if (_prev !=(char*)(_thh->prev)) { \
328 HASH_OOPS("invalid prev %p, actual %p\n", \
329 _thh->prev, _prev ); \
331 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
332 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
333 (head)->hh.tbl->hho) : NULL ); \
335 if (_count != (head)->hh.tbl->num_items) { \
336 HASH_OOPS("invalid app item count %u, actual %u\n", \
337 (head)->hh.tbl->num_items, _count ); \
342 #define HASH_FSCK(hh,head)
345 /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
346 * the descriptor to which this macro is defined for tuning the hash function.
347 * The app can #include <unistd.h> to get the prototype for write(2). */
348 #ifdef HASH_EMIT_KEYS
349 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
351 unsigned _klen = fieldlen; \
352 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
353 write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \
356 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
359 /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
361 #define HASH_FCN HASH_FUNCTION
363 #define HASH_FCN HASH_JEN
366 /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */
367 #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
369 unsigned _hb_keylen=(unsigned)keylen; \
370 const unsigned char *_hb_key=(const unsigned char*)(key); \
372 while (_hb_keylen-- != 0U) { \
373 (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \
375 bkt = (hashv) & (num_bkts-1U); \
379 /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
380 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
381 #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
384 const unsigned char *_hs_key=(const unsigned char*)(key); \
386 for(_sx_i=0; _sx_i < keylen; _sx_i++) { \
387 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
389 bkt = hashv & (num_bkts-1U); \
391 /* FNV-1a variation */
392 #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
395 const unsigned char *_hf_key=(const unsigned char*)(key); \
396 hashv = 2166136261U; \
397 for(_fn_i=0; _fn_i < keylen; _fn_i++) { \
398 hashv = hashv ^ _hf_key[_fn_i]; \
399 hashv = hashv * 16777619U; \
401 bkt = hashv & (num_bkts-1U); \
404 #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
407 const unsigned char *_ho_key=(const unsigned char*)(key); \
409 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
410 hashv += _ho_key[_ho_i]; \
411 hashv += (hashv << 10); \
412 hashv ^= (hashv >> 6); \
414 hashv += (hashv << 3); \
415 hashv ^= (hashv >> 11); \
416 hashv += (hashv << 15); \
417 bkt = hashv & (num_bkts-1U); \
420 #define HASH_JEN_MIX(a,b,c) \
422 a -= b; a -= c; a ^= ( c >> 13 ); \
423 b -= c; b -= a; b ^= ( a << 8 ); \
424 c -= a; c -= b; c ^= ( b >> 13 ); \
425 a -= b; a -= c; a ^= ( c >> 12 ); \
426 b -= c; b -= a; b ^= ( a << 16 ); \
427 c -= a; c -= b; c ^= ( b >> 5 ); \
428 a -= b; a -= c; a ^= ( c >> 3 ); \
429 b -= c; b -= a; b ^= ( a << 10 ); \
430 c -= a; c -= b; c ^= ( b >> 15 ); \
433 #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
435 unsigned _hj_i,_hj_j,_hj_k; \
436 unsigned const char *_hj_key=(unsigned const char*)(key); \
437 hashv = 0xfeedbeefu; \
438 _hj_i = _hj_j = 0x9e3779b9u; \
439 _hj_k = (unsigned)(keylen); \
440 while (_hj_k >= 12U) { \
441 _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
442 + ( (unsigned)_hj_key[2] << 16 ) \
443 + ( (unsigned)_hj_key[3] << 24 ) ); \
444 _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
445 + ( (unsigned)_hj_key[6] << 16 ) \
446 + ( (unsigned)_hj_key[7] << 24 ) ); \
447 hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
448 + ( (unsigned)_hj_key[10] << 16 ) \
449 + ( (unsigned)_hj_key[11] << 24 ) ); \
451 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
456 hashv += (unsigned)(keylen); \
458 case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \
459 case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \
460 case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \
461 case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \
462 case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \
463 case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \
464 case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \
465 case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \
466 case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \
467 case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \
468 case 1: _hj_i += _hj_key[0]; \
470 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
471 bkt = hashv & (num_bkts-1U); \
474 /* The Paul Hsieh hash function */
476 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
477 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
478 #define get16bits(d) (*((const uint16_t *) (d)))
481 #if !defined (get16bits)
482 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
483 +(uint32_t)(((const uint8_t *)(d))[0]) )
485 #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
487 unsigned const char *_sfh_key=(unsigned const char*)(key); \
488 uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \
490 unsigned _sfh_rem = _sfh_len & 3U; \
492 hashv = 0xcafebabeu; \
495 for (;_sfh_len > 0U; _sfh_len--) { \
496 hashv += get16bits (_sfh_key); \
497 _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \
498 hashv = (hashv << 16) ^ _sfh_tmp; \
499 _sfh_key += 2U*sizeof (uint16_t); \
500 hashv += hashv >> 11; \
503 /* Handle end cases */ \
504 switch (_sfh_rem) { \
505 case 3: hashv += get16bits (_sfh_key); \
506 hashv ^= hashv << 16; \
507 hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \
508 hashv += hashv >> 11; \
510 case 2: hashv += get16bits (_sfh_key); \
511 hashv ^= hashv << 11; \
512 hashv += hashv >> 17; \
514 case 1: hashv += *_sfh_key; \
515 hashv ^= hashv << 10; \
516 hashv += hashv >> 1; \
519 /* Force "avalanching" of final 127 bits */ \
520 hashv ^= hashv << 3; \
521 hashv += hashv >> 5; \
522 hashv ^= hashv << 4; \
523 hashv += hashv >> 17; \
524 hashv ^= hashv << 25; \
525 hashv += hashv >> 6; \
526 bkt = hashv & (num_bkts-1U); \
529 #ifdef HASH_USING_NO_STRICT_ALIASING
530 /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
531 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
532 * MurmurHash uses the faster approach only on CPU's where we know it's safe.
534 * Note the preprocessor built-in defines can be emitted using:
536 * gcc -m64 -dM -E - < /dev/null (on gcc)
537 * cc -## a.c (where a.c is a simple test file) (Sun Studio)
539 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
540 #define MUR_GETBLOCK(p,i) p[i]
541 #else /* non intel */
542 #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL)
543 #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL)
544 #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL)
545 #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL)
546 #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
547 #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
548 #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
549 #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
550 #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
551 #else /* assume little endian non-intel */
552 #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
553 #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
554 #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
556 #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
557 (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
558 (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
561 #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
562 #define MUR_FMIX(_h) \
571 #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
573 const uint8_t *_mur_data = (const uint8_t*)(key); \
574 const int _mur_nblocks = (int)(keylen) / 4; \
575 uint32_t _mur_h1 = 0xf88D5353u; \
576 uint32_t _mur_c1 = 0xcc9e2d51u; \
577 uint32_t _mur_c2 = 0x1b873593u; \
578 uint32_t _mur_k1 = 0; \
579 const uint8_t *_mur_tail; \
580 const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
582 for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \
583 _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
584 _mur_k1 *= _mur_c1; \
585 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
586 _mur_k1 *= _mur_c2; \
588 _mur_h1 ^= _mur_k1; \
589 _mur_h1 = MUR_ROTL32(_mur_h1,13); \
590 _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \
592 _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \
594 switch((keylen) & 3U) { \
595 case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
596 case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \
597 case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \
598 _mur_k1 *= _mur_c1; \
599 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
600 _mur_k1 *= _mur_c2; \
601 _mur_h1 ^= _mur_k1; \
603 _mur_h1 ^= (uint32_t)(keylen); \
606 bkt = hashv & (num_bkts-1U); \
608 #endif /* HASH_USING_NO_STRICT_ALIASING */
610 /* key comparison function; return 0 if keys equal */
611 #define HASH_KEYCMP(a,b,len) memcmp(a,b,(unsigned long)(len))
613 /* iterate over items in a known bucket to find desired item */
614 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
616 if (head.hh_head != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); } \
618 while (out != NULL) { \
619 if ((out)->hh.keylen == (keylen_in)) { \
620 if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) { break; } \
622 if ((out)->hh.hh_next != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \
623 else { out = NULL; } \
627 /* add an item to a bucket */
628 #define HASH_ADD_TO_BKT(head,addhh) \
631 (addhh)->hh_next = head.hh_head; \
632 (addhh)->hh_prev = NULL; \
633 if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \
634 (head).hh_head=addhh; \
635 if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \
636 && ((addhh)->tbl->noexpand != 1U)) { \
637 HASH_EXPAND_BUCKETS((addhh)->tbl); \
641 /* remove an item from a given bucket */
642 #define HASH_DEL_IN_BKT(hh,head,hh_del) \
644 if ((head).hh_head == hh_del) { \
645 (head).hh_head = hh_del->hh_next; \
647 if (hh_del->hh_prev) { \
648 hh_del->hh_prev->hh_next = hh_del->hh_next; \
650 if (hh_del->hh_next) { \
651 hh_del->hh_next->hh_prev = hh_del->hh_prev; \
654 /* Bucket expansion has the effect of doubling the number of buckets
655 * and redistributing the items into the new buckets. Ideally the
656 * items will distribute more or less evenly into the new buckets
657 * (the extent to which this is true is a measure of the quality of
658 * the hash function as it applies to the key domain).
660 * With the items distributed into more buckets, the chain length
661 * (item count) in each bucket is reduced. Thus by expanding buckets
662 * the hash keeps a bound on the chain length. This bounded chain
663 * length is the essence of how a hash provides constant time lookup.
665 * The calculation of tbl->ideal_chain_maxlen below deserves some
666 * explanation. First, keep in mind that we're calculating the ideal
667 * maximum chain length based on the *new* (doubled) bucket count.
668 * In fractions this is just n/b (n=number of items,b=new num buckets).
669 * Since the ideal chain length is an integer, we want to calculate
670 * ceil(n/b). We don't depend on floating point arithmetic in this
671 * hash, so to calculate ceil(n/b) with integers we could write
673 * ceil(n/b) = (n/b) + ((n%b)?1:0)
675 * and in fact a previous version of this hash did just that.
676 * But now we have improved things a bit by recognizing that b is
677 * always a power of two. We keep its base 2 log handy (call it lb),
678 * so now we can write this with a bit shift and logical AND:
680 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
683 #define HASH_EXPAND_BUCKETS(tbl) \
686 unsigned _he_bkt_i; \
687 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
688 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
689 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
690 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
691 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
692 memset(_he_new_buckets, 0, \
693 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
694 tbl->ideal_chain_maxlen = \
695 (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \
696 (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \
697 tbl->nonideal_items = 0; \
698 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
700 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
701 while (_he_thh != NULL) { \
702 _he_hh_nxt = _he_thh->hh_next; \
703 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \
704 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
705 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
706 tbl->nonideal_items++; \
707 _he_newbkt->expand_mult = _he_newbkt->count / \
708 tbl->ideal_chain_maxlen; \
710 _he_thh->hh_prev = NULL; \
711 _he_thh->hh_next = _he_newbkt->hh_head; \
712 if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \
714 _he_newbkt->hh_head = _he_thh; \
715 _he_thh = _he_hh_nxt; \
718 uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
719 tbl->num_buckets *= 2U; \
720 tbl->log2_num_buckets++; \
721 tbl->buckets = _he_new_buckets; \
722 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
723 (tbl->ineff_expands+1U) : 0U; \
724 if (tbl->ineff_expands > 1U) { \
726 uthash_noexpand_fyi(tbl); \
728 uthash_expand_fyi(tbl); \
732 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
733 /* Note that HASH_SORT assumes the hash handle name to be hh.
734 * HASH_SRT was added to allow the hash handle name to be passed in. */
735 #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
736 #define HASH_SRT(hh,head,cmpfcn) \
739 unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
740 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
741 if (head != NULL) { \
744 _hs_list = &((head)->hh); \
745 while (_hs_looping != 0U) { \
750 while (_hs_p != NULL) { \
754 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
756 _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
757 ((void*)((char*)(_hs_q->next) + \
758 (head)->hh.tbl->hho)) : NULL); \
759 if (! (_hs_q) ) { break; } \
761 _hs_qsize = _hs_insize; \
762 while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
763 if (_hs_psize == 0U) { \
765 _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
766 ((void*)((char*)(_hs_q->next) + \
767 (head)->hh.tbl->hho)) : NULL); \
769 } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \
771 if (_hs_p != NULL){ \
772 _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \
773 ((void*)((char*)(_hs_p->next) + \
774 (head)->hh.tbl->hho)) : NULL); \
778 cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
779 DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
782 if (_hs_p != NULL){ \
783 _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \
784 ((void*)((char*)(_hs_p->next) + \
785 (head)->hh.tbl->hho)) : NULL); \
790 _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
791 ((void*)((char*)(_hs_q->next) + \
792 (head)->hh.tbl->hho)) : NULL); \
795 if ( _hs_tail != NULL ) { \
796 _hs_tail->next = ((_hs_e != NULL) ? \
797 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
801 if (_hs_e != NULL) { \
802 _hs_e->prev = ((_hs_tail != NULL) ? \
803 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
809 if (_hs_tail != NULL){ \
810 _hs_tail->next = NULL; \
812 if ( _hs_nmerges <= 1U ) { \
814 (head)->hh.tbl->tail = _hs_tail; \
815 DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
819 HASH_FSCK(hh,head); \
823 /* This function selects items from one hash into another hash.
824 * The end result is that the selected items have dual presence
825 * in both hashes. There is no copy of the items made; rather
826 * they are added into the new hash through a secondary hash
827 * hash handle that must be present in the structure. */
828 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
830 unsigned _src_bkt, _dst_bkt; \
831 void *_last_elt=NULL, *_elt; \
832 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
833 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
835 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
836 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
838 _src_hh = _src_hh->hh_next) { \
839 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
841 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
842 _dst_hh->key = _src_hh->key; \
843 _dst_hh->keylen = _src_hh->keylen; \
844 _dst_hh->hashv = _src_hh->hashv; \
845 _dst_hh->prev = _last_elt; \
846 _dst_hh->next = NULL; \
847 if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \
849 DECLTYPE_ASSIGN(dst,_elt); \
850 HASH_MAKE_TABLE(hh_dst,dst); \
852 _dst_hh->tbl = (dst)->hh_dst.tbl; \
854 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
855 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
856 (dst)->hh_dst.tbl->num_items++; \
858 _last_elt_hh = _dst_hh; \
863 HASH_FSCK(hh_dst,dst); \
866 #define HASH_CLEAR(hh,head) \
868 if (head != NULL) { \
869 uthash_free((head)->hh.tbl->buckets, \
870 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
871 HASH_BLOOM_FREE((head)->hh.tbl); \
872 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
877 #define HASH_OVERHEAD(hh,head) \
878 ((head != NULL) ? ( \
879 (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
880 ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
881 sizeof(UT_hash_table) + \
882 (HASH_BLOOM_BYTELEN))) : 0U)
885 #define HASH_ITER(hh,head,el,tmp) \
886 for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
887 (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
889 #define HASH_ITER(hh,head,el,tmp) \
890 for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \
891 (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
894 /* obtain a count of items in the hash */
895 #define HASH_COUNT(head) HASH_CNT(hh,head)
896 #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U)
898 typedef struct UT_hash_bucket
{
899 struct UT_hash_handle
*hh_head
;
902 /* expand_mult is normally set to 0. In this situation, the max chain length
903 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
904 * the bucket's chain exceeds this length, bucket expansion is triggered).
905 * However, setting expand_mult to a non-zero value delays bucket expansion
906 * (that would be triggered by additions to this particular bucket)
907 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
908 * (The multiplier is simply expand_mult+1). The whole idea of this
909 * multiplier is to reduce bucket expansions, since they are expensive, in
910 * situations where we know that a particular bucket tends to be overused.
911 * It is better to let its chain length grow to a longer yet-still-bounded
912 * value, than to do an O(n) bucket expansion too often.
914 unsigned expand_mult
;
918 /* random signature used only to find hash tables in external analysis */
919 #define HASH_SIGNATURE 0xa0111fe1u
920 #define HASH_BLOOM_SIGNATURE 0xb12220f2u
922 typedef struct UT_hash_table
{
923 UT_hash_bucket
*buckets
;
924 unsigned num_buckets
, log2_num_buckets
;
926 struct UT_hash_handle
*tail
; /* tail hh in app order, for fast append */
927 ptrdiff_t hho
; /* hash handle offset (byte pos of hash handle in element */
929 /* in an ideal situation (all buckets used equally), no bucket would have
930 * more than ceil(#items/#buckets) items. that's the ideal chain length. */
931 unsigned ideal_chain_maxlen
;
933 /* nonideal_items is the number of items in the hash whose chain position
934 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
935 * hash distribution; reaching them in a chain traversal takes >ideal steps */
936 unsigned nonideal_items
;
938 /* ineffective expands occur when a bucket doubling was performed, but
939 * afterward, more than half the items in the hash had nonideal chain
940 * positions. If this happens on two consecutive expansions we inhibit any
941 * further expansion, as it's not helping; this happens when the hash
942 * function isn't a good fit for the key domain. When expansion is inhibited
943 * the hash will still work, albeit no longer in constant time. */
944 unsigned ineff_expands
, noexpand
;
946 uint32_t signature
; /* used only to find hash tables in external analysis */
948 uint32_t bloom_sig
; /* used only to test bloom exists in external analysis */
955 typedef struct UT_hash_handle
{
956 struct UT_hash_table
*tbl
;
957 void *prev
; /* prev element in app order */
958 void *next
; /* next element in app order */
959 struct UT_hash_handle
*hh_prev
; /* previous hh in bucket order */
960 struct UT_hash_handle
*hh_next
; /* next hh in bucket order */
961 void *key
; /* ptr to enclosing struct's key */
962 unsigned keylen
; /* enclosing struct's key len */
963 unsigned hashv
; /* result of hash-fcn(key) */
966 #endif /* UTHASH_H */