1 #include "iwhmap.h"
2 #include "iwlog.h"
3 #include "murmur3.h"
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include <errno.h>
8 #include <assert.h>
9
10 #define MIN_BUCKETS 64
11 #define STEPS 4
12
13 typedef struct {
14 void *key;
15 void *val;
16 uint32_t hash;
17 } entry_t;
18
19 typedef struct {
20 entry_t *entries;
21 uint32_t used;
22 uint32_t total;
23 } bucket_t;
24
25 typedef struct _IWHMAP {
26 uint32_t count;
27 uint32_t buckets_mask;
28 bucket_t *buckets;
29
30 int (*cmp_fn)(const void *, const void *);
31 uint32_t (*hash_key_fn)(const void *);
32 void (*kv_free_fn)(void *, void *);
33
34 bool int_key_as_pointer_value;
35 } hmap_t;
36
37
_noop_kv_free(void * key,void * val)38 static void _noop_kv_free(void *key, void *val) {
39 }
40
iwhmap_kv_free(void * key,void * val)41 void iwhmap_kv_free(void *key, void *val) {
42 free(key);
43 free(val);
44 }
45
_n_buckets(hmap_t * hm)46 IW_INLINE uint32_t _n_buckets(hmap_t *hm) {
47 return hm->buckets_mask + 1;
48 }
49
_ptr_cmp(const void * v1,const void * v2)50 static int _ptr_cmp(const void *v1, const void *v2) {
51 return v1 > v2 ? 1 : v1 < v2 ? -1 : 0;
52 }
53
_int32_cmp(const void * v1,const void * v2)54 static int _int32_cmp(const void *v1, const void *v2) {
55 intptr_t p1 = (intptr_t) v1;
56 intptr_t p2 = (intptr_t) v2;
57 return p1 > p2 ? 1 : p1 < p2 ? -1 : 0;
58 }
59
_int64_cmp(const void * v1,const void * v2)60 static int _int64_cmp(const void *v1, const void *v2) {
61 #ifdef IW_64
62 intptr_t p1 = (intptr_t) v1;
63 intptr_t p2 = (intptr_t) v2;
64 return p1 > p2 ? 1 : p1 < p2 ? -1 : 0;
65 #else
66 int64_t l1, l2;
67 memcpy(&l1, v1, sizeof(l1));
68 memcpy(&l2, v2, sizeof(l2));
69 return l1 > l2 ? 1 : l1 < l2 ? -1 : 0;
70 #endif
71 }
72
73 // https://gist.github.com/badboy/6267743
74 // https://nullprogram.com/blog/2018/07/31
75
_hash_int32(uint32_t x)76 IW_INLINE uint32_t _hash_int32(uint32_t x) {
77 x ^= x >> 17;
78 x *= UINT32_C(0xed5ad4bb);
79 x ^= x >> 11;
80 x *= UINT32_C(0xac4c1b51);
81 x ^= x >> 15;
82 x *= UINT32_C(0x31848bab);
83 x ^= x >> 14;
84 return x;
85 }
86
_hash_int64(uint64_t x)87 IW_INLINE uint32_t _hash_int64(uint64_t x) {
88 return _hash_int32(x) ^ _hash_int32(x >> 31);
89 }
90
_hash_int64_key(const void * key)91 IW_INLINE uint32_t _hash_int64_key(const void *key) {
92 #ifdef IW_64
93 return _hash_int64((uint64_t) key);
94 #else
95 uint64_t lv;
96 memcpy(&lv, key, sizeof(lv));
97 return _hash_int64(lv);
98 #endif
99 }
100
_hash_int32_key(const void * key)101 IW_INLINE uint32_t _hash_int32_key(const void *key) {
102 return _hash_int32((uintptr_t) key);
103 }
104
_hash_buf_key(const void * key)105 IW_INLINE uint32_t _hash_buf_key(const void *key) {
106 return murmur3(key, strlen(key));
107 }
108
iwhmap_create(int (* cmp_fn)(const void *,const void *),uint32_t (* hash_key_fn)(const void *),void (* kv_free_fn)(void *,void *))109 IWHMAP *iwhmap_create(int (*cmp_fn)(const void *, const void *),
110 uint32_t (*hash_key_fn)(const void *),
111 void (*kv_free_fn)(void *, void *)) {
112
113 if (!hash_key_fn) {
114 return 0;
115 }
116 if (!cmp_fn) {
117 cmp_fn = _ptr_cmp;
118 }
119 if (!kv_free_fn) {
120 kv_free_fn = _noop_kv_free;
121 }
122
123 hmap_t *hm = malloc(sizeof(*hm));
124 if (!hm) {
125 return 0;
126 }
127 hm->buckets = calloc(MIN_BUCKETS, sizeof(hm->buckets[0]));
128 if (!hm->buckets) {
129 free(hm);
130 return 0;
131 }
132 hm->cmp_fn = cmp_fn;
133 hm->hash_key_fn = hash_key_fn;
134 hm->kv_free_fn = kv_free_fn;
135 hm->buckets_mask = MIN_BUCKETS - 1;
136 hm->count = 0;
137 hm->int_key_as_pointer_value = false;
138 return hm;
139 }
140
iwhmap_create_i64(void (* kv_free_fn)(void *,void *))141 IWHMAP *iwhmap_create_i64(void (*kv_free_fn)(void *, void *)) {
142 hmap_t *hm = iwhmap_create(_int64_cmp, _hash_int64_key, kv_free_fn);
143 if (hm) {
144 #ifdef IW_64
145 hm->int_key_as_pointer_value = true;
146 #endif
147 }
148 return hm;
149 }
150
iwhmap_create_i32(void (* kv_free_fn)(void *,void *))151 IWHMAP *iwhmap_create_i32(void (*kv_free_fn)(void *, void *)) {
152 hmap_t *hm = iwhmap_create(_int32_cmp, _hash_int32_key, kv_free_fn);
153 if (hm) {
154 hm->int_key_as_pointer_value = true;
155 }
156 return hm;
157 }
158
iwhmap_create_str(void (* kv_free_fn)(void *,void *))159 IWHMAP *iwhmap_create_str(void (*kv_free_fn)(void *, void *)) {
160 return iwhmap_create((int (*)(const void *, const void *)) strcmp, _hash_buf_key, kv_free_fn);
161 }
162
_entry_find(IWHMAP * hm,const void * key,uint32_t hash)163 static entry_t *_entry_find(IWHMAP *hm, const void *key, uint32_t hash) {
164 bucket_t *bucket = hm->buckets + (hash & hm->buckets_mask);
165 entry_t *entry = bucket->entries;
166 for (entry_t *end = entry + bucket->used; entry < end; ++entry) {
167 if (hash == entry->hash && hm->cmp_fn(key, entry->key) == 0) {
168 return entry;
169 }
170 }
171 return 0;
172 }
173
_entry_add(IWHMAP * hm,void * key,uint32_t hash)174 static entry_t *_entry_add(IWHMAP *hm, void *key, uint32_t hash) {
175 entry_t *entry;
176 bucket_t *bucket = hm->buckets + (hash & hm->buckets_mask);
177
178 if (bucket->used + 1 >= bucket->total) {
179 if (UINT32_MAX - bucket->total < STEPS) {
180 errno = EOVERFLOW;
181 return 0;
182 }
183 uint32_t new_total = bucket->total + STEPS;
184 entry_t *new_entries = realloc(bucket->entries, new_total * sizeof(new_entries[0]));
185 if (!new_entries) {
186 return 0;
187 }
188 bucket->entries = new_entries;
189 bucket->total = new_total;
190 }
191 entry = bucket->entries;
192 for (entry_t *end = entry + bucket->used; entry < end; ++entry) {
193 // NOLINTNEXTLINE (clang-analyzer-core.UndefinedBinaryOperatorResult)
194 if (hash == entry->hash && hm->cmp_fn(key, entry->key) == 0) {
195 return entry;
196 }
197 }
198 ++bucket->used;
199 ++hm->count;
200
201 entry->hash = hash;
202 entry->key = 0;
203 entry->val = 0;
204
205 return entry;
206 }
207
_rehash(hmap_t * hm,uint32_t num_buckets)208 static void _rehash(hmap_t *hm, uint32_t num_buckets) {
209 bucket_t *buckets = calloc(num_buckets, sizeof(*buckets));
210 if (!buckets) {
211 return;
212 }
213 assert(!(num_buckets & (num_buckets - 1)));
214 assert(num_buckets != _n_buckets(hm));
215
216 bucket_t *bucket,
217 *bucket_end = hm->buckets + _n_buckets(hm);
218
219 hmap_t hm_copy = *hm;
220 hm_copy.count = 0;
221 hm_copy.buckets_mask = num_buckets - 1;
222 hm_copy.buckets = buckets;
223
224 for (bucket = hm->buckets; bucket < bucket_end; ++bucket) {
225 entry_t *entry_old = bucket->entries;
226 entry_t *entry_old_end = entry_old + bucket->used;
227 for (; entry_old < entry_old_end; ++entry_old) {
228 entry_t *entry_new = _entry_add(&hm_copy, entry_old->key, entry_old->hash);
229 if (!entry_new) {
230 goto fail;
231 }
232 entry_new->key = entry_old->key;
233 entry_new->val = entry_old->val;
234 }
235 }
236
237 for (bucket = hm->buckets; bucket < bucket_end; ++bucket) {
238 free(bucket->entries);
239 }
240 free(hm->buckets);
241
242 hm->buckets = buckets;
243 hm->buckets_mask = num_buckets - 1;
244
245 assert(hm->count == hm_copy.count);
246 return;
247
248 fail:
249 for (bucket_end = bucket, bucket = hm->buckets; bucket < bucket_end; ++bucket) {
250 free(bucket->entries);
251 }
252 free(buckets);
253 }
254
iwhmap_put(IWHMAP * hm,void * key,void * val)255 iwrc iwhmap_put(IWHMAP *hm, void *key, void *val) {
256 uint32_t hash = hm->hash_key_fn(key);
257 entry_t *entry = _entry_add(hm, key, hash);
258 if (!entry) {
259 return iwrc_set_errno(IW_ERROR_ERRNO, errno);
260 }
261
262 hm->kv_free_fn(hm->int_key_as_pointer_value ? 0 : entry->key, entry->val);
263
264 entry->key = key;
265 entry->val = val;
266
267 if (hm->count > hm->buckets_mask) {
268 _rehash(hm, _n_buckets(hm) * 2);
269 }
270 return 0;
271 }
272
iwhmap_get(IWHMAP * hm,const void * key)273 void *iwhmap_get(IWHMAP *hm, const void *key) {
274 uint32_t hash = hm->hash_key_fn(key);
275 entry_t *entry = _entry_find(hm, key, hash);
276 if (entry) {
277 return entry->val;
278 } else {
279 return 0;
280 }
281 }
282
iwhmap_remove(IWHMAP * hm,const void * key)283 void iwhmap_remove(IWHMAP *hm, const void *key) {
284 uint32_t hash = hm->hash_key_fn(key);
285 bucket_t *bucket = hm->buckets + (hash & hm->buckets_mask);
286
287 entry_t *entry = _entry_find(hm, key, hash);
288 if (!entry) {
289 return;
290 }
291
292 hm->kv_free_fn(hm->int_key_as_pointer_value ? 0 : entry->key, entry->val);
293
294 if (bucket->used > 1) {
295 entry_t *entry_last = bucket->entries + bucket->used - 1;
296 if (entry != entry_last) {
297 memcpy(entry, entry_last, sizeof(*entry));
298 }
299 }
300 --bucket->used;
301 --hm->count;
302
303 if (hm->buckets_mask > MIN_BUCKETS - 1 && hm->count < hm->buckets_mask / 2) {
304 _rehash(hm, _n_buckets(hm) / 2);
305 } else {
306 uint32_t steps_used = bucket->used / STEPS;
307 uint32_t steps_total = bucket->total / STEPS;
308
309 if (steps_used + 1 < steps_total) {
310 entry_t *entries_new = realloc(bucket->entries, (steps_used + 1) * STEPS * sizeof(entries_new[0]));
311 if (entries_new) {
312 bucket->entries = entries_new;
313 bucket->total = (steps_used + 1) * STEPS;
314 }
315 }
316 }
317 }
318
iwhmap_count(IWHMAP * hm)319 int iwhmap_count(IWHMAP *hm) {
320 return hm->count;
321 }
322
iwhmap_iter_init(IWHMAP * hm,IWHMAP_ITER * iter)323 void iwhmap_iter_init(IWHMAP *hm, IWHMAP_ITER *iter) {
324 iter->hm = hm;
325 iter->entry = -1;
326 iter->bucket = 0;
327 iter->key = 0;
328 iter->val = 0;
329 }
330
iwhmap_iter_next(IWHMAP_ITER * iter)331 bool iwhmap_iter_next(IWHMAP_ITER *iter) {
332 entry_t *entry;
333 bucket_t *bucket = iter->hm->buckets + iter->bucket;
334
335 ++iter->entry;
336 if ((uint32_t) iter->entry >= bucket->used) {
337 uint32_t n = _n_buckets(iter->hm);
338 iter->entry = 0;
339 for (++iter->bucket; iter->bucket < n; ++iter->bucket) {
340 bucket = iter->hm->buckets + iter->bucket;
341 if (bucket->used > 0) {
342 break;
343 }
344 }
345 if (iter->bucket >= n) {
346 return false;
347 }
348 }
349 entry = bucket->entries + iter->entry;
350 iter->key = entry->key;
351 iter->val = entry->val;
352 return true;
353 }
354
iwhmap_clear(IWHMAP * hm)355 void iwhmap_clear(IWHMAP *hm) {
356 if (!hm) {
357 return;
358 }
359 for (bucket_t *b = hm->buckets, *be = hm->buckets + _n_buckets(hm); b < be; ++b) {
360 for (entry_t *e = b->entries, *ee = b->entries + b->used; e < ee; ++e) {
361 hm->kv_free_fn(hm->int_key_as_pointer_value ? 0 : e->key, e->val);
362 }
363 free(b->entries);
364 b->used = 0;
365 b->total = 0;
366 b->entries = 0;
367 }
368 if (_n_buckets(hm) > MIN_BUCKETS) {
369 bucket_t *buckets_new = realloc(hm->buckets, sizeof(buckets_new[0]) * MIN_BUCKETS);
370 if (buckets_new) {
371 memset(buckets_new, 0, sizeof(buckets_new[0]) * MIN_BUCKETS);
372 hm->buckets = buckets_new;
373 hm->buckets_mask = MIN_BUCKETS - 1;
374 }
375 }
376 hm->count = 0;
377 }
378
iwhmap_destroy(IWHMAP * hm)379 void iwhmap_destroy(IWHMAP *hm) {
380 if (!hm) {
381 return;
382 }
383 for (bucket_t *b = hm->buckets, *be = hm->buckets + _n_buckets(hm); b < be; ++b) {
384 for (entry_t *e = b->entries, *ee = b->entries + b->used; e < ee; ++e) {
385 hm->kv_free_fn(hm->int_key_as_pointer_value ? 0 : e->key, e->val);
386 }
387 free(b->entries);
388 }
389 free(hm->buckets);
390 free(hm);
391 }
392