/* MIT License
 *
 * Copyright (c) 2023 Brad House
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */
#include "ares_setup.h"
#include "ares.h"
#include "ares_private.h"
#include "ares__llist.h"
#include "ares__htable.h"

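/* The bucket array starts at ARES__HTABLE_MIN_BUCKETS (1U << 4 == 16) entries,
 * doubles on each expansion, and is capped at ARES__HTABLE_MAX_BUCKETS
 * (1U << 24 == 16,777,216).  Expansion is triggered once the number of stored
 * keys exceeds ARES__HTABLE_EXPAND_PERCENT (75%) of the current bucket count. */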
#define ARES__HTABLE_MAX_BUCKETS    (1U << 24)
#define ARES__HTABLE_MIN_BUCKETS    (1U << 4)
#define ARES__HTABLE_EXPAND_PERCENT 75

struct ares__htable {
  ares__htable_hashfunc_t    hash;           /* hash callback for keys */
  ares__htable_bucket_key_t  bucket_key;     /* fetches the key from a bucket object */
  ares__htable_bucket_free_t bucket_free;    /* destructor for a bucket object */
  ares__htable_key_eq_t      key_eq;         /* key equality comparison */
  unsigned int               seed;           /* random seed mixed into the hash */
  unsigned int               size;           /* number of buckets, always a power of 2 */
  size_t                     num_keys;       /* number of keys currently stored */
  size_t                     num_collisions; /* recorded collisions (keys sharing a bucket) */
  /* NOTE: if we converted buckets into ares__slist_t we could guarantee on
   *       hash collisions we would have O(log n) worst case insert and search
   *       performance.  (We'd also need to make key_eq into a key_cmp to
   *       support sort).  That said, risk with a random hash seed is near zero,
   *       and ares__slist_t is heavier weight, so I think using ares__llist_t
   *       is an overall win. */
  ares__llist_t            **buckets;        /* array of "size" bucket lists, lazily allocated */
};

static unsigned int ares__htable_generate_seed(ares__htable_t *htable)
{
  unsigned int seed = 0;
  time_t       t    = time(NULL);

  /* Mix the stack address, heap address, and time to generate a random seed.
   * It doesn't have to be cryptographically secure, just quick; this small
   * amount of effort makes a hash collision attack very unlikely. */
  seed |= (unsigned int)((size_t)htable & 0xFFFFFFFF);
  seed |= (unsigned int)((size_t)&seed & 0xFFFFFFFF);
  seed |= (unsigned int)(t & 0xFFFFFFFF);
  return seed;
}

static void ares__htable_buckets_destroy(ares__llist_t **buckets,
                                         unsigned int size,
                                         ares_bool_t destroy_vals)
{
  unsigned int i;

  if (buckets == NULL) {
    return;
  }

  for (i = 0; i < size; i++) {
    if (buckets[i] == NULL) {
      continue;
    }

    if (!destroy_vals) {
      ares__llist_replace_destructor(buckets[i], NULL);
    }

    ares__llist_destroy(buckets[i]);
  }

  ares_free(buckets);
}

void ares__htable_destroy(ares__htable_t *htable)
{
  if (htable == NULL) {
    return;
  }
  ares__htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE);
  ares_free(htable);
}

ares__htable_t *ares__htable_create(ares__htable_hashfunc_t hash_func,
                                    ares__htable_bucket_key_t bucket_key,
                                    ares__htable_bucket_free_t bucket_free,
                                    ares__htable_key_eq_t key_eq)
{
  ares__htable_t *htable = NULL;

  if (hash_func == NULL || bucket_key == NULL || bucket_free == NULL ||
      key_eq == NULL) {
    goto fail;
  }

  htable = ares_malloc_zero(sizeof(*htable));
  if (htable == NULL) {
    goto fail;
  }

  htable->hash        = hash_func;
  htable->bucket_key  = bucket_key;
  htable->bucket_free = bucket_free;
  htable->key_eq      = key_eq;
  htable->seed        = ares__htable_generate_seed(htable);
  htable->size        = ARES__HTABLE_MIN_BUCKETS;
  htable->buckets     = ares_malloc_zero(sizeof(*htable->buckets) * htable->size);

  if (htable->buckets == NULL) {
    goto fail;
  }

  return htable;

fail:
  ares__htable_destroy(htable);
  return NULL;
}
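
/* Example usage (a hypothetical caller, not part of this file).  Wrappers such
 * as the ares__htable_*vp.c files supply the four callbacks; a minimal sketch,
 * assuming a bucket object that owns its key, might look like:
 *
 *   struct my_bucket {
 *     char *key;
 *     void *val;
 *   };
 *
 *   static unsigned int my_hash(const void *key, unsigned int seed)
 *   {
 *     return ares__htable_hash_FNV1a((const unsigned char *)key,
 *                                    strlen((const char *)key), seed);
 *   }
 *
 *   static const void *my_bucket_key(const void *bucket)
 *   {
 *     return ((const struct my_bucket *)bucket)->key;
 *   }
 *
 *   static void my_bucket_free(void *bucket)
 *   {
 *     ... free the key, the value, and the bucket object itself ...
 *   }
 *
 *   static ares_bool_t my_key_eq(const void *key1, const void *key2)
 *   {
 *     return strcmp(key1, key2) == 0 ? ARES_TRUE : ARES_FALSE;
 *   }
 *
 *   ares__htable_t *h = ares__htable_create(my_hash, my_bucket_key,
 *                                           my_bucket_free, my_key_eq);
 *   ares__htable_insert(h, bucket);    the table takes ownership of bucket
 *   struct my_bucket *b = ares__htable_get(h, "some key");
 *   ares__htable_destroy(h);           frees any remaining buckets
 */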

const void **ares__htable_all_buckets(const ares__htable_t *htable, size_t *num)
{
  const void **out = NULL;
  size_t       cnt = 0;
  size_t       i;

  if (htable == NULL || num == NULL) {
    return NULL;
  }

  *num = 0;

  out = ares_malloc_zero(sizeof(*out) * htable->num_keys);
  if (out == NULL) {
    return NULL;
  }

  for (i = 0; i < htable->size; i++) {
    ares__llist_node_t *node;
    for (node = ares__llist_node_first(htable->buckets[i]); node != NULL;
         node = ares__llist_node_next(node)) {
      out[cnt++] = ares__llist_node_val(node);
    }
  }

  *num = cnt;
  return out;
}

/*! Grabs the hashtable index from the key.  The index is the hash of the key
 *  reduced to the number of buckets.  We use "hash & (size - 1)" since size is
 *  guaranteed to be a power of 2; this is equivalent to "hash % size" but
 *  should be more efficient. */
#define HASH_IDX(h, key) (h->hash(key, h->seed) & (h->size - 1))
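/* For example, with the minimum table size of 16 buckets, (h->size - 1) is 0xF,
 * so "hash & 0xF" selects the same bucket that "hash % 16" would. */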

static ares__llist_node_t *ares__htable_find(const ares__htable_t *htable,
                                             unsigned int idx, const void *key)
{
  ares__llist_node_t *node = NULL;

  for (node = ares__llist_node_first(htable->buckets[idx]); node != NULL;
       node = ares__llist_node_next(node)) {
    if (htable->key_eq(key, htable->bucket_key(ares__llist_node_val(node)))) {
      break;
    }
  }

  return node;
}

static ares_bool_t ares__htable_expand(ares__htable_t *htable)
{
  ares__llist_t **buckets  = NULL;
  unsigned int    old_size = htable->size;
  size_t          i;
  ares__llist_t **prealloc_llist     = NULL;
  size_t          prealloc_llist_len = 0;
  ares_bool_t     rv                 = ARES_FALSE;

  /* Not a failure, just won't expand */
  if (old_size == ARES__HTABLE_MAX_BUCKETS) {
    return ARES_TRUE;
  }

  htable->size <<= 1;

  /* We must pre-allocate all memory we'll need before moving entries to the
   * new hash array.  Otherwise if there's a memory allocation failure in the
   * middle, we wouldn't be able to recover. */
  buckets = ares_malloc_zero(sizeof(*buckets) * htable->size);
  if (buckets == NULL) {
    goto done;
  }

  /* The maximum number of new llists we'll need is the number of collisions
   * that were recorded */
  prealloc_llist_len = htable->num_collisions;
  if (prealloc_llist_len) {
    prealloc_llist =
      ares_malloc_zero(sizeof(*prealloc_llist) * prealloc_llist_len);
    if (prealloc_llist == NULL) {
      goto done;
    }
  }
  for (i = 0; i < prealloc_llist_len; i++) {
    prealloc_llist[i] = ares__llist_create(htable->bucket_free);
    if (prealloc_llist[i] == NULL) {
      goto done;
    }
  }

  /* Iterate across all buckets and move the entries to the new buckets */
  htable->num_collisions = 0;
  for (i = 0; i < old_size; i++) {
    ares__llist_node_t *node;

    /* Nothing in this bucket */
    if (htable->buckets[i] == NULL) {
      continue;
    }

    /* Fast path optimization (most likely case): the source bucket probably
     * contains just a single entry and the destination bucket is still empty.
     * Check for this, and if so, simply move the whole bucket over. */
    if (ares__llist_len(htable->buckets[i]) == 1) {
      const void *val = ares__llist_first_val(htable->buckets[i]);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      if (buckets[idx] == NULL) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        continue;
      }
    }

    /* Slow path, collisions */
    while ((node = ares__llist_node_first(htable->buckets[i])) != NULL) {
      const void *val = ares__llist_node_val(node);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      /* Try the fast path again: we may have just moved a colliding entry off,
       * leaving a single entry whose llist parent we can reuse. */
      if (buckets[idx] == NULL && ares__llist_len(htable->buckets[i]) == 1) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        break;
      }

      /* Grab one off our preallocated list */
      if (buckets[idx] == NULL) {
        /* Silence static analysis, this isn't possible but it doesn't know */
        if (prealloc_llist == NULL || prealloc_llist_len == 0) {
          goto done;
        }
        buckets[idx] = prealloc_llist[prealloc_llist_len - 1];
        prealloc_llist_len--;
      } else {
        /* Collision occurred since the bucket wasn't empty */
        htable->num_collisions++;
      }

      ares__llist_node_move_parent_first(node, buckets[idx]);
    }

    /* Abandoned bucket, destroy */
    if (htable->buckets[i] != NULL) {
      ares__llist_destroy(htable->buckets[i]);
      htable->buckets[i] = NULL;
    }
  }

  /* We have guaranteed all the buckets have either been moved or destroyed,
   * so we just call ares_free() on the array and swap out the pointer */
  ares_free(htable->buckets);
  htable->buckets = buckets;
  buckets         = NULL;
  rv              = ARES_TRUE;

done:
  ares_free(buckets);
  /* destroy any unused preallocated buckets */
  ares__htable_buckets_destroy(prealloc_llist, (unsigned int)prealloc_llist_len,
                               ARES_FALSE);

  /* On failure, we need to restore the htable size */
  if (rv != ARES_TRUE) {
    htable->size = old_size;
  }

  return rv;
}

ares_bool_t ares__htable_insert(ares__htable_t *htable, void *bucket)
{
  unsigned int        idx  = 0;
  ares__llist_node_t *node = NULL;
  const void         *key  = NULL;

  if (htable == NULL || bucket == NULL) {
    return ARES_FALSE;
  }

  key = htable->bucket_key(bucket);
  idx = HASH_IDX(htable, key);

  /* See if we have a matching bucket already, if so, replace it */
  node = ares__htable_find(htable, idx, key);
  if (node != NULL) {
    ares__llist_node_replace(node, bucket);
    return ARES_TRUE;
  }

  /* Check to see if we should rehash because likelihood of collisions has
   * increased beyond our threshold */
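  /* For example, with the initial 16 buckets and a 75% threshold, the table
   * expands when the 13th key is inserted (13 > (16 * 75) / 100 == 12). */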
  if (htable->num_keys + 1 >
      (htable->size * ARES__HTABLE_EXPAND_PERCENT) / 100) {
    if (!ares__htable_expand(htable)) {
      return ARES_FALSE;
    }
    /* If we expanded, need to calculate a new index */
    idx = HASH_IDX(htable, key);
  }

  /* We lazily allocate the linked list */
  if (htable->buckets[idx] == NULL) {
    htable->buckets[idx] = ares__llist_create(htable->bucket_free);
    if (htable->buckets[idx] == NULL) {
      return ARES_FALSE;
    }
  }

  node = ares__llist_insert_first(htable->buckets[idx], bucket);
  if (node == NULL) {
    return ARES_FALSE;
  }

  /* Track collisions for rehash stability */
  if (ares__llist_len(htable->buckets[idx]) > 1) {
    htable->num_collisions++;
  }

  htable->num_keys++;

  return ARES_TRUE;
}

void *ares__htable_get(const ares__htable_t *htable, const void *key)
{
  unsigned int idx;

  if (htable == NULL || key == NULL) {
    return NULL;
  }

  idx = HASH_IDX(htable, key);

  return ares__llist_node_val(ares__htable_find(htable, idx, key));
}

ares_bool_t ares__htable_remove(ares__htable_t *htable, const void *key)
{
  ares__llist_node_t *node;
  unsigned int        idx;

  if (htable == NULL || key == NULL) {
    return ARES_FALSE;
  }

  idx  = HASH_IDX(htable, key);
  node = ares__htable_find(htable, idx, key);
  if (node == NULL) {
    return ARES_FALSE;
  }

  htable->num_keys--;

  /* Reduce collisions */
  if (ares__llist_len(ares__llist_node_parent(node)) > 1) {
    htable->num_collisions--;
  }

  ares__llist_node_destroy(node);
  return ARES_TRUE;
}

size_t ares__htable_num_keys(const ares__htable_t *htable)
{
  if (htable == NULL) {
    return 0;
  }
  return htable->num_keys;
}

unsigned int ares__htable_hash_FNV1a(const unsigned char *key, size_t key_len,
                                     unsigned int seed)
{
  /* The standard FNV-1a offset basis is 2166136261U, but we start from the
   * per-table random seed instead so collisions can't be precomputed */
  unsigned int hv = seed;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)key[i];
    /* hv *= 0x01000193 */
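    /* 0x01000193 is the 32-bit FNV prime (16777619 == 2^24 + 2^8 + 2^7 + 2^4 +
     * 2^1 + 2^0), so the shift/add below performs that multiply without a
     * multiplication instruction */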
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}

/* Case-insensitive version, meant for ASCII strings */
unsigned int ares__htable_hash_FNV1a_casecmp(const unsigned char *key,
                                             size_t key_len, unsigned int seed)
{
  /* The standard FNV-1a offset basis is 2166136261U, but we start from the
   * per-table random seed instead so collisions can't be precomputed */
  unsigned int hv = seed;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)ares__tolower(key[i]);
    /* hv *= 0x01000193 */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}