/* MIT License
 *
 * Copyright (c) 2023 Brad House
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */
#include "ares_private.h"
#include "ares_llist.h"
#include "ares_htable.h"

#define ARES__HTABLE_MAX_BUCKETS    (1U << 24)
#define ARES__HTABLE_MIN_BUCKETS    (1U << 4)
#define ARES__HTABLE_EXPAND_PERCENT 75
struct ares_htable {
  ares_htable_hashfunc_t    hash;
  ares_htable_bucket_key_t  bucket_key;
  ares_htable_bucket_free_t bucket_free;
  ares_htable_key_eq_t      key_eq;
  unsigned int              seed;
  unsigned int              size;
  size_t                    num_keys;
  size_t                    num_collisions;
  /* NOTE: if we converted buckets into ares_slist_t we could guarantee on
   *       hash collisions we would have O(log n) worst case insert and search
   *       performance.  (We'd also need to make key_eq into a key_cmp to
   *       support sort.)  That said, the risk with a random hash seed is near
   *       zero, and ares_slist_t is heavier weight, so I think using
   *       ares_llist_t is an overall win. */
  ares_llist_t            **buckets;
};

static unsigned int ares_htable_generate_seed(ares_htable_t *htable)
{
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  /* Seed needs to be static for fuzzing */
  return 0;
#else
  unsigned int seed = 0;
  time_t       t    = time(NULL);

  /* Mix the stack address, heap address, and time to generate a random seed.
   * It doesn't have to be super secure, just quick; with even this small
   * amount of effort the likelihood of a hash collision attack is very low */
  seed |= (unsigned int)((size_t)htable & 0xFFFFFFFF);
  seed |= (unsigned int)((size_t)&seed & 0xFFFFFFFF);
  seed |= (unsigned int)(((ares_uint64_t)t) & 0xFFFFFFFF);
  return seed;
#endif
}

static void ares_htable_buckets_destroy(ares_llist_t **buckets,
                                        unsigned int   size,
                                        ares_bool_t    destroy_vals)
{
  unsigned int i;

  if (buckets == NULL) {
    return;
  }

  for (i = 0; i < size; i++) {
    if (buckets[i] == NULL) {
      continue;
    }

    if (!destroy_vals) {
      ares_llist_replace_destructor(buckets[i], NULL);
    }

    ares_llist_destroy(buckets[i]);
  }

  ares_free(buckets);
}

void ares_htable_destroy(ares_htable_t *htable)
{
  if (htable == NULL) {
    return;
  }
  ares_htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE);
  ares_free(htable);
}

ares_htable_t *ares_htable_create(ares_htable_hashfunc_t    hash_func,
                                  ares_htable_bucket_key_t  bucket_key,
                                  ares_htable_bucket_free_t bucket_free,
                                  ares_htable_key_eq_t      key_eq)
{
  ares_htable_t *htable = NULL;

  if (hash_func == NULL || bucket_key == NULL || bucket_free == NULL ||
      key_eq == NULL) {
    goto fail;
  }

  htable = ares_malloc_zero(sizeof(*htable));
  if (htable == NULL) {
    goto fail;
  }

  htable->hash        = hash_func;
  htable->bucket_key  = bucket_key;
  htable->bucket_free = bucket_free;
  htable->key_eq      = key_eq;
  htable->seed        = ares_htable_generate_seed(htable);
  htable->size        = ARES__HTABLE_MIN_BUCKETS;
  htable->buckets     = ares_malloc_zero(sizeof(*htable->buckets) * htable->size);

  if (htable->buckets == NULL) {
    goto fail;
  }

  return htable;

fail:
  ares_htable_destroy(htable);
  return NULL;
}
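
/* Illustrative usage sketch (hypothetical, not part of this file): a caller
 * supplies the four callbacks, with signatures inferred from how they are
 * invoked below, and the table takes ownership of inserted bucket objects,
 * freeing them via bucket_free.  For a table of heap-allocated strings keyed
 * by themselves, something like:
 *
 *   static unsigned int str_hash(const void *key, unsigned int seed)
 *   {
 *     const char *str = key;
 *     return ares_htable_hash_FNV1a((const unsigned char *)str,
 *                                   ares_strlen(str), seed);
 *   }
 *
 *   static const void *str_bucket_key(const void *bucket)
 *   {
 *     return bucket;  // the string is its own key
 *   }
 *
 *   static ares_bool_t str_key_eq(const void *key1, const void *key2)
 *   {
 *     return ares_streq(key1, key2);
 *   }
 *
 *   ares_htable_t *h = ares_htable_create(str_hash, str_bucket_key,
 *                                         ares_free, str_key_eq);
 */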
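
/* Returns a heap-allocated array of every value in the table.  The caller
 * owns the array (but not the values it points at) and is responsible for
 * releasing it with ares_free() */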
const void **ares_htable_all_buckets(const ares_htable_t *htable, size_t *num)
{
  const void **out = NULL;
  size_t       cnt = 0;
  size_t       i;

  if (htable == NULL || num == NULL) {
    return NULL; /* LCOV_EXCL_LINE */
  }

  *num = 0;

  out = ares_malloc_zero(sizeof(*out) * htable->num_keys);
  if (out == NULL) {
    return NULL; /* LCOV_EXCL_LINE */
  }

  for (i = 0; i < htable->size; i++) {
    ares_llist_node_t *node;
    for (node = ares_llist_node_first(htable->buckets[i]); node != NULL;
         node = ares_llist_node_next(node)) {
      out[cnt++] = ares_llist_node_val(node);
    }
  }

  *num = cnt;
  return out;
}

/*! Computes the hashtable bucket index for a key.  The index is the key's
 * hash reduced to the number of buckets.  We use "hash & (size - 1)" since
 * size is guaranteed to be a power of 2; this is equivalent to
 * "hash % size", but more efficient */
#define HASH_IDX(h, key) ((h)->hash(key, (h)->seed) & ((h)->size - 1))
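/* e.g. with the minimum size of 16 buckets, a hash of 0x2A maps to index
 * 0x2A & 0xF = 10, the same bucket as 0x2A % 16 */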

static ares_llist_node_t *ares_htable_find(const ares_htable_t *htable,
                                           unsigned int idx, const void *key)
{
  ares_llist_node_t *node = NULL;

  for (node = ares_llist_node_first(htable->buckets[idx]); node != NULL;
       node = ares_llist_node_next(node)) {
    if (htable->key_eq(key, htable->bucket_key(ares_llist_node_val(node)))) {
      break;
    }
  }

  return node;
}

static ares_bool_t ares_htable_expand(ares_htable_t *htable)
{
  ares_llist_t **buckets  = NULL;
  unsigned int   old_size = htable->size;
  size_t         i;
  ares_llist_t **prealloc_llist     = NULL;
  size_t         prealloc_llist_len = 0;
  ares_bool_t    rv                 = ARES_FALSE;

  /* Not a failure, just won't expand */
  if (old_size == ARES__HTABLE_MAX_BUCKETS) {
    return ARES_TRUE; /* LCOV_EXCL_LINE */
  }

  htable->size <<= 1;

  /* We must pre-allocate all memory we'll need before moving entries to the
   * new hash array.  Otherwise, if there's a memory allocation failure in the
   * middle, we wouldn't be able to recover. */
  buckets = ares_malloc_zero(sizeof(*buckets) * htable->size);
  if (buckets == NULL) {
    goto done; /* LCOV_EXCL_LINE */
  }

  /* The maximum number of new llists we'll need is the number of collisions
   * that were recorded */
  prealloc_llist_len = htable->num_collisions;
  if (prealloc_llist_len) {
    prealloc_llist =
      ares_malloc_zero(sizeof(*prealloc_llist) * prealloc_llist_len);
    if (prealloc_llist == NULL) {
      goto done; /* LCOV_EXCL_LINE */
    }
  }
  for (i = 0; i < prealloc_llist_len; i++) {
    prealloc_llist[i] = ares_llist_create(htable->bucket_free);
    if (prealloc_llist[i] == NULL) {
      goto done;
    }
  }

  /* Iterate across all buckets and move the entries to the new buckets */
  htable->num_collisions = 0;
  for (i = 0; i < old_size; i++) {
    ares_llist_node_t *node;

    /* Nothing in this bucket */
    if (htable->buckets[i] == NULL) {
      continue;
    }

    /* Fast path optimization (most likely case): there is probably only a
     * single entry in both the source and destination.  Check to confirm,
     * and if so, just move the whole bucket over */
    if (ares_llist_len(htable->buckets[i]) == 1) {
      const void *val = ares_llist_first_val(htable->buckets[i]);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      if (buckets[idx] == NULL) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        continue;
      }
    }

    /* Slow path, collisions */
    while ((node = ares_llist_node_first(htable->buckets[i])) != NULL) {
      const void *val = ares_llist_node_val(node);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      /* Try the fast path again: maybe we popped one collision off and can
       * now reuse the llist parent for the remaining entry */
      if (buckets[idx] == NULL && ares_llist_len(htable->buckets[i]) == 1) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        break;
      }

      /* Grab one off our preallocated list */
      if (buckets[idx] == NULL) {
        /* Silence static analysis; this isn't possible, but it doesn't know
         * that */
        if (prealloc_llist == NULL || prealloc_llist_len == 0) {
          goto done; /* LCOV_EXCL_LINE */
        }
        buckets[idx] = prealloc_llist[prealloc_llist_len - 1];
        prealloc_llist_len--;
      } else {
        /* Collision occurred since the bucket wasn't empty */
        htable->num_collisions++;
      }

      ares_llist_node_mvparent_first(node, buckets[idx]);
    }

    /* Abandoned bucket, destroy */
    if (htable->buckets[i] != NULL) {
      ares_llist_destroy(htable->buckets[i]);
      htable->buckets[i] = NULL;
    }
  }

  /* We have guaranteed all the buckets have either been moved or destroyed,
   * so we just call ares_free() on the array and swap out the pointer */
  ares_free(htable->buckets);
  htable->buckets = buckets;
  buckets         = NULL;
  rv              = ARES_TRUE;

done:
  ares_free(buckets);
  /* Destroy any unused preallocated buckets */
  ares_htable_buckets_destroy(prealloc_llist, (unsigned int)prealloc_llist_len,
                              ARES_FALSE);

  /* On failure, we need to restore the htable size */
  if (rv != ARES_TRUE) {
    htable->size = old_size; /* LCOV_EXCL_LINE */
  }

  return rv;
}

ares_bool_t ares_htable_insert(ares_htable_t *htable, void *bucket)
{
  unsigned int       idx  = 0;
  ares_llist_node_t *node = NULL;
  const void        *key  = NULL;

  if (htable == NULL || bucket == NULL) {
    return ARES_FALSE;
  }

  key = htable->bucket_key(bucket);
  idx = HASH_IDX(htable, key);

  /* See if we have a matching bucket already; if so, replace it */
  node = ares_htable_find(htable, idx, key);
  if (node != NULL) {
    ares_llist_node_replace(node, bucket);
    return ARES_TRUE;
  }

  /* Check to see if we should rehash because the likelihood of collisions
   * has increased beyond our threshold */
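  /* e.g. at the initial 16 buckets, inserting a 13th key exceeds the
   * threshold of 16 * 75 / 100 = 12 keys and triggers an expansion */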
  if (htable->num_keys + 1 >
      (htable->size * ARES__HTABLE_EXPAND_PERCENT) / 100) {
    if (!ares_htable_expand(htable)) {
      return ARES_FALSE; /* LCOV_EXCL_LINE */
    }
    /* If we expanded, need to calculate a new index */
    idx = HASH_IDX(htable, key);
  }

  /* We lazily allocate the linked list */
  if (htable->buckets[idx] == NULL) {
    htable->buckets[idx] = ares_llist_create(htable->bucket_free);
    if (htable->buckets[idx] == NULL) {
      return ARES_FALSE;
    }
  }

  node = ares_llist_insert_first(htable->buckets[idx], bucket);
  if (node == NULL) {
    return ARES_FALSE;
  }

  /* Track collisions for rehash stability */
  if (ares_llist_len(htable->buckets[idx]) > 1) {
    htable->num_collisions++;
  }

  htable->num_keys++;

  return ARES_TRUE;
}

void *ares_htable_get(const ares_htable_t *htable, const void *key)
{
  unsigned int idx;

  if (htable == NULL || key == NULL) {
    return NULL;
  }

  idx = HASH_IDX(htable, key);

  return ares_llist_node_val(ares_htable_find(htable, idx, key));
}

ares_bool_t ares_htable_remove(ares_htable_t *htable, const void *key)
{
  ares_llist_node_t *node;
  unsigned int       idx;

  if (htable == NULL || key == NULL) {
    return ARES_FALSE;
  }

  idx  = HASH_IDX(htable, key);
  node = ares_htable_find(htable, idx, key);
  if (node == NULL) {
    return ARES_FALSE;
  }

  htable->num_keys--;

  /* Reduce collisions */
  if (ares_llist_len(ares_llist_node_parent(node)) > 1) {
    htable->num_collisions--;
  }

  ares_llist_node_destroy(node);
  return ARES_TRUE;
}

size_t ares_htable_num_keys(const ares_htable_t *htable)
{
  if (htable == NULL) {
    return 0;
  }
  return htable->num_keys;
}

unsigned int ares_htable_hash_FNV1a(const unsigned char *key, size_t key_len,
                                    unsigned int seed)
{
  unsigned int hv = seed ^ 2166136261U;
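  /* 2166136261 (0x811C9DC5) is the 32-bit FNV-1a offset basis; the seed is
   * folded in via XOR to randomize the starting state */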
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)key[i];
    /* hv *= 16777619 (0x01000193) */
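    /* The shift-add form below computes hv + hv * (2 + 16 + 128 + 256 +
     * 16777216) = hv * 16777619 without an explicit multiplication */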
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}

/* Case-insensitive version, meant for ASCII strings */
unsigned int ares_htable_hash_FNV1a_casecmp(const unsigned char *key,
                                            size_t key_len, unsigned int seed)
{
  unsigned int hv = seed ^ 2166136261U;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)ares_tolower(key[i]);
    /* hv *= 16777619 (0x01000193) */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}