/* MIT License
 *
 * Copyright (c) 2023 Brad House
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */
#include "ares_private.h"
#include "ares_llist.h"
#include "ares_htable.h"

#define ARES__HTABLE_MAX_BUCKETS    (1U << 24)
#define ARES__HTABLE_MIN_BUCKETS    (1U << 4)
#define ARES__HTABLE_EXPAND_PERCENT 75

struct ares_htable {
  ares_htable_hashfunc_t    hash;
  ares_htable_bucket_key_t  bucket_key;
  ares_htable_bucket_free_t bucket_free;
  ares_htable_key_eq_t      key_eq;
  unsigned int              seed;
  unsigned int              size;
  size_t                    num_keys;
  size_t                    num_collisions;
  /* NOTE: if we converted buckets into ares_slist_t we could guarantee
   *       O(log n) worst-case insert and search performance even on hash
   *       collisions.  (We'd also need to turn key_eq into a key_cmp to
   *       support sorting.)  That said, the risk with a random hash seed is
   *       near zero, and ares_slist_t is heavier weight, so using
   *       ares_llist_t is an overall win. */
  ares_llist_t            **buckets;
};

static unsigned int ares_htable_generate_seed(ares_htable_t *htable)
{
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  /* Seed needs to be static for fuzzing */
  return 0;
#else
  unsigned int seed = 0;
  time_t       t    = time(NULL);

  /* Mix a stack address, a heap address, and the current time to generate a
   * seed.  It doesn't need to be cryptographically secure, just quick; even
   * this small amount of effort makes a hash-collision attack very
   * unlikely. */
  seed |= (unsigned int)((size_t)htable & 0xFFFFFFFF);
  seed |= (unsigned int)((size_t)&seed & 0xFFFFFFFF);
  seed |= (unsigned int)(((ares_uint64_t)t) & 0xFFFFFFFF);
  return seed;
#endif
}

static void ares_htable_buckets_destroy(ares_llist_t **buckets,
                                        unsigned int   size,
                                        ares_bool_t    destroy_vals)
{
  unsigned int i;

  if (buckets == NULL) {
    return;
  }

  for (i = 0; i < size; i++) {
    if (buckets[i] == NULL) {
      continue;
    }

    if (!destroy_vals) {
      ares_llist_replace_destructor(buckets[i], NULL);
    }

    ares_llist_destroy(buckets[i]);
  }

  ares_free(buckets);
}

void ares_htable_destroy(ares_htable_t *htable)
{
  if (htable == NULL) {
    return;
  }
  ares_htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE);
  ares_free(htable);
}

ares_htable_t *ares_htable_create(ares_htable_hashfunc_t    hash_func,
                                  ares_htable_bucket_key_t  bucket_key,
                                  ares_htable_bucket_free_t bucket_free,
                                  ares_htable_key_eq_t      key_eq)
{
  ares_htable_t *htable = NULL;

  if (hash_func == NULL || bucket_key == NULL || bucket_free == NULL ||
      key_eq == NULL) {
    goto fail;
  }

  htable = ares_malloc_zero(sizeof(*htable));
  if (htable == NULL) {
    goto fail;
  }

  htable->hash        = hash_func;
  htable->bucket_key  = bucket_key;
  htable->bucket_free = bucket_free;
  htable->key_eq      = key_eq;
  htable->seed        = ares_htable_generate_seed(htable);
  htable->size        = ARES__HTABLE_MIN_BUCKETS;
  htable->buckets = ares_malloc_zero(sizeof(*htable->buckets) * htable->size);

  if (htable->buckets == NULL) {
    goto fail;
  }

  return htable;

fail:
  ares_htable_destroy(htable);
  return NULL;
}
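
/* Usage sketch (illustrative only, not part of the upstream file): a caller
 * supplies the four callbacks above for its own bucket type.  The example_*
 * names below are hypothetical and exist only for this comment.
 *
 *   typedef struct {
 *     char *key;
 *     int   val;
 *   } example_bucket_t;
 *
 *   static unsigned int example_hash(const void *key, unsigned int seed)
 *   {
 *     const char *str = key;
 *     return ares_htable_hash_FNV1a((const unsigned char *)str,
 *                                   ares_strlen(str), seed);
 *   }
 *
 *   static const void *example_bucket_key(const void *bucket)
 *   {
 *     return ((const example_bucket_t *)bucket)->key;
 *   }
 *
 *   static void example_bucket_free(void *bucket)
 *   {
 *     example_bucket_t *b = bucket;
 *     ares_free(b->key);
 *     ares_free(b);
 *   }
 *
 *   static ares_bool_t example_key_eq(const void *key1, const void *key2)
 *   {
 *     return strcmp(key1, key2) == 0 ? ARES_TRUE : ARES_FALSE;
 *   }
 *
 *   ares_htable_t *h = ares_htable_create(example_hash, example_bucket_key,
 *                                         example_bucket_free,
 *                                         example_key_eq);
 */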

const void **ares_htable_all_buckets(const ares_htable_t *htable, size_t *num)
{
  const void **out = NULL;
  size_t       cnt = 0;
  size_t       i;

  if (htable == NULL || num == NULL) {
    return NULL; /* LCOV_EXCL_LINE */
  }

  *num = 0;

  out = ares_malloc_zero(sizeof(*out) * htable->num_keys);
  if (out == NULL) {
    return NULL; /* LCOV_EXCL_LINE */
  }

  for (i = 0; i < htable->size; i++) {
    ares_llist_node_t *node;
    for (node = ares_llist_node_first(htable->buckets[i]); node != NULL;
         node = ares_llist_node_next(node)) {
      out[cnt++] = ares_llist_node_val(node);
    }
  }

  *num = cnt;
  return out;
}

/*! Computes the hashtable bucket index for a key.  The index is the key's
 *  hash reduced to the size of the bucket array.  Since size is guaranteed
 *  to be a power of 2, "hash & (size - 1)" is equivalent to "hash % size"
 *  but more efficient. */
#define HASH_IDX(h, key) ((h)->hash((key), (h)->seed) & ((h)->size - 1))
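
/* Example: with size == 16, (size - 1) == 0x0F, so a hash of 0x12345678
 * keeps only its low 4 bits and lands in bucket 0x8 -- the same bucket that
 * 0x12345678 % 16 would select. */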

static ares_llist_node_t *ares_htable_find(const ares_htable_t *htable,
                                           unsigned int idx, const void *key)
{
  ares_llist_node_t *node = NULL;

  for (node = ares_llist_node_first(htable->buckets[idx]); node != NULL;
       node = ares_llist_node_next(node)) {
    if (htable->key_eq(key, htable->bucket_key(ares_llist_node_val(node)))) {
      break;
    }
  }

  return node;
}

static ares_bool_t ares_htable_expand(ares_htable_t *htable)
{
  ares_llist_t **buckets  = NULL;
  unsigned int   old_size = htable->size;
  size_t         i;
  ares_llist_t **prealloc_llist     = NULL;
  size_t         prealloc_llist_len = 0;
  ares_bool_t    rv                 = ARES_FALSE;

  /* Not a failure, just won't expand */
  if (old_size == ARES__HTABLE_MAX_BUCKETS) {
    return ARES_TRUE; /* LCOV_EXCL_LINE */
  }

  htable->size <<= 1;

  /* We must pre-allocate all memory we'll need before moving entries to the
   * new hash array.  Otherwise if there's a memory allocation failure in the
   * middle, we wouldn't be able to recover. */
  buckets = ares_malloc_zero(sizeof(*buckets) * htable->size);
  if (buckets == NULL) {
    goto done; /* LCOV_EXCL_LINE */
  }

  /* The maximum number of new llists we'll need is the number of collisions
   * that were recorded */
  prealloc_llist_len = htable->num_collisions;
  if (prealloc_llist_len) {
    prealloc_llist =
      ares_malloc_zero(sizeof(*prealloc_llist) * prealloc_llist_len);
    if (prealloc_llist == NULL) {
      goto done; /* LCOV_EXCL_LINE */
    }
  }
  for (i = 0; i < prealloc_llist_len; i++) {
    prealloc_llist[i] = ares_llist_create(htable->bucket_free);
    if (prealloc_llist[i] == NULL) {
      goto done;
    }
  }

  /* Iterate across all buckets and move the entries to the new buckets */
  htable->num_collisions = 0;
  for (i = 0; i < old_size; i++) {
    ares_llist_node_t *node;

    /* Nothing in this bucket */
    if (htable->buckets[i] == NULL) {
      continue;
    }

    /* Fast path optimization (most likely case): there is likely only a
     * single entry in both the source and destination.  Check for this, and
     * if so, just move the bucket over. */
    if (ares_llist_len(htable->buckets[i]) == 1) {
      const void *val = ares_llist_first_val(htable->buckets[i]);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      if (buckets[idx] == NULL) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        continue;
      }
    }

    /* Slow path, collisions */
    while ((node = ares_llist_node_first(htable->buckets[i])) != NULL) {
      const void *val = ares_llist_node_val(node);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      /* Try the fast path again: we may have popped one collision off and
       * can reuse the llist parent for the next entry */
      if (buckets[idx] == NULL && ares_llist_len(htable->buckets[i]) == 1) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        break;
      }

      /* Grab one off our preallocated list */
      if (buckets[idx] == NULL) {
        /* Silence static analysis, this isn't possible but it doesn't know */
        if (prealloc_llist == NULL || prealloc_llist_len == 0) {
          goto done; /* LCOV_EXCL_LINE */
        }
        buckets[idx] = prealloc_llist[prealloc_llist_len - 1];
        prealloc_llist_len--;
      } else {
        /* Collision occurred since the bucket wasn't empty */
        htable->num_collisions++;
      }

      ares_llist_node_mvparent_first(node, buckets[idx]);
    }

    /* Abandoned bucket, destroy */
    if (htable->buckets[i] != NULL) {
      ares_llist_destroy(htable->buckets[i]);
      htable->buckets[i] = NULL;
    }
  }

  /* We have guaranteed all the buckets have either been moved or destroyed,
   * so we just call ares_free() on the array and swap out the pointer */
  ares_free(htable->buckets);
  htable->buckets = buckets;
  buckets         = NULL;
  rv              = ARES_TRUE;

done:
  ares_free(buckets);
  /* destroy any unused preallocated buckets */
  ares_htable_buckets_destroy(prealloc_llist, (unsigned int)prealloc_llist_len,
                              ARES_FALSE);

  /* On failure, we need to restore the htable size */
  if (rv != ARES_TRUE) {
    htable->size = old_size; /* LCOV_EXCL_LINE */
  }

  return rv;
}
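
/* Growth sketch: each successful expansion doubles the bucket count, so the
 * table walks 16 -> 32 -> 64 -> ... up to ARES__HTABLE_MAX_BUCKETS (1 << 24),
 * after which ares_htable_expand() returns ARES_TRUE without growing. */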

ares_bool_t ares_htable_insert(ares_htable_t *htable, void *bucket)
{
  unsigned int       idx  = 0;
  ares_llist_node_t *node = NULL;
  const void        *key  = NULL;

  if (htable == NULL || bucket == NULL) {
    return ARES_FALSE;
  }

  key = htable->bucket_key(bucket);
  idx = HASH_IDX(htable, key);

  /* See if we have a matching bucket already, if so, replace it */
  node = ares_htable_find(htable, idx, key);
  if (node != NULL) {
    ares_llist_node_replace(node, bucket);
    return ARES_TRUE;
  }

  /* Check to see if we should rehash because likelihood of collisions has
   * increased beyond our threshold */
  if (htable->num_keys + 1 >
      (htable->size * ARES__HTABLE_EXPAND_PERCENT) / 100) {
    if (!ares_htable_expand(htable)) {
      return ARES_FALSE; /* LCOV_EXCL_LINE */
    }
    /* If we expanded, need to calculate a new index */
    idx = HASH_IDX(htable, key);
  }

  /* We lazily allocate the linked list */
  if (htable->buckets[idx] == NULL) {
    htable->buckets[idx] = ares_llist_create(htable->bucket_free);
    if (htable->buckets[idx] == NULL) {
      return ARES_FALSE;
    }
  }

  node = ares_llist_insert_first(htable->buckets[idx], bucket);
  if (node == NULL) {
    return ARES_FALSE;
  }

  /* Track collisions for rehash stability */
  if (ares_llist_len(htable->buckets[idx]) > 1) {
    htable->num_collisions++;
  }

  htable->num_keys++;

  return ARES_TRUE;
}
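
/* Worked example of the expansion threshold above: with the initial 16
 * buckets and ARES__HTABLE_EXPAND_PERCENT of 75, the check is
 * num_keys + 1 > (16 * 75) / 100 == 12, so inserting the 13th unique key
 * first doubles the table to 32 buckets. */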

void *ares_htable_get(const ares_htable_t *htable, const void *key)
{
  unsigned int idx;

  if (htable == NULL || key == NULL) {
    return NULL;
  }

  idx = HASH_IDX(htable, key);

  return ares_llist_node_val(ares_htable_find(htable, idx, key));
}

ares_bool_t ares_htable_remove(ares_htable_t *htable, const void *key)
{
  ares_llist_node_t *node;
  unsigned int       idx;

  if (htable == NULL || key == NULL) {
    return ARES_FALSE;
  }

  idx  = HASH_IDX(htable, key);
  node = ares_htable_find(htable, idx, key);
  if (node == NULL) {
    return ARES_FALSE;
  }

  htable->num_keys--;

  /* Reduce collisions */
  if (ares_llist_len(ares_llist_node_parent(node)) > 1) {
    htable->num_collisions--;
  }

  ares_llist_node_destroy(node);
  return ARES_TRUE;
}

size_t ares_htable_num_keys(const ares_htable_t *htable)
{
  if (htable == NULL) {
    return 0;
  }
  return htable->num_keys;
}

unsigned int ares_htable_hash_FNV1a(const unsigned char *key, size_t key_len,
                                    unsigned int seed)
{
  /* 2166136261 (0x811C9DC5) is the 32-bit FNV offset basis */
  unsigned int hv = seed ^ 2166136261U;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)key[i];
    /* hv *= 16777619 (0x01000193), the 32-bit FNV prime.  Since
     * 16777619 == 1 + 2^1 + 2^4 + 2^7 + 2^8 + 2^24, the shift-and-add below
     * performs the same multiply. */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}
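
/* Sanity-check sketch (illustrative): with seed == 0 the function above is
 * standard 32-bit FNV-1a, so hashing the single byte "a" (0x61) yields
 * (2166136261 ^ 0x61) * 16777619 mod 2^32 == 0xE40C292C, the published
 * FNV-1a test vector for "a". */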

/* Case-insensitive version, meant for ASCII strings */
unsigned int ares_htable_hash_FNV1a_casecmp(const unsigned char *key,
                                            size_t key_len, unsigned int seed)
{
  unsigned int hv = seed ^ 2166136261U;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)ares_tolower(key[i]);
    /* hv *= 16777619 (0x01000193), as in ares_htable_hash_FNV1a above */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}