/* MIT License
 *
 * Copyright (c) 2023 Brad House
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */
#include "ares_setup.h"
#include "ares.h"
#include "ares_private.h"
#include "ares__llist.h"
#include "ares__htable.h"

#define ARES__HTABLE_MAX_BUCKETS    (1U << 24)
#define ARES__HTABLE_MIN_BUCKETS    (1U << 4)
#define ARES__HTABLE_EXPAND_PERCENT 75

struct ares__htable {
  ares__htable_hashfunc_t    hash;
  ares__htable_bucket_key_t  bucket_key;
  ares__htable_bucket_free_t bucket_free;
  ares__htable_key_eq_t      key_eq;
  unsigned int               seed;
  unsigned int               size;
  size_t                     num_keys;
  size_t                     num_collisions;
  /* NOTE: if we converted buckets into ares__slist_t we could guarantee
   *       O(log n) worst-case insert and search performance on hash
   *       collisions.  (We'd also need to turn key_eq into a key_cmp to
   *       support sorting.)  That said, the risk with a random hash seed is
   *       near zero, and ares__slist_t is heavier weight, so using
   *       ares__llist_t is an overall win. */
  ares__llist_t            **buckets;
};

static unsigned int ares__htable_generate_seed(ares__htable_t *htable)
{
  unsigned int seed = 0;
  time_t       t    = time(NULL);

  /* Mix a stack address, a heap address, and the time to generate a random
   * seed.  It doesn't have to be cryptographically secure, just quick; even
   * this small amount of effort makes a hash collision attack very
   * unlikely. */
  seed |= (unsigned int)((size_t)htable & 0xFFFFFFFF);
  seed |= (unsigned int)((size_t)&seed & 0xFFFFFFFF);
  seed |= (unsigned int)(t & 0xFFFFFFFF);
  return seed;
}

static void ares__htable_buckets_destroy(ares__llist_t **buckets,
                                         unsigned int    size,
                                         ares_bool_t     destroy_vals)
{
  unsigned int i;

  if (buckets == NULL) {
    return;
  }

  for (i = 0; i < size; i++) {
    if (buckets[i] == NULL) {
      continue;
    }

    if (!destroy_vals) {
      ares__llist_replace_destructor(buckets[i], NULL);
    }

    ares__llist_destroy(buckets[i]);
  }

  ares_free(buckets);
}

void ares__htable_destroy(ares__htable_t *htable)
{
  if (htable == NULL) {
    return;
  }
  ares__htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE);
  ares_free(htable);
}

ares__htable_t *ares__htable_create(ares__htable_hashfunc_t    hash_func,
                                    ares__htable_bucket_key_t  bucket_key,
                                    ares__htable_bucket_free_t bucket_free,
                                    ares__htable_key_eq_t      key_eq)
{
  ares__htable_t *htable = NULL;

  if (hash_func == NULL || bucket_key == NULL || bucket_free == NULL ||
      key_eq == NULL) {
    goto fail;
  }

  htable = ares_malloc_zero(sizeof(*htable));
  if (htable == NULL) {
    goto fail;
  }

  htable->hash        = hash_func;
  htable->bucket_key  = bucket_key;
  htable->bucket_free = bucket_free;
  htable->key_eq      = key_eq;
  htable->seed        = ares__htable_generate_seed(htable);
  htable->size        = ARES__HTABLE_MIN_BUCKETS;
  htable->buckets = ares_malloc_zero(sizeof(*htable->buckets) * htable->size);

  if (htable->buckets == NULL) {
    goto fail;
  }

  return htable;

fail:
  ares__htable_destroy(htable);
  return NULL;
}
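
/* Usage sketch (illustrative only): the callback signatures below follow the
 * call sites in this file, but the entry type, the entry_* helpers, and the
 * string-key scheme are hypothetical.
 *
 *   struct entry {
 *     char *key;    // NUL-terminated, owned by the entry
 *     int   value;
 *   };
 *
 *   static unsigned int entry_hash(const void *key, unsigned int seed)
 *   {
 *     return ares__htable_hash_FNV1a((const unsigned char *)key,
 *                                    strlen(key), seed);
 *   }
 *
 *   static const void *entry_key(const void *bucket)
 *   {
 *     return ((const struct entry *)bucket)->key;
 *   }
 *
 *   static void entry_free(void *bucket)
 *   {
 *     struct entry *e = bucket;
 *     ares_free(e->key);
 *     ares_free(e);
 *   }
 *
 *   static ares_bool_t entry_key_eq(const void *key1, const void *key2)
 *   {
 *     return strcmp(key1, key2) == 0 ? ARES_TRUE : ARES_FALSE;
 *   }
 *
 *   ares__htable_t *h = ares__htable_create(entry_hash, entry_key,
 *                                           entry_free, entry_key_eq);
 *
 * Entries are then added with ares__htable_insert(h, e), looked up with
 * ares__htable_get(h, "somekey"), and the table (plus any remaining entries)
 * is freed with ares__htable_destroy(h). */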

const void **ares__htable_all_buckets(const ares__htable_t *htable, size_t *num)
{
  const void **out = NULL;
  size_t       cnt = 0;
  size_t       i;

  if (htable == NULL || num == NULL) {
    return NULL;
  }

  *num = 0;

  out = ares_malloc_zero(sizeof(*out) * htable->num_keys);
  if (out == NULL) {
    return NULL;
  }

  for (i = 0; i < htable->size; i++) {
    ares__llist_node_t *node;
    for (node = ares__llist_node_first(htable->buckets[i]); node != NULL;
         node = ares__llist_node_next(node)) {
      out[cnt++] = ares__llist_node_val(node);
    }
  }

  *num = cnt;
  return out;
}

/*! Grabs the hashtable index from the key.  The index is the hash of the
 *  key reduced to the size of the bucket list.  We use "hash & (size - 1)"
 *  since size is guaranteed to be a power of 2; this is equivalent to
 *  "hash % size" but more efficient. */
#define HASH_IDX(h, key) ((h)->hash(key, (h)->seed) & ((h)->size - 1))
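
/* Worked example (illustrative): with size = 16, "hash & 15" keeps only the
 * low 4 bits, so a hash of 0x9E3779B9 maps to bucket 0x9 -- the same result
 * as 0x9E3779B9 % 16, without the division. */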

static ares__llist_node_t *ares__htable_find(const ares__htable_t *htable,
                                             unsigned int idx, const void *key)
{
  ares__llist_node_t *node = NULL;

  for (node = ares__llist_node_first(htable->buckets[idx]); node != NULL;
       node = ares__llist_node_next(node)) {
    if (htable->key_eq(key, htable->bucket_key(ares__llist_node_val(node)))) {
      break;
    }
  }

  return node;
}

static ares_bool_t ares__htable_expand(ares__htable_t *htable)
{
  ares__llist_t **buckets  = NULL;
  unsigned int    old_size = htable->size;
  size_t          i;
  ares__llist_t **prealloc_llist     = NULL;
  size_t          prealloc_llist_len = 0;
  ares_bool_t     rv                 = ARES_FALSE;

  /* Not a failure, just won't expand */
  if (old_size == ARES__HTABLE_MAX_BUCKETS) {
    return ARES_TRUE;
  }

  htable->size <<= 1;

  /* We must pre-allocate all memory we'll need before moving entries to the
   * new hash array.  Otherwise, if there's a memory allocation failure in
   * the middle, we wouldn't be able to recover. */
  buckets = ares_malloc_zero(sizeof(*buckets) * htable->size);
  if (buckets == NULL) {
    goto done;
  }

  /* The maximum number of new llists we'll need is the number of collisions
   * that were recorded */
  prealloc_llist_len = htable->num_collisions;
  if (prealloc_llist_len) {
    prealloc_llist =
      ares_malloc_zero(sizeof(*prealloc_llist) * prealloc_llist_len);
    if (prealloc_llist == NULL) {
      goto done;
    }
  }
  for (i = 0; i < prealloc_llist_len; i++) {
    prealloc_llist[i] = ares__llist_create(htable->bucket_free);
    if (prealloc_llist[i] == NULL) {
      goto done;
    }
  }

  /* Iterate across all buckets and move the entries to the new buckets */
  htable->num_collisions = 0;
  for (i = 0; i < old_size; i++) {
    ares__llist_node_t *node;

    /* Nothing in this bucket */
    if (htable->buckets[i] == NULL) {
      continue;
    }

    /* Fast path optimization (the most likely case): there is probably only
     * a single entry in both the source and destination buckets.  Check to
     * confirm, and if so just move the whole bucket over. */
    if (ares__llist_len(htable->buckets[i]) == 1) {
      const void *val = ares__llist_first_val(htable->buckets[i]);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      if (buckets[idx] == NULL) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        continue;
      }
    }

    /* Slow path, collisions */
    while ((node = ares__llist_node_first(htable->buckets[i])) != NULL) {
      const void *val = ares__llist_node_val(node);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      /* Try the fast path again; we may have popped one collision off and
       * can now reuse the llist parent for the remaining entry */
      if (buckets[idx] == NULL && ares__llist_len(htable->buckets[i]) == 1) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        break;
      }

      /* Grab one off our preallocated list */
      if (buckets[idx] == NULL) {
        /* Silence static analysis; this isn't possible, but it doesn't know
         * that */
        if (prealloc_llist == NULL || prealloc_llist_len == 0) {
          goto done;
        }
        buckets[idx] = prealloc_llist[prealloc_llist_len - 1];
        prealloc_llist_len--;
      } else {
        /* Collision occurred since the bucket wasn't empty */
        htable->num_collisions++;
      }

      ares__llist_node_move_parent_first(node, buckets[idx]);
    }

    /* Abandoned bucket, destroy */
    if (htable->buckets[i] != NULL) {
      ares__llist_destroy(htable->buckets[i]);
      htable->buckets[i] = NULL;
    }
  }

  /* We have guaranteed all the buckets have either been moved or destroyed,
   * so we just call ares_free() on the array and swap out the pointer */
  ares_free(htable->buckets);
  htable->buckets = buckets;
  buckets         = NULL;
  rv              = ARES_TRUE;

done:
  ares_free(buckets);
  /* Destroy any unused preallocated buckets */
  ares__htable_buckets_destroy(prealloc_llist, (unsigned int)prealloc_llist_len,
                               ARES_FALSE);

  /* On failure, we need to restore the htable size */
  if (rv != ARES_TRUE) {
    htable->size = old_size;
  }

  return rv;
}

ares_bool_t ares__htable_insert(ares__htable_t *htable, void *bucket)
{
  unsigned int        idx  = 0;
  ares__llist_node_t *node = NULL;
  const void         *key  = NULL;

  if (htable == NULL || bucket == NULL) {
    return ARES_FALSE;
  }

  key = htable->bucket_key(bucket);
  idx = HASH_IDX(htable, key);

  /* See if we have a matching bucket already; if so, replace it */
  node = ares__htable_find(htable, idx, key);
  if (node != NULL) {
    ares__llist_node_replace(node, bucket);
    return ARES_TRUE;
  }

  /* Check to see if we should rehash because the likelihood of collisions
   * has increased beyond our threshold */
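  /* Worked example (illustrative): at the minimum size of 16 buckets the
   * threshold is (16 * 75) / 100 = 12, so inserting the 13th key triggers
   * a doubling to 32 buckets. */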
  if (htable->num_keys + 1 >
      (htable->size * ARES__HTABLE_EXPAND_PERCENT) / 100) {
    if (!ares__htable_expand(htable)) {
      return ARES_FALSE;
    }
    /* If we expanded, we need to calculate a new index */
    idx = HASH_IDX(htable, key);
  }

  /* We lazily allocate the linked list */
  if (htable->buckets[idx] == NULL) {
    htable->buckets[idx] = ares__llist_create(htable->bucket_free);
    if (htable->buckets[idx] == NULL) {
      return ARES_FALSE;
    }
  }

  node = ares__llist_insert_first(htable->buckets[idx], bucket);
  if (node == NULL) {
    return ARES_FALSE;
  }

  /* Track collisions for rehash stability */
  if (ares__llist_len(htable->buckets[idx]) > 1) {
    htable->num_collisions++;
  }

  htable->num_keys++;

  return ARES_TRUE;
}

void *ares__htable_get(const ares__htable_t *htable, const void *key)
{
  unsigned int idx;

  if (htable == NULL || key == NULL) {
    return NULL;
  }

  idx = HASH_IDX(htable, key);

  return ares__llist_node_val(ares__htable_find(htable, idx, key));
}

ares_bool_t ares__htable_remove(ares__htable_t *htable, const void *key)
{
  ares__llist_node_t *node;
  unsigned int        idx;

  if (htable == NULL || key == NULL) {
    return ARES_FALSE;
  }

  idx  = HASH_IDX(htable, key);
  node = ares__htable_find(htable, idx, key);
  if (node == NULL) {
    return ARES_FALSE;
  }

  htable->num_keys--;

  /* Reduce collisions */
  if (ares__llist_len(ares__llist_node_parent(node)) > 1) {
    htable->num_collisions--;
  }

  ares__llist_node_destroy(node);
  return ARES_TRUE;
}

size_t ares__htable_num_keys(const ares__htable_t *htable)
{
  if (htable == NULL) {
    return 0;
  }
  return htable->num_keys;
}

unsigned int ares__htable_hash_FNV1a(const unsigned char *key, size_t key_len,
                                     unsigned int seed)
{
  /* The recommended seed is the FNV-1a offset basis, 2166136261U, but we
   * take a randomized seed to make collisions unpredictable */
  unsigned int hv = seed;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)key[i];
    /* hv *= 0x01000193 */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}
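
/* Note (informative): the shift-add line above computes hv *= 0x01000193,
 * since 0x01000193 = 2^24 + 2^8 + 2^7 + 2^4 + 2^1 + 2^0.  Seeded with the
 * canonical offset basis 2166136261U, this matches standard 32-bit FNV-1a;
 * e.g. hashing the single byte 'a' yields 0xE40C292C. */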

/* Case-insensitive version, meant for ASCII strings */
unsigned int ares__htable_hash_FNV1a_casecmp(const unsigned char *key,
                                             size_t key_len, unsigned int seed)
{
  /* The recommended seed is the FNV-1a offset basis, 2166136261U, but we
   * take a randomized seed to make collisions unpredictable */
  unsigned int hv = seed;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)ares__tolower(key[i]);
    /* hv *= 0x01000193 */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}
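
/* Note (informative): for ASCII input this is equivalent to running
 * ares__htable_hash_FNV1a() over the lowercased key, so keys differing only
 * in ASCII case land in the same bucket. */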