1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7 Copyright 2014 Michal Schmidt
8
9 systemd is free software; you can redistribute it and/or modify it
10 under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 2.1 of the License, or
12 (at your option) any later version.
13
14 systemd is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public License
20 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 ***/
22
23 #include <assert.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <errno.h>
27
28 #include "util.h"
29 #include "hashmap.h"
30 #include "set.h"
31 #include "macro.h"
32 #include "siphash24.h"
33 #include "strv.h"
34 #include "list.h"
35 #include "mempool.h"
36 #include "random-util.h"
37
38 /*
39 * Implementation of hashmaps.
40 * Addressing: open
41 * - uses less RAM compared to closed addressing (chaining), because
42 * our entries are small (especially in Sets, which tend to contain
43 * the majority of entries in systemd).
44 * Collision resolution: Robin Hood
45 * - tends to equalize displacement of entries from their optimal buckets.
46 * Probe sequence: linear
47 * - though theoretically worse than random probing/uniform hashing/double
48 * hashing, it is good for cache locality.
49 *
50 * References:
51 * Celis, P. 1986. Robin Hood Hashing.
52  * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada.
53 * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
54 * - The results are derived for random probing. Suggests deletion with
55 * tombstones and two mean-centered search methods. None of that works
56 * well for linear probing.
57 *
58 * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
59 * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
60 * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
61 * http://www.math.uu.se/~svante/papers/sj157.pdf
62 * - Applies to Robin Hood with linear probing. Contains remarks on
63 * the unsuitability of mean-centered search with linear probing.
64 *
65 * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
66 * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
67 * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
68 * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
69 * in a successful search), and Janson writes about displacement. C = d + 1.
70 *
71 * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
72 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
73 * - Explanation of backward shift deletion with pictures.
74 *
75 * Khuong, P. 2013. The Other Robin Hood Hashing.
76 * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
77 * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
78 */
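/*
 * For illustration only: a minimal, self-contained sketch of Robin Hood
 * insertion with linear probing, independent of the structures and helpers
 * defined below. The example_* names are hypothetical, the table size is
 * fixed, and there is no resizing or deletion; the block is kept out of the
 * build with #if 0.
 */
#if 0
#define EXAMPLE_BUCKETS 8U

struct example_bucket {
        const char *key;        /* NULL marks a free bucket */
        unsigned dib;           /* distance from the key's initial bucket */
};

static void example_robin_hood_put(struct example_bucket t[EXAMPLE_BUCKETS],
                                   const char *key, unsigned hash) {
        struct example_bucket incoming = { key, 0 };
        unsigned idx = hash % EXAMPLE_BUCKETS;

        /* Assumes the table never becomes completely full. */
        for (;;) {
                if (!t[idx].key) {
                        /* A free bucket: place the entry here. */
                        t[idx] = incoming;
                        return;
                }

                if (t[idx].dib < incoming.dib) {
                        /* The resident entry is "wealthier" (closer to its
                         * initial bucket). Take its bucket and continue
                         * probing with the displaced resident. */
                        struct example_bucket tmp = t[idx];
                        t[idx] = incoming;
                        incoming = tmp;
                }

                idx = (idx + 1U) % EXAMPLE_BUCKETS;     /* linear probing */
                incoming.dib++;
        }
}
#endif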
79
80 /*
81 * XXX Ideas for improvement:
82 * For unordered hashmaps, randomize iteration order, similarly to Perl:
83 * http://blog.booking.com/hardening-perls-hash-function.html
84 */
85
86 /* INV_KEEP_FREE = 1 / (1 - max_load_factor)
87 * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
88 #define INV_KEEP_FREE 5U
89
90 /* Fields common to entries of all hashmap/set types */
91 struct hashmap_base_entry {
92 const void *key;
93 };
94
95 /* Entry types for specific hashmap/set types
96 * hashmap_base_entry must be at the beginning of each entry struct. */
97
98 struct plain_hashmap_entry {
99 struct hashmap_base_entry b;
100 void *value;
101 };
102
103 struct ordered_hashmap_entry {
104 struct plain_hashmap_entry p;
105 unsigned iterate_next, iterate_previous;
106 };
107
108 struct set_entry {
109 struct hashmap_base_entry b;
110 };
111
112 /* In several functions it is advantageous to have the hash table extended
113 * virtually by a couple of additional buckets. We reserve special index values
114 * for these "swap" buckets. */
115 #define _IDX_SWAP_BEGIN (UINT_MAX - 3)
116 #define IDX_PUT (_IDX_SWAP_BEGIN + 0)
117 #define IDX_TMP (_IDX_SWAP_BEGIN + 1)
118 #define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
119
120 #define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
121 #define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
122
123 assert_cc(IDX_FIRST == _IDX_SWAP_END);
124 assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
125
126 /* Storage space for the "swap" buckets.
127  * All entry types can fit into an ordered_hashmap_entry. */
128 struct swap_entries {
129 struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
130 };
131
132 /* Distance from Initial Bucket */
133 typedef uint8_t dib_raw_t;
134 #define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
135 #define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
136 #define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
137 #define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
138
139 #define DIB_FREE UINT_MAX
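/* Worked example (illustrative): an entry whose key hashes to bucket 5 but is
 * stored in bucket 7 has DIB 2; wraparound across the end of the table is
 * handled by bucket_distance() below. DIBs >= DIB_RAW_OVERFLOW do not fit into
 * the one-byte raw store; they are stored as DIB_RAW_OVERFLOW and recomputed
 * from the hash on demand by bucket_calculate_dib(). */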
140
141 #ifdef ENABLE_DEBUG_HASHMAP
142 struct hashmap_debug_info {
143 LIST_FIELDS(struct hashmap_debug_info, debug_list);
144 unsigned max_entries; /* high watermark of n_entries */
145
146 /* who allocated this hashmap */
147 int line;
148 const char *file;
149 const char *func;
150
151 /* fields to detect modification while iterating */
152 unsigned put_count; /* counts puts into the hashmap */
153 unsigned rem_count; /* counts removals from hashmap */
154 unsigned last_rem_idx; /* remembers last removal index */
155 };
156
157 /* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
158 static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
159
160 #define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;
161
162 #else /* !ENABLE_DEBUG_HASHMAP */
163 #define HASHMAP_DEBUG_FIELDS
164 #endif /* ENABLE_DEBUG_HASHMAP */
165
166 enum HashmapType {
167 HASHMAP_TYPE_PLAIN,
168 HASHMAP_TYPE_ORDERED,
169 HASHMAP_TYPE_SET,
170 _HASHMAP_TYPE_MAX
171 };
172
173 struct _packed_ indirect_storage {
174 char *storage; /* where buckets and DIBs are stored */
175 uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
176
177 unsigned n_entries; /* number of stored entries */
178 unsigned n_buckets; /* number of buckets */
179
180 unsigned idx_lowest_entry; /* Index below which all buckets are free.
181 Makes "while(hashmap_steal_first())" loops
182 O(n) instead of O(n^2) for unordered hashmaps. */
183 uint8_t _pad[3]; /* padding for the whole HashmapBase */
184 /* The bitfields in HashmapBase complete the alignment of the whole thing. */
185 };
186
187 struct direct_storage {
188 /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
189 * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
190 * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
191 char storage[sizeof(struct indirect_storage)];
192 };
193
194 #define DIRECT_BUCKETS(entry_t) \
195 (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
196
197 /* We should be able to store at least one entry directly. */
198 assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
199
200 /* We have 3 bits for n_direct_entries. */
201 assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
202
203 /* Hashmaps with directly stored entries all use this shared hash key.
204 * It's no big deal if the key is guessed, because there can be only
205 * a handful of directly stored entries in a hashmap. When a hashmap
206 * outgrows direct storage, it gets its own key for indirect storage. */
207 static uint8_t shared_hash_key[HASH_KEY_SIZE];
208 static bool shared_hash_key_initialized;
209
210 /* Fields that all hashmap/set types must have */
211 struct HashmapBase {
212 const struct hash_ops *hash_ops; /* hash and compare ops to use */
213
214 union _packed_ {
215 struct indirect_storage indirect; /* if has_indirect */
216 struct direct_storage direct; /* if !has_indirect */
217 };
218
219 enum HashmapType type:2; /* HASHMAP_TYPE_* */
220 bool has_indirect:1; /* whether indirect storage is used */
221 unsigned n_direct_entries:3; /* Number of entries in direct storage.
222 * Only valid if !has_indirect. */
223 bool from_pool:1; /* whether this was allocated from a mempool */
224 HASHMAP_DEBUG_FIELDS /* optional hashmap_debug_info */
225 };
226
227 /* Specific hash types
228 * HashmapBase must be at the beginning of each hashmap struct. */
229
230 struct Hashmap {
231 struct HashmapBase b;
232 };
233
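/* An OrderedHashmap additionally threads a doubly linked list through its
 * entries: iterate_list_head/iterate_list_tail below, together with the
 * per-entry iterate_next/iterate_previous indexes of struct
 * ordered_hashmap_entry, record insertion order as bucket indexes. */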
234 struct OrderedHashmap {
235 struct HashmapBase b;
236 unsigned iterate_list_head, iterate_list_tail;
237 };
238
239 struct Set {
240 struct HashmapBase b;
241 };
242
243 DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
244 DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
245 /* No need for a separate Set pool */
246 assert_cc(sizeof(Hashmap) == sizeof(Set));
247
248 struct hashmap_type_info {
249 size_t head_size;
250 size_t entry_size;
251 struct mempool *mempool;
252 unsigned n_direct_buckets;
253 };
254
255 static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
256 [HASHMAP_TYPE_PLAIN] = {
257 .head_size = sizeof(Hashmap),
258 .entry_size = sizeof(struct plain_hashmap_entry),
259 .mempool = &hashmap_pool,
260 .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
261 },
262 [HASHMAP_TYPE_ORDERED] = {
263 .head_size = sizeof(OrderedHashmap),
264 .entry_size = sizeof(struct ordered_hashmap_entry),
265 .mempool = &ordered_hashmap_pool,
266 .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
267 },
268 [HASHMAP_TYPE_SET] = {
269 .head_size = sizeof(Set),
270 .entry_size = sizeof(struct set_entry),
271 .mempool = &hashmap_pool,
272 .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
273 },
274 };
275
276 unsigned long string_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
277 uint64_t u;
278 siphash24((uint8_t*) &u, p, strlen(p), hash_key);
279 return (unsigned long) u;
280 }
281
282 int string_compare_func(const void *a, const void *b) {
283 return strcmp(a, b);
284 }
285
286 const struct hash_ops string_hash_ops = {
287 .hash = string_hash_func,
288 .compare = string_compare_func
289 };
290
291 unsigned long trivial_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
292 uint64_t u;
293 siphash24((uint8_t*) &u, &p, sizeof(p), hash_key);
294 return (unsigned long) u;
295 }
296
297 int trivial_compare_func(const void *a, const void *b) {
298 return a < b ? -1 : (a > b ? 1 : 0);
299 }
300
301 const struct hash_ops trivial_hash_ops = {
302 .hash = trivial_hash_func,
303 .compare = trivial_compare_func
304 };
305
306 unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
307 uint64_t u;
308 siphash24((uint8_t*) &u, p, sizeof(uint64_t), hash_key);
309 return (unsigned long) u;
310 }
311
312 int uint64_compare_func(const void *_a, const void *_b) {
313 uint64_t a, b;
314 a = *(const uint64_t*) _a;
315 b = *(const uint64_t*) _b;
316 return a < b ? -1 : (a > b ? 1 : 0);
317 }
318
319 const struct hash_ops uint64_hash_ops = {
320 .hash = uint64_hash_func,
321 .compare = uint64_compare_func
322 };
323
324 #if SIZEOF_DEV_T != 8
325 unsigned long devt_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
326 uint64_t u;
327 siphash24((uint8_t*) &u, p, sizeof(dev_t), hash_key);
328 return (unsigned long) u;
329 }
330
331 int devt_compare_func(const void *_a, const void *_b) {
332 dev_t a, b;
333 a = *(const dev_t*) _a;
334 b = *(const dev_t*) _b;
335 return a < b ? -1 : (a > b ? 1 : 0);
336 }
337
338 const struct hash_ops devt_hash_ops = {
339 .hash = devt_hash_func,
340 .compare = devt_compare_func
341 };
342 #endif
343
344 static unsigned n_buckets(HashmapBase *h) {
345 return h->has_indirect ? h->indirect.n_buckets
346 : hashmap_type_info[h->type].n_direct_buckets;
347 }
348
349 static unsigned n_entries(HashmapBase *h) {
350 return h->has_indirect ? h->indirect.n_entries
351 : h->n_direct_entries;
352 }
353
354 static void n_entries_inc(HashmapBase *h) {
355 if (h->has_indirect)
356 h->indirect.n_entries++;
357 else
358 h->n_direct_entries++;
359 }
360
361 static void n_entries_dec(HashmapBase *h) {
362 if (h->has_indirect)
363 h->indirect.n_entries--;
364 else
365 h->n_direct_entries--;
366 }
367
368 static char *storage_ptr(HashmapBase *h) {
369 return h->has_indirect ? h->indirect.storage
370 : h->direct.storage;
371 }
372
373 static uint8_t *hash_key(HashmapBase *h) {
374 return h->has_indirect ? h->indirect.hash_key
375 : shared_hash_key;
376 }
377
378 static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
379 return (unsigned) (h->hash_ops->hash(p, hash_key(h)) % n_buckets(h));
380 }
381 #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
382
383 static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
384 static uint8_t current[HASH_KEY_SIZE];
385 static bool current_initialized = false;
386
387 /* Returns a hash function key to use. In order to keep things
388 * fast we will not generate a new key each time we allocate a
389 * new hash table. Instead, we'll just reuse the most recently
390 * generated one, except if we never generated one or when we
391 * are rehashing an entire hash table because we reached a
392  * fill level limit. */
393
394 if (!current_initialized || !reuse_is_ok) {
395 random_bytes(current, sizeof(current));
396 current_initialized = true;
397 }
398
399 memcpy(hash_key, current, sizeof(current));
400 }
401
402 static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
403 return (struct hashmap_base_entry*)
404 (storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
405 }
406
407 static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
408 return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
409 }
410
411 static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
412 return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
413 }
414
415 static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
416 return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
417 }
418
419 static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
420 return &swap->e[idx - _IDX_SWAP_BEGIN];
421 }
422
423 /* Returns a pointer to the bucket at index idx.
424 * Understands real indexes and swap indexes, hence "_virtual". */
425 static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
426 unsigned idx) {
427 if (idx < _IDX_SWAP_BEGIN)
428 return bucket_at(h, idx);
429
430 if (idx < _IDX_SWAP_END)
431 return &bucket_at_swap(swap, idx)->p.b;
432
433 assert_not_reached("Invalid index");
434 }
435
436 static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
437 return (dib_raw_t*)
438 (storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
439 }
440
441 static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
442 return idx >= from ? idx - from
443 : n_buckets(h) + idx - from;
444 }
445
446 static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
447 unsigned initial_bucket;
448
449 if (raw_dib == DIB_RAW_FREE)
450 return DIB_FREE;
451
452 if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
453 return raw_dib;
454
455 /*
456 * Having an overflow DIB value is very unlikely. The hash function
457 * would have to be bad. For example, in a table of size 2^24 filled
458 * to load factor 0.9 the maximum observed DIB is only about 60.
459 * In theory (assuming I used Maxima correctly), for an infinite size
460 * hash table with load factor 0.8 the probability of a given entry
461 * having DIB > 40 is 1.9e-8.
462 * This returns the correct DIB value by recomputing the hash value in
463 * the unlikely case. XXX Hitting this case could be a hint to rehash.
464 */
465 initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
466 return bucket_distance(h, idx, initial_bucket);
467 }
468
469 static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
470 dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
471 }
472
473 static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
474 dib_raw_t *dibs;
475
476 dibs = dib_raw_ptr(h);
477
478 for ( ; idx < n_buckets(h); idx++)
479 if (dibs[idx] != DIB_RAW_FREE)
480 return idx;
481
482 return IDX_NIL;
483 }
484
485 static void bucket_mark_free(HashmapBase *h, unsigned idx) {
486 memset(bucket_at(h, idx), 0, hashmap_type_info[h->type].entry_size);
487 bucket_set_dib(h, idx, DIB_FREE);
488 }
489
490 static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
491 unsigned from, unsigned to) {
492 struct hashmap_base_entry *e_from, *e_to;
493
494 assert(from != to);
495
496 e_from = bucket_at_virtual(h, swap, from);
497 e_to = bucket_at_virtual(h, swap, to);
498
499 memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
500
501 if (h->type == HASHMAP_TYPE_ORDERED) {
502 OrderedHashmap *lh = (OrderedHashmap*) h;
503 struct ordered_hashmap_entry *le, *le_to;
504
505 le_to = (struct ordered_hashmap_entry*) e_to;
506
507 if (le_to->iterate_next != IDX_NIL) {
508 le = (struct ordered_hashmap_entry*)
509 bucket_at_virtual(h, swap, le_to->iterate_next);
510 le->iterate_previous = to;
511 }
512
513 if (le_to->iterate_previous != IDX_NIL) {
514 le = (struct ordered_hashmap_entry*)
515 bucket_at_virtual(h, swap, le_to->iterate_previous);
516 le->iterate_next = to;
517 }
518
519 if (lh->iterate_list_head == from)
520 lh->iterate_list_head = to;
521 if (lh->iterate_list_tail == from)
522 lh->iterate_list_tail = to;
523 }
524 }
525
526 static unsigned next_idx(HashmapBase *h, unsigned idx) {
527 return (idx + 1U) % n_buckets(h);
528 }
529
530 static unsigned prev_idx(HashmapBase *h, unsigned idx) {
531 return (n_buckets(h) + idx - 1U) % n_buckets(h);
532 }
533
534 static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
535 switch (h->type) {
536
537 case HASHMAP_TYPE_PLAIN:
538 case HASHMAP_TYPE_ORDERED:
539 return ((struct plain_hashmap_entry*)e)->value;
540
541 case HASHMAP_TYPE_SET:
542 return (void*) e->key;
543
544 default:
545 assert_not_reached("Unknown hashmap type");
546 }
547 }
548
549 static void base_remove_entry(HashmapBase *h, unsigned idx) {
550 unsigned left, right, prev, dib;
551 dib_raw_t raw_dib, *dibs;
552
553 dibs = dib_raw_ptr(h);
554 assert(dibs[idx] != DIB_RAW_FREE);
555
556 #ifdef ENABLE_DEBUG_HASHMAP
557 h->debug.rem_count++;
558 h->debug.last_rem_idx = idx;
559 #endif
560
561 left = idx;
562 /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
563 for (right = next_idx(h, left); ; right = next_idx(h, right)) {
564 raw_dib = dibs[right];
565 if (raw_dib == 0 || raw_dib == DIB_RAW_FREE)
566 break;
567
568 /* The buckets are not supposed to be all occupied and with DIB > 0.
569 * That would mean we could make everyone better off by shifting them
570 * backward. This scenario is impossible. */
571 assert(left != right);
572 }
573
574 if (h->type == HASHMAP_TYPE_ORDERED) {
575 OrderedHashmap *lh = (OrderedHashmap*) h;
576 struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
577
578 if (le->iterate_next != IDX_NIL)
579 ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
580 else
581 lh->iterate_list_tail = le->iterate_previous;
582
583 if (le->iterate_previous != IDX_NIL)
584 ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
585 else
586 lh->iterate_list_head = le->iterate_next;
587 }
588
589 /* Now shift all buckets in the interval (left, right) one step backwards */
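        /* Illustrative example: if bucket i holds A (DIB 0), bucket i+1 holds B
         * (DIB 1) and bucket i+2 holds C (DIB 1), then removing A moves B to
         * bucket i with DIB 0, moves C to bucket i+1 with DIB 0, and leaves
         * bucket i+2 to be marked free below. */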
590 for (prev = left, left = next_idx(h, left); left != right;
591 prev = left, left = next_idx(h, left)) {
592 dib = bucket_calculate_dib(h, left, dibs[left]);
593 assert(dib != 0);
594 bucket_move_entry(h, NULL, left, prev);
595 bucket_set_dib(h, prev, dib - 1);
596 }
597
598 bucket_mark_free(h, prev);
599 n_entries_dec(h);
600 }
601 #define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
602
603 static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
604 struct ordered_hashmap_entry *e;
605 unsigned idx;
606
607 assert(h);
608 assert(i);
609
610 if (i->idx == IDX_NIL)
611 goto at_end;
612
613 if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
614 goto at_end;
615
616 if (i->idx == IDX_FIRST) {
617 idx = h->iterate_list_head;
618 e = ordered_bucket_at(h, idx);
619 } else {
620 idx = i->idx;
621 e = ordered_bucket_at(h, idx);
622 /*
623 * We allow removing the current entry while iterating, but removal may cause
624 * a backward shift. The next entry may thus move one bucket to the left.
625 * To detect when it happens, we remember the key pointer of the entry we were
626 * going to iterate next. If it does not match, there was a backward shift.
627 */
628 if (e->p.b.key != i->next_key) {
629 idx = prev_idx(HASHMAP_BASE(h), idx);
630 e = ordered_bucket_at(h, idx);
631 }
632 assert(e->p.b.key == i->next_key);
633 }
634
635 #ifdef ENABLE_DEBUG_HASHMAP
636 i->prev_idx = idx;
637 #endif
638
639 if (e->iterate_next != IDX_NIL) {
640 struct ordered_hashmap_entry *n;
641 i->idx = e->iterate_next;
642 n = ordered_bucket_at(h, i->idx);
643 i->next_key = n->p.b.key;
644 } else
645 i->idx = IDX_NIL;
646
647 return idx;
648
649 at_end:
650 i->idx = IDX_NIL;
651 return IDX_NIL;
652 }
653
654 static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
655 unsigned idx;
656
657 assert(h);
658 assert(i);
659
660 if (i->idx == IDX_NIL)
661 goto at_end;
662
663 if (i->idx == IDX_FIRST) {
664 /* fast forward to the first occupied bucket */
665 if (h->has_indirect) {
666 i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
667 h->indirect.idx_lowest_entry = i->idx;
668 } else
669 i->idx = skip_free_buckets(h, 0);
670
671 if (i->idx == IDX_NIL)
672 goto at_end;
673 } else {
674 struct hashmap_base_entry *e;
675
676 assert(i->idx > 0);
677
678 e = bucket_at(h, i->idx);
679 /*
680 * We allow removing the current entry while iterating, but removal may cause
681 * a backward shift. The next entry may thus move one bucket to the left.
682 * To detect when it happens, we remember the key pointer of the entry we were
683 * going to iterate next. If it does not match, there was a backward shift.
684 */
685 if (e->key != i->next_key)
686 e = bucket_at(h, --i->idx);
687
688 assert(e->key == i->next_key);
689 }
690
691 idx = i->idx;
692 #ifdef ENABLE_DEBUG_HASHMAP
693 i->prev_idx = idx;
694 #endif
695
696 i->idx = skip_free_buckets(h, i->idx + 1);
697 if (i->idx != IDX_NIL)
698 i->next_key = bucket_at(h, i->idx)->key;
699 else
700 i->idx = IDX_NIL;
701
702 return idx;
703
704 at_end:
705 i->idx = IDX_NIL;
706 return IDX_NIL;
707 }
708
709 static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
710 if (!h) {
711 i->idx = IDX_NIL;
712 return IDX_NIL;
713 }
714
715 #ifdef ENABLE_DEBUG_HASHMAP
716 if (i->idx == IDX_FIRST) {
717 i->put_count = h->debug.put_count;
718 i->rem_count = h->debug.rem_count;
719 } else {
720 /* While iterating, must not add any new entries */
721 assert(i->put_count == h->debug.put_count);
722 /* ... or remove entries other than the current one */
723 assert(i->rem_count == h->debug.rem_count ||
724 (i->rem_count == h->debug.rem_count - 1 &&
725 i->prev_idx == h->debug.last_rem_idx));
726 /* Reset our removals counter */
727 i->rem_count = h->debug.rem_count;
728 }
729 #endif
730
731 return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
732 : hashmap_iterate_in_internal_order(h, i);
733 }
734
735 void *internal_hashmap_iterate(HashmapBase *h, Iterator *i, const void **key) {
736 struct hashmap_base_entry *e;
737 void *data;
738 unsigned idx;
739
740 idx = hashmap_iterate_entry(h, i);
741 if (idx == IDX_NIL) {
742 if (key)
743 *key = NULL;
744
745 return NULL;
746 }
747
748 e = bucket_at(h, idx);
749 data = entry_value(h, e);
750 if (key)
751 *key = e->key;
752
753 return data;
754 }
755
756 void *set_iterate(Set *s, Iterator *i) {
757 return internal_hashmap_iterate(HASHMAP_BASE(s), i, NULL);
758 }
759
760 #define HASHMAP_FOREACH_IDX(idx, h, i) \
761 for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
762 (idx != IDX_NIL); \
763 (idx) = hashmap_iterate_entry((h), &(i)))
764
765 static void reset_direct_storage(HashmapBase *h) {
766 const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
767 void *p;
768
769 assert(!h->has_indirect);
770
771 p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
772 memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
773 }
774
775 static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
776 HashmapBase *h;
777 const struct hashmap_type_info *hi = &hashmap_type_info[type];
778 bool use_pool;
779
780 use_pool = is_main_thread();
781
782 h = use_pool ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
783
784 if (!h)
785 return NULL;
786
787 h->type = type;
788 h->from_pool = use_pool;
789 h->hash_ops = hash_ops ? hash_ops : &trivial_hash_ops;
790
791 if (type == HASHMAP_TYPE_ORDERED) {
792 OrderedHashmap *lh = (OrderedHashmap*)h;
793 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
794 }
795
796 reset_direct_storage(h);
797
798 if (!shared_hash_key_initialized) {
799 random_bytes(shared_hash_key, sizeof(shared_hash_key));
800 shared_hash_key_initialized = true;
801 }
802
803 #ifdef ENABLE_DEBUG_HASHMAP
804 LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
805 h->debug.func = func;
806 h->debug.file = file;
807 h->debug.line = line;
808 #endif
809
810 return h;
811 }
812
813 Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
814 return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
815 }
816
817 OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
818 return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
819 }
820
821 Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
822 return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
823 }
824
825 static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
826 enum HashmapType type HASHMAP_DEBUG_PARAMS) {
827 HashmapBase *q;
828
829 assert(h);
830
831 if (*h)
832 return 0;
833
834 q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
835 if (!q)
836 return -ENOMEM;
837
838 *h = q;
839 return 0;
840 }
841
842 int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
843 return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
844 }
845
846 int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
847 return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
848 }
849
850 int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
851 return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
852 }
853
854 static void hashmap_free_no_clear(HashmapBase *h) {
855 assert(!h->has_indirect);
856 assert(!h->n_direct_entries);
857
858 #ifdef ENABLE_DEBUG_HASHMAP
859 LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
860 #endif
861
862 if (h->from_pool)
863 mempool_free_tile(hashmap_type_info[h->type].mempool, h);
864 else
865 free(h);
866 }
867
868 HashmapBase *internal_hashmap_free(HashmapBase *h) {
869
870 /* Free the hashmap, but nothing in it */
871
872 if (h) {
873 internal_hashmap_clear(h);
874 hashmap_free_no_clear(h);
875 }
876
877 return NULL;
878 }
879
880 HashmapBase *internal_hashmap_free_free(HashmapBase *h) {
881
882 /* Free the hashmap and all data objects in it, but not the
883 * keys */
884
885 if (h) {
886 internal_hashmap_clear_free(h);
887 hashmap_free_no_clear(h);
888 }
889
890 return NULL;
891 }
892
893 Hashmap *hashmap_free_free_free(Hashmap *h) {
894
895 /* Free the hashmap and all data and key objects in it */
896
897 if (h) {
898 hashmap_clear_free_free(h);
899 hashmap_free_no_clear(HASHMAP_BASE(h));
900 }
901
902 return NULL;
903 }
904
905 void internal_hashmap_clear(HashmapBase *h) {
906 if (!h)
907 return;
908
909 if (h->has_indirect) {
910 free(h->indirect.storage);
911 h->has_indirect = false;
912 }
913
914 h->n_direct_entries = 0;
915 reset_direct_storage(h);
916
917 if (h->type == HASHMAP_TYPE_ORDERED) {
918 OrderedHashmap *lh = (OrderedHashmap*) h;
919 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
920 }
921 }
922
923 void internal_hashmap_clear_free(HashmapBase *h) {
924 unsigned idx;
925
926 if (!h)
927 return;
928
929 for (idx = skip_free_buckets(h, 0); idx != IDX_NIL;
930 idx = skip_free_buckets(h, idx + 1))
931 free(entry_value(h, bucket_at(h, idx)));
932
933 internal_hashmap_clear(h);
934 }
935
936 void hashmap_clear_free_free(Hashmap *h) {
937 unsigned idx;
938
939 if (!h)
940 return;
941
942 for (idx = skip_free_buckets(HASHMAP_BASE(h), 0); idx != IDX_NIL;
943 idx = skip_free_buckets(HASHMAP_BASE(h), idx + 1)) {
944 struct plain_hashmap_entry *e = plain_bucket_at(h, idx);
945 free((void*)e->b.key);
946 free(e->value);
947 }
948
949 internal_hashmap_clear(HASHMAP_BASE(h));
950 }
951
952 static int resize_buckets(HashmapBase *h, unsigned entries_add);
953
954 /*
955 * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
956 * Performs Robin Hood swaps as it goes. The entry to put must be placed
957 * by the caller into swap slot IDX_PUT.
958 * If used for in-place resizing, may leave a displaced entry in swap slot
959 * IDX_PUT. Caller must rehash it next.
960 * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
961 * false otherwise.
962 */
963 static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
964 struct swap_entries *swap) {
965 dib_raw_t raw_dib, *dibs;
966 unsigned dib, distance;
967
968 #ifdef ENABLE_DEBUG_HASHMAP
969 h->debug.put_count++;
970 #endif
971
972 dibs = dib_raw_ptr(h);
973
974 for (distance = 0; ; distance++) {
975 raw_dib = dibs[idx];
976 if (raw_dib == DIB_RAW_FREE || raw_dib == DIB_RAW_REHASH) {
977 if (raw_dib == DIB_RAW_REHASH)
978 bucket_move_entry(h, swap, idx, IDX_TMP);
979
980 if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
981 h->indirect.idx_lowest_entry = idx;
982
983 bucket_set_dib(h, idx, distance);
984 bucket_move_entry(h, swap, IDX_PUT, idx);
985 if (raw_dib == DIB_RAW_REHASH) {
986 bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
987 return true;
988 }
989
990 return false;
991 }
992
993 dib = bucket_calculate_dib(h, idx, raw_dib);
994
995 if (dib < distance) {
996 /* Found a wealthier entry. Go Robin Hood! */
997 bucket_set_dib(h, idx, distance);
998
999 /* swap the entries */
1000 bucket_move_entry(h, swap, idx, IDX_TMP);
1001 bucket_move_entry(h, swap, IDX_PUT, idx);
1002 bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
1003
1004 distance = dib;
1005 }
1006
1007 idx = next_idx(h, idx);
1008 }
1009 }
1010
1011 /*
1012 * Puts an entry into a hashmap, boldly - no check whether key already exists.
1013 * The caller must place the entry (only its key and value, not link indexes)
1014 * in swap slot IDX_PUT.
1015  * Caller must ensure that the key does not exist yet in the hashmap,
1016  * and that a resize is not needed if !may_resize.
1017 * Returns: 1 if entry was put successfully.
1018 * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
1019 * Cannot return -ENOMEM if !may_resize.
1020 */
1021 static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
1022 struct swap_entries *swap, bool may_resize) {
1023 struct ordered_hashmap_entry *new_entry;
1024 int r;
1025
1026 assert(idx < n_buckets(h));
1027
1028 new_entry = bucket_at_swap(swap, IDX_PUT);
1029
1030 if (may_resize) {
1031 r = resize_buckets(h, 1);
1032 if (r < 0)
1033 return r;
1034 if (r > 0)
1035 idx = bucket_hash(h, new_entry->p.b.key);
1036 }
1037 assert(n_entries(h) < n_buckets(h));
1038
1039 if (h->type == HASHMAP_TYPE_ORDERED) {
1040 OrderedHashmap *lh = (OrderedHashmap*) h;
1041
1042 new_entry->iterate_next = IDX_NIL;
1043 new_entry->iterate_previous = lh->iterate_list_tail;
1044
1045 if (lh->iterate_list_tail != IDX_NIL) {
1046 struct ordered_hashmap_entry *old_tail;
1047
1048 old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
1049 assert(old_tail->iterate_next == IDX_NIL);
1050 old_tail->iterate_next = IDX_PUT;
1051 }
1052
1053 lh->iterate_list_tail = IDX_PUT;
1054 if (lh->iterate_list_head == IDX_NIL)
1055 lh->iterate_list_head = IDX_PUT;
1056 }
1057
1058 assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
1059
1060 n_entries_inc(h);
1061 #ifdef ENABLE_DEBUG_HASHMAP
1062 h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
1063 #endif
1064
1065 return 1;
1066 }
1067 #define hashmap_put_boldly(h, idx, swap, may_resize) \
1068 hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
1069
1070 /*
1071 * Returns 0 if resize is not needed.
1072 * 1 if successfully resized.
1073 * -ENOMEM on allocation failure.
1074 */
1075 static int resize_buckets(HashmapBase *h, unsigned entries_add) {
1076 struct swap_entries swap;
1077 char *new_storage;
1078 dib_raw_t *old_dibs, *new_dibs;
1079 const struct hashmap_type_info *hi;
1080 unsigned idx, optimal_idx;
1081 unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
1082 uint8_t new_shift;
1083 bool rehash_next;
1084
1085 assert(h);
1086
1087 hi = &hashmap_type_info[h->type];
1088 new_n_entries = n_entries(h) + entries_add;
1089
1090 /* overflow? */
1091 if (_unlikely_(new_n_entries < entries_add))
1092 return -ENOMEM;
1093
1094 /* For direct storage we allow 100% load, because it's tiny. */
1095 if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
1096 return 0;
1097
1098 /*
1099 * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
1100 * From it follows: m = n + n/(INV_KEEP_FREE - 1)
1101 */
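        /* Worked example with INV_KEEP_FREE == 5: for 100 entries this yields
         * 100 + 100/4 = 125 buckets, i.e. a load factor of 0.8. */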
1102 new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
1103 /* overflow? */
1104 if (_unlikely_(new_n_buckets < new_n_entries))
1105 return -ENOMEM;
1106
1107 if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
1108 return -ENOMEM;
1109
1110 old_n_buckets = n_buckets(h);
1111
1112 if (_likely_(new_n_buckets <= old_n_buckets))
1113 return 0;
1114
1115 new_shift = log2u_round_up(MAX(
1116 new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
1117 2 * sizeof(struct direct_storage)));
1118
1119 /* Realloc storage (buckets and DIB array). */
1120 new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
1121 1U << new_shift);
1122 if (!new_storage)
1123 return -ENOMEM;
1124
1125 /* Must upgrade direct to indirect storage. */
1126 if (!h->has_indirect) {
1127 memcpy(new_storage, h->direct.storage,
1128 old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
1129 h->indirect.n_entries = h->n_direct_entries;
1130 h->indirect.idx_lowest_entry = 0;
1131 h->n_direct_entries = 0;
1132 }
1133
1134 /* Get a new hash key. If we've just upgraded to indirect storage,
1135 * allow reusing a previously generated key. It's still a different key
1136 * from the shared one that we used for direct storage. */
1137 get_hash_key(h->indirect.hash_key, !h->has_indirect);
1138
1139 h->has_indirect = true;
1140 h->indirect.storage = new_storage;
1141 h->indirect.n_buckets = (1U << new_shift) /
1142 (hi->entry_size + sizeof(dib_raw_t));
1143
1144 old_dibs = (dib_raw_t*)(new_storage + hi->entry_size * old_n_buckets);
1145 new_dibs = dib_raw_ptr(h);
1146
1147 /*
1148 * Move the DIB array to the new place, replacing valid DIB values with
1149 * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
1150 * Note: Overlap is not possible, because we have at least doubled the
1151 * number of buckets and dib_raw_t is smaller than any entry type.
1152 */
1153 for (idx = 0; idx < old_n_buckets; idx++) {
1154 assert(old_dibs[idx] != DIB_RAW_REHASH);
1155 new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
1156 : DIB_RAW_REHASH;
1157 }
1158
1159 /* Zero the area of newly added entries (including the old DIB area) */
1160 memset(bucket_at(h, old_n_buckets), 0,
1161 (n_buckets(h) - old_n_buckets) * hi->entry_size);
1162
1163 /* The upper half of the new DIB array needs initialization */
1164 memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
1165 (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
1166
1167 /* Rehash entries that need it */
1168 n_rehashed = 0;
1169 for (idx = 0; idx < old_n_buckets; idx++) {
1170 if (new_dibs[idx] != DIB_RAW_REHASH)
1171 continue;
1172
1173 optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
1174
1175 /*
1176 * Not much to do if by luck the entry hashes to its current
1177 * location. Just set its DIB.
1178 */
1179 if (optimal_idx == idx) {
1180 new_dibs[idx] = 0;
1181 n_rehashed++;
1182 continue;
1183 }
1184
1185 new_dibs[idx] = DIB_RAW_FREE;
1186 bucket_move_entry(h, &swap, idx, IDX_PUT);
1187 /* bucket_move_entry does not clear the source */
1188 memset(bucket_at(h, idx), 0, hi->entry_size);
1189
1190 do {
1191 /*
1192 * Find the new bucket for the current entry. This may make
1193 * another entry homeless and load it into IDX_PUT.
1194 */
1195 rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
1196 n_rehashed++;
1197
1198 /* Did the current entry displace another one? */
1199 if (rehash_next)
1200 optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
1201 } while (rehash_next);
1202 }
1203
1204 assert(n_rehashed == n_entries(h));
1205
1206 return 1;
1207 }
1208
1209 /*
1210 * Finds an entry with a matching key
1211 * Returns: index of the found entry, or IDX_NIL if not found.
1212 */
1213 static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
1214 struct hashmap_base_entry *e;
1215 unsigned dib, distance;
1216 dib_raw_t *dibs = dib_raw_ptr(h);
1217
1218 assert(idx < n_buckets(h));
1219
1220 for (distance = 0; ; distance++) {
1221 if (dibs[idx] == DIB_RAW_FREE)
1222 return IDX_NIL;
1223
1224 dib = bucket_calculate_dib(h, idx, dibs[idx]);
1225
1226 if (dib < distance)
1227 return IDX_NIL;
1228 if (dib == distance) {
1229 e = bucket_at(h, idx);
1230 if (h->hash_ops->compare(e->key, key) == 0)
1231 return idx;
1232 }
1233
1234 idx = next_idx(h, idx);
1235 }
1236 }
1237 #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
1238
1239 int hashmap_put(Hashmap *h, const void *key, void *value) {
1240 struct swap_entries swap;
1241 struct plain_hashmap_entry *e;
1242 unsigned hash, idx;
1243
1244 assert(h);
1245
1246 hash = bucket_hash(h, key);
1247 idx = bucket_scan(h, hash, key);
1248 if (idx != IDX_NIL) {
1249 e = plain_bucket_at(h, idx);
1250 if (e->value == value)
1251 return 0;
1252 return -EEXIST;
1253 }
1254
1255 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1256 e->b.key = key;
1257 e->value = value;
1258 return hashmap_put_boldly(h, hash, &swap, true);
1259 }
1260
1261 int set_put(Set *s, const void *key) {
1262 struct swap_entries swap;
1263 struct hashmap_base_entry *e;
1264 unsigned hash, idx;
1265
1266 assert(s);
1267
1268 hash = bucket_hash(s, key);
1269 idx = bucket_scan(s, hash, key);
1270 if (idx != IDX_NIL)
1271 return 0;
1272
1273 e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1274 e->key = key;
1275 return hashmap_put_boldly(s, hash, &swap, true);
1276 }
1277
1278 int hashmap_replace(Hashmap *h, const void *key, void *value) {
1279 struct swap_entries swap;
1280 struct plain_hashmap_entry *e;
1281 unsigned hash, idx;
1282
1283 assert(h);
1284
1285 hash = bucket_hash(h, key);
1286 idx = bucket_scan(h, hash, key);
1287 if (idx != IDX_NIL) {
1288 e = plain_bucket_at(h, idx);
1289 #ifdef ENABLE_DEBUG_HASHMAP
1290 /* Although the key is equal, the key pointer may have changed,
1291 * and this would break our assumption for iterating. So count
1292 * this operation as incompatible with iteration. */
1293 if (e->b.key != key) {
1294 h->b.debug.put_count++;
1295 h->b.debug.rem_count++;
1296 h->b.debug.last_rem_idx = idx;
1297 }
1298 #endif
1299 e->b.key = key;
1300 e->value = value;
1301 return 0;
1302 }
1303
1304 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1305 e->b.key = key;
1306 e->value = value;
1307 return hashmap_put_boldly(h, hash, &swap, true);
1308 }
1309
1310 int hashmap_update(Hashmap *h, const void *key, void *value) {
1311 struct plain_hashmap_entry *e;
1312 unsigned hash, idx;
1313
1314 assert(h);
1315
1316 hash = bucket_hash(h, key);
1317 idx = bucket_scan(h, hash, key);
1318 if (idx == IDX_NIL)
1319 return -ENOENT;
1320
1321 e = plain_bucket_at(h, idx);
1322 e->value = value;
1323 return 0;
1324 }
1325
1326 void *internal_hashmap_get(HashmapBase *h, const void *key) {
1327 struct hashmap_base_entry *e;
1328 unsigned hash, idx;
1329
1330 if (!h)
1331 return NULL;
1332
1333 hash = bucket_hash(h, key);
1334 idx = bucket_scan(h, hash, key);
1335 if (idx == IDX_NIL)
1336 return NULL;
1337
1338 e = bucket_at(h, idx);
1339 return entry_value(h, e);
1340 }
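/*
 * For illustration only: typical use of the public API declared in hashmap.h.
 * It is assumed here that hashmap_new(), hashmap_get() and hashmap_free() are
 * the usual thin wrappers around the internal_* functions in this file.
 * Kept out of the build with #if 0.
 */
#if 0
static int example_usage(void) {
        Hashmap *m;
        int r;

        m = hashmap_new(&string_hash_ops);
        if (!m)
                return -ENOMEM;

        /* Keys are not copied; the caller keeps them alive. */
        r = hashmap_put(m, "answer", INT_TO_PTR(42));
        if (r < 0) {
                hashmap_free(m);
                return r;
        }

        assert_se(PTR_TO_INT(hashmap_get(m, "answer")) == 42);

        hashmap_free(m);
        return 0;
}
#endif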
1341
1342 void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
1343 struct plain_hashmap_entry *e;
1344 unsigned hash, idx;
1345
1346 if (!h)
1347 return NULL;
1348
1349 hash = bucket_hash(h, key);
1350 idx = bucket_scan(h, hash, key);
1351 if (idx == IDX_NIL)
1352 return NULL;
1353
1354 e = plain_bucket_at(h, idx);
1355 if (key2)
1356 *key2 = (void*) e->b.key;
1357
1358 return e->value;
1359 }
1360
1361 bool internal_hashmap_contains(HashmapBase *h, const void *key) {
1362 unsigned hash;
1363
1364 if (!h)
1365 return false;
1366
1367 hash = bucket_hash(h, key);
1368 return bucket_scan(h, hash, key) != IDX_NIL;
1369 }
1370
1371 void *internal_hashmap_remove(HashmapBase *h, const void *key) {
1372 struct hashmap_base_entry *e;
1373 unsigned hash, idx;
1374 void *data;
1375
1376 if (!h)
1377 return NULL;
1378
1379 hash = bucket_hash(h, key);
1380 idx = bucket_scan(h, hash, key);
1381 if (idx == IDX_NIL)
1382 return NULL;
1383
1384 e = bucket_at(h, idx);
1385 data = entry_value(h, e);
1386 remove_entry(h, idx);
1387
1388 return data;
1389 }
1390
1391 void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
1392 struct plain_hashmap_entry *e;
1393 unsigned hash, idx;
1394 void *data;
1395
1396 if (!h) {
1397 if (rkey)
1398 *rkey = NULL;
1399 return NULL;
1400 }
1401
1402 hash = bucket_hash(h, key);
1403 idx = bucket_scan(h, hash, key);
1404 if (idx == IDX_NIL) {
1405 if (rkey)
1406 *rkey = NULL;
1407 return NULL;
1408 }
1409
1410 e = plain_bucket_at(h, idx);
1411 data = e->value;
1412 if (rkey)
1413 *rkey = (void*) e->b.key;
1414
1415 remove_entry(h, idx);
1416
1417 return data;
1418 }
1419
1420 int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1421 struct swap_entries swap;
1422 struct plain_hashmap_entry *e;
1423 unsigned old_hash, new_hash, idx;
1424
1425 if (!h)
1426 return -ENOENT;
1427
1428 old_hash = bucket_hash(h, old_key);
1429 idx = bucket_scan(h, old_hash, old_key);
1430 if (idx == IDX_NIL)
1431 return -ENOENT;
1432
1433 new_hash = bucket_hash(h, new_key);
1434 if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
1435 return -EEXIST;
1436
1437 remove_entry(h, idx);
1438
1439 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1440 e->b.key = new_key;
1441 e->value = value;
1442 assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1443
1444 return 0;
1445 }
1446
1447 int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
1448 struct swap_entries swap;
1449 struct hashmap_base_entry *e;
1450 unsigned old_hash, new_hash, idx;
1451
1452 if (!s)
1453 return -ENOENT;
1454
1455 old_hash = bucket_hash(s, old_key);
1456 idx = bucket_scan(s, old_hash, old_key);
1457 if (idx == IDX_NIL)
1458 return -ENOENT;
1459
1460 new_hash = bucket_hash(s, new_key);
1461 if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
1462 return -EEXIST;
1463
1464 remove_entry(s, idx);
1465
1466 e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1467 e->key = new_key;
1468 assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
1469
1470 return 0;
1471 }
1472
1473 int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1474 struct swap_entries swap;
1475 struct plain_hashmap_entry *e;
1476 unsigned old_hash, new_hash, idx_old, idx_new;
1477
1478 if (!h)
1479 return -ENOENT;
1480
1481 old_hash = bucket_hash(h, old_key);
1482 idx_old = bucket_scan(h, old_hash, old_key);
1483 if (idx_old == IDX_NIL)
1484 return -ENOENT;
1485
1486 old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
1487
1488 new_hash = bucket_hash(h, new_key);
1489 idx_new = bucket_scan(h, new_hash, new_key);
1490 if (idx_new != IDX_NIL)
1491 if (idx_old != idx_new) {
1492 remove_entry(h, idx_new);
1493 /* Compensate for a possible backward shift. */
1494 if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
1495 idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
1496 assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
1497 }
1498
1499 remove_entry(h, idx_old);
1500
1501 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1502 e->b.key = new_key;
1503 e->value = value;
1504 assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1505
1506 return 0;
1507 }
1508
1509 void *hashmap_remove_value(Hashmap *h, const void *key, void *value) {
1510 struct plain_hashmap_entry *e;
1511 unsigned hash, idx;
1512
1513 if (!h)
1514 return NULL;
1515
1516 hash = bucket_hash(h, key);
1517 idx = bucket_scan(h, hash, key);
1518 if (idx == IDX_NIL)
1519 return NULL;
1520
1521 e = plain_bucket_at(h, idx);
1522 if (e->value != value)
1523 return NULL;
1524
1525 remove_entry(h, idx);
1526
1527 return value;
1528 }
1529
1530 static unsigned find_first_entry(HashmapBase *h) {
1531 Iterator i = ITERATOR_FIRST;
1532
1533 if (!h || !n_entries(h))
1534 return IDX_NIL;
1535
1536 return hashmap_iterate_entry(h, &i);
1537 }
1538
1539 void *internal_hashmap_first(HashmapBase *h) {
1540 unsigned idx;
1541
1542 idx = find_first_entry(h);
1543 if (idx == IDX_NIL)
1544 return NULL;
1545
1546 return entry_value(h, bucket_at(h, idx));
1547 }
1548
1549 void *internal_hashmap_first_key(HashmapBase *h) {
1550 struct hashmap_base_entry *e;
1551 unsigned idx;
1552
1553 idx = find_first_entry(h);
1554 if (idx == IDX_NIL)
1555 return NULL;
1556
1557 e = bucket_at(h, idx);
1558 return (void*) e->key;
1559 }
1560
1561 void *internal_hashmap_steal_first(HashmapBase *h) {
1562 struct hashmap_base_entry *e;
1563 void *data;
1564 unsigned idx;
1565
1566 idx = find_first_entry(h);
1567 if (idx == IDX_NIL)
1568 return NULL;
1569
1570 e = bucket_at(h, idx);
1571 data = entry_value(h, e);
1572 remove_entry(h, idx);
1573
1574 return data;
1575 }
1576
1577 void *internal_hashmap_steal_first_key(HashmapBase *h) {
1578 struct hashmap_base_entry *e;
1579 void *key;
1580 unsigned idx;
1581
1582 idx = find_first_entry(h);
1583 if (idx == IDX_NIL)
1584 return NULL;
1585
1586 e = bucket_at(h, idx);
1587 key = (void*) e->key;
1588 remove_entry(h, idx);
1589
1590 return key;
1591 }
1592
1593 unsigned internal_hashmap_size(HashmapBase *h) {
1594
1595 if (!h)
1596 return 0;
1597
1598 return n_entries(h);
1599 }
1600
1601 unsigned internal_hashmap_buckets(HashmapBase *h) {
1602
1603 if (!h)
1604 return 0;
1605
1606 return n_buckets(h);
1607 }
1608
1609 int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
1610 Iterator i;
1611 unsigned idx;
1612
1613 assert(h);
1614
1615 HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1616 struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
1617 int r;
1618
1619 r = hashmap_put(h, pe->b.key, pe->value);
1620 if (r < 0 && r != -EEXIST)
1621 return r;
1622 }
1623
1624 return 0;
1625 }
1626
1627 int set_merge(Set *s, Set *other) {
1628 Iterator i;
1629 unsigned idx;
1630
1631 assert(s);
1632
1633 HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1634 struct set_entry *se = set_bucket_at(other, idx);
1635 int r;
1636
1637 r = set_put(s, se->b.key);
1638 if (r < 0)
1639 return r;
1640 }
1641
1642 return 0;
1643 }
1644
1645 int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
1646 int r;
1647
1648 assert(h);
1649
1650 r = resize_buckets(h, entries_add);
1651 if (r < 0)
1652 return r;
1653
1654 return 0;
1655 }
1656
1657 /*
1658 * The same as hashmap_merge(), but every new item from other is moved to h.
1659 * Keys already in h are skipped and stay in other.
1660 * Returns: 0 on success.
1661 * -ENOMEM on alloc failure, in which case no move has been done.
1662 */
1663 int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
1664 struct swap_entries swap;
1665 struct hashmap_base_entry *e, *n;
1666 Iterator i;
1667 unsigned idx;
1668 int r;
1669
1670 assert(h);
1671
1672 if (!other)
1673 return 0;
1674
1675 assert(other->type == h->type);
1676
1677 /*
1678 * This reserves buckets for the worst case, where none of other's
1679 * entries are yet present in h. This is preferable to risking
1680 * an allocation failure in the middle of the moving and having to
1681 * rollback or return a partial result.
1682 */
1683 r = resize_buckets(h, n_entries(other));
1684 if (r < 0)
1685 return r;
1686
1687 HASHMAP_FOREACH_IDX(idx, other, i) {
1688 unsigned h_hash;
1689
1690 e = bucket_at(other, idx);
1691 h_hash = bucket_hash(h, e->key);
1692 if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
1693 continue;
1694
1695 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1696 n->key = e->key;
1697 if (h->type != HASHMAP_TYPE_SET)
1698 ((struct plain_hashmap_entry*) n)->value =
1699 ((struct plain_hashmap_entry*) e)->value;
1700 assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
1701
1702 remove_entry(other, idx);
1703 }
1704
1705 return 0;
1706 }
1707
1708 int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
1709 struct swap_entries swap;
1710 unsigned h_hash, other_hash, idx;
1711 struct hashmap_base_entry *e, *n;
1712 int r;
1713
1714 assert(h);
1715
1716 h_hash = bucket_hash(h, key);
1717 if (bucket_scan(h, h_hash, key) != IDX_NIL)
1718 return -EEXIST;
1719
1720 if (!other)
1721 return -ENOENT;
1722
1723 assert(other->type == h->type);
1724
1725 other_hash = bucket_hash(other, key);
1726 idx = bucket_scan(other, other_hash, key);
1727 if (idx == IDX_NIL)
1728 return -ENOENT;
1729
1730 e = bucket_at(other, idx);
1731
1732 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1733 n->key = e->key;
1734 if (h->type != HASHMAP_TYPE_SET)
1735 ((struct plain_hashmap_entry*) n)->value =
1736 ((struct plain_hashmap_entry*) e)->value;
1737 r = hashmap_put_boldly(h, h_hash, &swap, true);
1738 if (r < 0)
1739 return r;
1740
1741 remove_entry(other, idx);
1742 return 0;
1743 }
1744
1745 HashmapBase *internal_hashmap_copy(HashmapBase *h) {
1746 HashmapBase *copy;
1747 int r;
1748
1749 assert(h);
1750
1751 copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_SRC_ARGS);
1752 if (!copy)
1753 return NULL;
1754
1755 switch (h->type) {
1756 case HASHMAP_TYPE_PLAIN:
1757 case HASHMAP_TYPE_ORDERED:
1758 r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
1759 break;
1760 case HASHMAP_TYPE_SET:
1761 r = set_merge((Set*)copy, (Set*)h);
1762 break;
1763 default:
1764 assert_not_reached("Unknown hashmap type");
1765 }
1766
1767 if (r < 0) {
1768 internal_hashmap_free(copy);
1769 return NULL;
1770 }
1771
1772 return copy;
1773 }
1774
1775 char **internal_hashmap_get_strv(HashmapBase *h) {
1776 char **sv;
1777 Iterator i;
1778 unsigned idx, n;
1779
1780 sv = new(char*, n_entries(h)+1);
1781 if (!sv)
1782 return NULL;
1783
1784 n = 0;
1785 HASHMAP_FOREACH_IDX(idx, h, i)
1786 sv[n++] = entry_value(h, bucket_at(h, idx));
1787 sv[n] = NULL;
1788
1789 return sv;
1790 }
1791
1792 void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
1793 struct ordered_hashmap_entry *e;
1794 unsigned hash, idx;
1795
1796 assert(key);
1797
1798 if (!h)
1799 return NULL;
1800
1801 hash = bucket_hash(h, key);
1802 idx = bucket_scan(h, hash, key);
1803 if (idx == IDX_NIL)
1804 return NULL;
1805
1806 e = ordered_bucket_at(h, idx);
1807 if (e->iterate_next == IDX_NIL)
1808 return NULL;
1809 return ordered_bucket_at(h, e->iterate_next)->p.value;
1810 }
1811
1812 int set_consume(Set *s, void *value) {
1813 int r;
1814
1815 r = set_put(s, value);
1816 if (r <= 0)
1817 free(value);
1818
1819 return r;
1820 }
1821
1822 int set_put_strdup(Set *s, const char *p) {
1823 char *c;
1824 int r;
1825
1826 assert(s);
1827 assert(p);
1828
1829 c = strdup(p);
1830 if (!c)
1831 return -ENOMEM;
1832
1833 r = set_consume(s, c);
1834 if (r == -EEXIST)
1835 return 0;
1836
1837 return r;
1838 }
1839
1840 int set_put_strdupv(Set *s, char **l) {
1841 int n = 0, r;
1842 char **i;
1843
1844 STRV_FOREACH(i, l) {
1845 r = set_put_strdup(s, *i);
1846 if (r < 0)
1847 return r;
1848
1849 n += r;
1850 }
1851
1852 return n;
1853 }
1854