1 /* hash - implement simple hashing table where the keys are memory blocks.
2 Copyright (C) 1994-1995, 2000-2006, 2018, 2020 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 /* Specification. */
21 #include "mem-hash-map.h"
22
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 #include <limits.h>
27 #include <sys/types.h>
28
29 /* Since this simple implementation of hash tables allows only insertion, no
30 removal of entries, the right data structure for the memory holding all keys
31 is an obstack. */
32 #include "obstack.h"
33
34 /* Use checked memory allocation. */
35 #include "xalloc.h"
36
37 #define obstack_chunk_alloc xmalloc
38 #define obstack_chunk_free free
39
40
41 typedef struct hash_entry
42 {
43 unsigned long used; /* Hash code of the key, or 0 for an unused entry. */
44 const void *key; /* Key. */
45 size_t keylen;
46 void *data; /* Value. */
47 struct hash_entry *next;
48 }
49 hash_entry;
50
51
52 /* Given an odd CANDIDATE > 1, return true if it is a prime number. */
53 static int
is_prime(unsigned long int candidate)54 is_prime (unsigned long int candidate)
55 {
56 /* No even number and none less than 10 will be passed here. */
57 unsigned long int divn = 3;
58 unsigned long int sq = divn * divn;
59
60 while (sq < candidate && candidate % divn != 0)
61 {
62 ++divn;
63 sq += 4 * divn;
64 ++divn;
65 }
66
67 return candidate % divn != 0;
68 }
69
70
71 /* Given SEED > 1, return the smallest odd prime number >= SEED. */
72 unsigned long
next_prime(unsigned long int seed)73 next_prime (unsigned long int seed)
74 {
75 /* Make it definitely odd. */
76 seed |= 1;
77
78 while (!is_prime (seed))
79 seed += 2;
80
81 return seed;
82 }
83
84
85 /* Initialize a hash table. INIT_SIZE > 1 is the initial number of available
86 entries.
87 Return 0 upon successful completion, -1 upon memory allocation error. */
88 int
hash_init(hash_table * htab,unsigned long int init_size)89 hash_init (hash_table *htab, unsigned long int init_size)
90 {
91 /* We need the size to be a prime. */
92 init_size = next_prime (init_size);
93
94 /* Initialize the data structure. */
95 htab->size = init_size;
96 htab->filled = 0;
97 htab->first = NULL;
98 htab->table = XCALLOC (init_size + 1, hash_entry);
99
100 obstack_init (&htab->mem_pool);
101
102 return 0;
103 }
104
105
106 /* Delete a hash table's contents.
107 Return 0 always. */
108 int
hash_destroy(hash_table * htab)109 hash_destroy (hash_table *htab)
110 {
111 free (htab->table);
112 obstack_free (&htab->mem_pool, NULL);
113 return 0;
114 }
115
116
117 /* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY
118 in memory. */
119 static unsigned long
compute_hashval(const void * key,size_t keylen)120 compute_hashval (const void *key, size_t keylen)
121 {
122 size_t cnt;
123 unsigned long int hval;
124
125 /* Compute the hash value for the given string. The algorithm
126 is taken from [Aho,Sethi,Ullman], fixed according to
127 https://haible.de/bruno/hashfunc.html. */
128 cnt = 0;
129 hval = keylen;
130 while (cnt < keylen)
131 {
132 hval = (hval << 9) | (hval >> (sizeof (unsigned long) * CHAR_BIT - 9));
133 hval += (unsigned long int) *(((const char *) key) + cnt++);
134 }
135 return hval != 0 ? hval : ~((unsigned long) 0);
136 }
137
138
139 /* References:
140 [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
141 [Knuth] The Art of Computer Programming, part3 (6.4) */
142
143 /* Look up a given key in the hash table.
144 Return the index of the entry, if present, or otherwise the index a free
145 entry where it could be inserted. */
146 static size_t
lookup(hash_table * htab,const void * key,size_t keylen,unsigned long int hval)147 lookup (hash_table *htab,
148 const void *key, size_t keylen,
149 unsigned long int hval)
150 {
151 unsigned long int hash;
152 size_t idx;
153 hash_entry *table = htab->table;
154
155 /* First hash function: simply take the modul but prevent zero. */
156 hash = 1 + hval % htab->size;
157
158 idx = hash;
159
160 if (table[idx].used)
161 {
162 if (table[idx].used == hval && table[idx].keylen == keylen
163 && memcmp (table[idx].key, key, keylen) == 0)
164 return idx;
165
166 /* Second hash function as suggested in [Knuth]. */
167 hash = 1 + hval % (htab->size - 2);
168
169 do
170 {
171 if (idx <= hash)
172 idx = htab->size + idx - hash;
173 else
174 idx -= hash;
175
176 /* If entry is found use it. */
177 if (table[idx].used == hval && table[idx].keylen == keylen
178 && memcmp (table[idx].key, key, keylen) == 0)
179 return idx;
180 }
181 while (table[idx].used);
182 }
183 return idx;
184 }
185
186
187 /* Look up the value of a key in the given table.
188 If found, return 0 and set *RESULT to it. Otherwise return -1. */
189 int
hash_find_entry(hash_table * htab,const void * key,size_t keylen,void ** result)190 hash_find_entry (hash_table *htab, const void *key, size_t keylen,
191 void **result)
192 {
193 hash_entry *table = htab->table;
194 size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
195
196 if (table[idx].used == 0)
197 return -1;
198
199 *result = table[idx].data;
200 return 0;
201 }
202
203
204 /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX.
205 HVAL is the key's hash code. IDX depends on it. The table entry at index
206 IDX is known to be unused. */
207 static void
insert_entry_2(hash_table * htab,const void * key,size_t keylen,unsigned long int hval,size_t idx,void * data)208 insert_entry_2 (hash_table *htab,
209 const void *key, size_t keylen,
210 unsigned long int hval, size_t idx, void *data)
211 {
212 hash_entry *table = htab->table;
213
214 table[idx].used = hval;
215 table[idx].key = key;
216 table[idx].keylen = keylen;
217 table[idx].data = data;
218
219 /* List the new value in the list. */
220 if (htab->first == NULL)
221 {
222 table[idx].next = &table[idx];
223 htab->first = &table[idx];
224 }
225 else
226 {
227 table[idx].next = htab->first->next;
228 htab->first->next = &table[idx];
229 htab->first = &table[idx];
230 }
231
232 ++htab->filled;
233 }
234
235
236 /* Grow the hash table. */
237 static void
resize(hash_table * htab)238 resize (hash_table *htab)
239 {
240 unsigned long int old_size = htab->size;
241 hash_entry *table = htab->table;
242 size_t idx;
243
244 htab->size = next_prime (htab->size * 2);
245 htab->filled = 0;
246 htab->first = NULL;
247 htab->table = XCALLOC (1 + htab->size, hash_entry);
248
249 for (idx = 1; idx <= old_size; ++idx)
250 if (table[idx].used)
251 insert_entry_2 (htab, table[idx].key, table[idx].keylen,
252 table[idx].used,
253 lookup (htab, table[idx].key, table[idx].keylen,
254 table[idx].used),
255 table[idx].data);
256
257 free (table);
258 }
259
260
261 /* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
262 Return non-NULL (more precisely, the address of the KEY inside the table's
263 memory pool) if successful, or NULL if there is already an entry with the
264 given key. */
265 const void *
hash_insert_entry(hash_table * htab,const void * key,size_t keylen,void * data)266 hash_insert_entry (hash_table *htab,
267 const void *key, size_t keylen,
268 void *data)
269 {
270 unsigned long int hval = compute_hashval (key, keylen);
271 hash_entry *table = htab->table;
272 size_t idx = lookup (htab, key, keylen, hval);
273
274 if (table[idx].used)
275 /* We don't want to overwrite the old value. */
276 return NULL;
277 else
278 {
279 /* An empty bucket has been found. */
280 void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
281 insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
282 if (100 * htab->filled > 75 * htab->size)
283 /* Table is filled more than 75%. Resize the table. */
284 resize (htab);
285 return keycopy;
286 }
287 }
288
289
290 /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
291 Return 0. */
292 int
hash_set_value(hash_table * htab,const void * key,size_t keylen,void * data)293 hash_set_value (hash_table *htab,
294 const void *key, size_t keylen,
295 void *data)
296 {
297 unsigned long int hval = compute_hashval (key, keylen);
298 hash_entry *table = htab->table;
299 size_t idx = lookup (htab, key, keylen, hval);
300
301 if (table[idx].used)
302 {
303 /* Overwrite the old value. */
304 table[idx].data = data;
305 return 0;
306 }
307 else
308 {
309 /* An empty bucket has been found. */
310 void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
311 insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
312 if (100 * htab->filled > 75 * htab->size)
313 /* Table is filled more than 75%. Resize the table. */
314 resize (htab);
315 return 0;
316 }
317 }
318
319
320 /* Steps *PTR forward to the next used entry in the given hash table. *PTR
321 should be initially set to NULL. Store information about the next entry
322 in *KEY, *KEYLEN, *DATA.
323 Return 0 normally, -1 when the whole hash table has been traversed. */
324 int
hash_iterate(hash_table * htab,void ** ptr,const void ** key,size_t * keylen,void ** data)325 hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen,
326 void **data)
327 {
328 hash_entry *curr;
329
330 if (*ptr == NULL)
331 {
332 if (htab->first == NULL)
333 return -1;
334 curr = htab->first;
335 }
336 else
337 {
338 if (*ptr == htab->first)
339 return -1;
340 curr = (hash_entry *) *ptr;
341 }
342 curr = curr->next;
343 *ptr = (void *) curr;
344
345 *key = curr->key;
346 *keylen = curr->keylen;
347 *data = curr->data;
348 return 0;
349 }
350
351
352 /* Steps *PTR forward to the next used entry in the given hash table. *PTR
353 should be initially set to NULL. Store information about the next entry
354 in *KEY, *KEYLEN, *DATAP. *DATAP is set to point to the storage of the
355 value; modifying **DATAP will modify the value of the entry.
356 Return 0 normally, -1 when the whole hash table has been traversed. */
357 int
hash_iterate_modify(hash_table * htab,void ** ptr,const void ** key,size_t * keylen,void *** datap)358 hash_iterate_modify (hash_table *htab, void **ptr,
359 const void **key, size_t *keylen,
360 void ***datap)
361 {
362 hash_entry *curr;
363
364 if (*ptr == NULL)
365 {
366 if (htab->first == NULL)
367 return -1;
368 curr = htab->first;
369 }
370 else
371 {
372 if (*ptr == htab->first)
373 return -1;
374 curr = (hash_entry *) *ptr;
375 }
376 curr = curr->next;
377 *ptr = (void *) curr;
378
379 *key = curr->key;
380 *keylen = curr->keylen;
381 *datap = &curr->data;
382 return 0;
383 }
384