1 /*
2 ** upb_table
3 **
4 ** This header is INTERNAL-ONLY! Its interfaces are not public or stable!
5 ** This file defines very fast int->upb_value (inttable) and string->upb_value
6 ** (strtable) hash tables.
7 **
8 ** The table uses chained scatter with Brent's variation (inspired by the Lua
9 ** implementation of hash tables). The hash function for strings is Austin
10 ** Appleby's "MurmurHash."
11 **
12 ** The inttable uses uintptr_t as its key, which guarantees it can be used to
13 ** store pointers or integers of at least 32 bits (upb isn't really useful on
14 ** systems where sizeof(void*) < 4).
15 **
16 ** The table must be homogeneous (all values of the same type). In debug
17 ** mode, we check this on insert and lookup.
18 */
19
20 #ifndef UPB_TABLE_H_
21 #define UPB_TABLE_H_
22
23 #include <stdint.h>
24 #include <string.h>
25 #include "upb/upb.h"
26
27 #include "upb/port_def.inc"
28
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32
33
34 /* upb_value ******************************************************************/
35
/* Type tag for the C type held in a upb_value.  The tag is not stored with
 * the value inside the table; it exists so that table accessors can be checked
 * for correct typing without needing a separate accessor per type. */
typedef enum {
  UPB_CTYPE_INT32    = 1,
  UPB_CTYPE_INT64    = 2,
  UPB_CTYPE_UINT32   = 3,
  UPB_CTYPE_UINT64   = 4,
  UPB_CTYPE_BOOL     = 5,
  UPB_CTYPE_CSTR     = 6,
  UPB_CTYPE_PTR      = 7,
  UPB_CTYPE_CONSTPTR = 8,
  UPB_CTYPE_FPTR     = 9,
  UPB_CTYPE_FLOAT    = 10,
  UPB_CTYPE_DOUBLE   = 11
} upb_ctype_t;
52
/* Untagged 64-bit storage cell holding a single table value. */
typedef struct {
  uint64_t val;  /* Raw bits of the stored value. */
} upb_value;
56
57 /* Like strdup(), which isn't always available since it's not ANSI C. */
58 char *upb_strdup(const char *s, upb_alloc *a);
/* Variant that works with a length-delimited rather than NUL-terminated string,
 * as supported by strtable. */
61 char *upb_strdup2(const char *s, size_t len, upb_alloc *a);
62
upb_gstrdup(const char * s)63 UPB_INLINE char *upb_gstrdup(const char *s) {
64 return upb_strdup(s, &upb_alloc_global);
65 }
66
_upb_value_setval(upb_value * v,uint64_t val)67 UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) {
68 v->val = val;
69 }
70
_upb_value_val(uint64_t val)71 UPB_INLINE upb_value _upb_value_val(uint64_t val) {
72 upb_value ret;
73 _upb_value_setval(&ret, val);
74 return ret;
75 }
76
77 /* For each value ctype, define the following set of functions:
78 *
79 * // Get/set an int32 from a upb_value.
80 * int32_t upb_value_getint32(upb_value val);
81 * void upb_value_setint32(upb_value *val, int32_t cval);
82 *
83 * // Construct a new upb_value from an int32.
84 * upb_value upb_value_int32(int32_t val); */
85 #define FUNCS(name, membername, type_t, converter, proto_type) \
86 UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
87 val->val = (converter)cval; \
88 } \
89 UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
90 upb_value ret; \
91 upb_value_set ## name(&ret, val); \
92 return ret; \
93 } \
94 UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
95 return (type_t)(converter)val.val; \
96 }
97
FUNCS(int32,int32,int32_t,int32_t,UPB_CTYPE_INT32)98 FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
99 FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
100 FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
101 FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
102 FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
103 FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
104 FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
105 FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
106 FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR)
107
108 #undef FUNCS
109
110 UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
111 memcpy(&val->val, &cval, sizeof(cval));
112 }
113
upb_value_setdouble(upb_value * val,double cval)114 UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
115 memcpy(&val->val, &cval, sizeof(cval));
116 }
117
upb_value_float(float cval)118 UPB_INLINE upb_value upb_value_float(float cval) {
119 upb_value ret;
120 upb_value_setfloat(&ret, cval);
121 return ret;
122 }
123
upb_value_double(double cval)124 UPB_INLINE upb_value upb_value_double(double cval) {
125 upb_value ret;
126 upb_value_setdouble(&ret, cval);
127 return ret;
128 }
129
130 #undef SET_TYPE
131
132
133 /* upb_tabkey *****************************************************************/
134
/* A table key.  Depending on the kind of table, this holds either:
 *   - int table:    the integer key itself, or
 *   - string table: a pointer to a string prefixed by its uint32_t length,
 *                   owned by us.
 *
 * A union of those two types would express this more directly, but C89
 * cannot statically initialize a non-first union member. */
typedef uintptr_t upb_tabkey;
143
upb_tabstr(upb_tabkey key,uint32_t * len)144 UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
145 char* mem = (char*)key;
146 if (len) memcpy(len, mem, sizeof(*len));
147 return mem + sizeof(*len);
148 }
149
upb_tabstrview(upb_tabkey key)150 UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) {
151 upb_strview ret;
152 uint32_t len;
153 ret.data = upb_tabstr(key, &len);
154 ret.size = len;
155 return ret;
156 }
157
158 /* upb_tabval *****************************************************************/
159
/* Raw 64-bit value as stored in a table slot. */
typedef struct upb_tabval {
  uint64_t val;
} upb_tabval;

/* Static initializer for an empty slot: all bits set.  The cast matches the
 * (uint64_t)-1 sentinel tested by upb_arrhas() and avoids an implicit
 * signed-to-unsigned conversion in the initializer (which is a narrowing
 * error under C++11 list-initialization). */
#define UPB_TABVALUE_EMPTY_INIT {(uint64_t)-1}
165
166 /* upb_table ******************************************************************/
167
168 typedef struct _upb_tabent {
169 upb_tabkey key;
170 upb_tabval val;
171
172 /* Internal chaining. This is const so we can create static initializers for
173 * tables. We cast away const sometimes, but *only* when the containing
174 * upb_table is known to be non-const. This requires a bit of care, but
175 * the subtlety is confined to table.c. */
176 const struct _upb_tabent *next;
177 } upb_tabent;
178
179 typedef struct {
180 size_t count; /* Number of entries in the hash part. */
181 uint32_t mask; /* Mask to turn hash value -> bucket. */
182 uint32_t max_count; /* Max count before we hit our load limit. */
183 uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
184
185 /* Hash table entries.
186 * Making this const isn't entirely accurate; what we really want is for it to
187 * have the same const-ness as the table it's inside. But there's no way to
188 * declare that in C. So we have to make it const so that we can statically
189 * initialize const hash tables. Then we cast away const when we have to.
190 */
191 const upb_tabent *entries;
192 } upb_table;
193
194 typedef struct {
195 upb_table t;
196 } upb_strtable;
197
198 typedef struct {
199 upb_table t; /* For entries that don't fit in the array part. */
200 const upb_tabval *array; /* Array part of the table. See const note above. */
201 size_t array_size; /* Array part size. */
202 size_t array_count; /* Array part number of elements. */
203 } upb_inttable;
204
205 #define UPB_ARRAY_EMPTYENT -1
206
upb_table_size(const upb_table * t)207 UPB_INLINE size_t upb_table_size(const upb_table *t) {
208 if (t->size_lg2 == 0)
209 return 0;
210 else
211 return 1 << t->size_lg2;
212 }
213
214 /* Internal-only functions, in .h file only out of necessity. */
upb_tabent_isempty(const upb_tabent * e)215 UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
216 return e->key == 0;
217 }
218
219 /* Used by some of the unit tests for generic hashing functionality. */
220 uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed);
221
upb_intkey(uintptr_t key)222 UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
223 return key;
224 }
225
upb_inthash(uintptr_t key)226 UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
227 return (uint32_t)key;
228 }
229
upb_getentry(const upb_table * t,uint32_t hash)230 static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
231 return t->entries + (hash & t->mask);
232 }
233
upb_arrhas(upb_tabval key)234 UPB_INLINE bool upb_arrhas(upb_tabval key) {
235 return key.val != (uint64_t)-1;
236 }
237
/* Initialize and uninitialize a table, respectively.  If memory allocation
 * fails, false is returned and the table is left uninitialized. */
240 bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
241 bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
242 size_t expected_size, upb_alloc *a);
243 void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
244 void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
245
upb_inttable_init(upb_inttable * table,upb_ctype_t ctype)246 UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
247 return upb_inttable_init2(table, ctype, &upb_alloc_global);
248 }
249
upb_strtable_init(upb_strtable * table,upb_ctype_t ctype)250 UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
251 return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
252 }
253
upb_inttable_uninit(upb_inttable * table)254 UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
255 upb_inttable_uninit2(table, &upb_alloc_global);
256 }
257
upb_strtable_uninit(upb_strtable * table)258 UPB_INLINE void upb_strtable_uninit(upb_strtable *table) {
259 upb_strtable_uninit2(table, &upb_alloc_global);
260 }
261
262 /* Returns the number of values in the table. */
263 size_t upb_inttable_count(const upb_inttable *t);
upb_strtable_count(const upb_strtable * t)264 UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
265 return t->t.count;
266 }
267
268 void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
269 void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
270 upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
271 size_t size);
272 upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
273 size_t size);
274 void upb_strtable_clear(upb_strtable *t);
275
276 /* Inserts the given key into the hashtable with the given value. The key must
277 * not already exist in the hash table. For string tables, the key must be
278 * NULL-terminated, and the table will make an internal copy of the key.
279 * Inttables must not insert a value of UINTPTR_MAX.
280 *
281 * If a table resize was required but memory allocation failed, false is
282 * returned and the table is unchanged. */
283 bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
284 upb_alloc *a);
285 bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len,
286 upb_value val, upb_alloc *a);
287
upb_inttable_insert(upb_inttable * t,uintptr_t key,upb_value val)288 UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key,
289 upb_value val) {
290 return upb_inttable_insert2(t, key, val, &upb_alloc_global);
291 }
292
upb_strtable_insert2(upb_strtable * t,const char * key,size_t len,upb_value val)293 UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key,
294 size_t len, upb_value val) {
295 return upb_strtable_insert3(t, key, len, val, &upb_alloc_global);
296 }
297
298 /* For NULL-terminated strings. */
upb_strtable_insert(upb_strtable * t,const char * key,upb_value val)299 UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
300 upb_value val) {
301 return upb_strtable_insert2(t, key, strlen(key), val);
302 }
303
304 /* Looks up key in this table, returning "true" if the key was found.
305 * If v is non-NULL, copies the value for this key into *v. */
306 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
307 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
308 upb_value *v);
309
310 /* For NULL-terminated strings. */
upb_strtable_lookup(const upb_strtable * t,const char * key,upb_value * v)311 UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
312 upb_value *v) {
313 return upb_strtable_lookup2(t, key, strlen(key), v);
314 }
315
316 /* Removes an item from the table. Returns true if the remove was successful,
317 * and stores the removed item in *val if non-NULL. */
318 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
319 bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
320 upb_value *val, upb_alloc *alloc);
321
upb_strtable_remove2(upb_strtable * t,const char * key,size_t len,upb_value * val)322 UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key,
323 size_t len, upb_value *val) {
324 return upb_strtable_remove3(t, key, len, val, &upb_alloc_global);
325 }
326
327 /* For NULL-terminated strings. */
upb_strtable_remove(upb_strtable * t,const char * key,upb_value * v)328 UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
329 upb_value *v) {
330 return upb_strtable_remove2(t, key, strlen(key), v);
331 }
332
333 /* Updates an existing entry in an inttable. If the entry does not exist,
334 * returns false and does nothing. Unlike insert/remove, this does not
335 * invalidate iterators. */
336 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
337
338 /* Convenience routines for inttables with pointer keys. */
339 bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
340 upb_alloc *a);
341 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
342 bool upb_inttable_lookupptr(
343 const upb_inttable *t, const void *key, upb_value *val);
344
upb_inttable_insertptr(upb_inttable * t,const void * key,upb_value val)345 UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key,
346 upb_value val) {
347 return upb_inttable_insertptr2(t, key, val, &upb_alloc_global);
348 }
349
350 /* Optimizes the table for the current set of entries, for both memory use and
351 * lookup time. Client should call this after all entries have been inserted;
352 * inserting more entries is legal, but will likely require a table resize. */
353 void upb_inttable_compact2(upb_inttable *t, upb_alloc *a);
354
upb_inttable_compact(upb_inttable * t)355 UPB_INLINE void upb_inttable_compact(upb_inttable *t) {
356 upb_inttable_compact2(t, &upb_alloc_global);
357 }
358
359 /* A special-case inlinable version of the lookup routine for 32-bit
360 * integers. */
upb_inttable_lookup32(const upb_inttable * t,uint32_t key,upb_value * v)361 UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
362 upb_value *v) {
363 *v = upb_value_int32(0); /* Silence compiler warnings. */
364 if (key < t->array_size) {
365 upb_tabval arrval = t->array[key];
366 if (upb_arrhas(arrval)) {
367 _upb_value_setval(v, arrval.val);
368 return true;
369 } else {
370 return false;
371 }
372 } else {
373 const upb_tabent *e;
374 if (t->t.entries == NULL) return false;
375 for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
376 if ((uint32_t)e->key == key) {
377 _upb_value_setval(v, e->val.val);
378 return true;
379 }
380 if (e->next == NULL) return false;
381 }
382 }
383 }
384
385 /* Exposed for testing only. */
386 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a);
387
388 /* Iterators ******************************************************************/
389
390 /* Iterators for int and string tables. We are subject to some kind of unusual
391 * design constraints:
392 *
393 * For high-level languages:
394 * - we must be able to guarantee that we don't crash or corrupt memory even if
395 * the program accesses an invalidated iterator.
396 *
397 * For C++11 range-based for:
398 * - iterators must be copyable
399 * - iterators must be comparable
400 * - it must be possible to construct an "end" value.
401 *
402 * Iteration order is undefined.
403 *
404 * Modifying the table invalidates iterators. upb_{str,int}table_done() is
405 * guaranteed to work even on an invalidated iterator, as long as the table it
406 * is iterating over has not been freed. Calling next() or accessing data from
407 * an invalidated iterator yields unspecified elements from the table, but it is
408 * guaranteed not to crash and to return real table elements (except when done()
409 * is true). */
410
411
412 /* upb_strtable_iter **********************************************************/
413
/* upb_strtable_iter i;
 * upb_strtable_begin(&i, t);
 * for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
 *   upb_strview key = upb_strtable_iter_key(&i);
 *   const upb_value val = upb_strtable_iter_value(&i);
 *   // ...
 * }
 */
422
423 typedef struct {
424 const upb_strtable *t;
425 size_t index;
426 } upb_strtable_iter;
427
428 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
429 void upb_strtable_next(upb_strtable_iter *i);
430 bool upb_strtable_done(const upb_strtable_iter *i);
431 upb_strview upb_strtable_iter_key(const upb_strtable_iter *i);
432 upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
433 void upb_strtable_iter_setdone(upb_strtable_iter *i);
434 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
435 const upb_strtable_iter *i2);
436
437
438 /* upb_inttable_iter **********************************************************/
439
440 /* upb_inttable_iter i;
441 * upb_inttable_begin(&i, t);
442 * for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
443 * uintptr_t key = upb_inttable_iter_key(&i);
444 * upb_value val = upb_inttable_iter_value(&i);
445 * // ...
446 * }
447 */
448
449 typedef struct {
450 const upb_inttable *t;
451 size_t index;
452 bool array_part;
453 } upb_inttable_iter;
454
str_tabent(const upb_strtable_iter * i)455 UPB_INLINE const upb_tabent *str_tabent(const upb_strtable_iter *i) {
456 return &i->t->t.entries[i->index];
457 }
458
459 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
460 void upb_inttable_next(upb_inttable_iter *i);
461 bool upb_inttable_done(const upb_inttable_iter *i);
462 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
463 upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
464 void upb_inttable_iter_setdone(upb_inttable_iter *i);
465 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
466 const upb_inttable_iter *i2);
467
468
469 #ifdef __cplusplus
470 } /* extern "C" */
471 #endif
472
473 #include "upb/port_undef.inc"
474
475 #endif /* UPB_TABLE_H_ */
476