1 /* Amalgamated source file */
2 #include "upb.h"
3 /*
4 * This is where we define macros used across upb.
5 *
6 * All of these macros are undef'd in port_undef.inc to avoid leaking them to
7 * users.
8 *
9 * The correct usage is:
10 *
11 * #include "upb/foobar.h"
12 * #include "upb/baz.h"
13 *
14 * // MUST be last included header.
15 * #include "upb/port_def.inc"
16 *
17 * // Code for this file.
18 * // <...>
19 *
20 * // Can be omitted for .c files, required for .h.
21 * #include "upb/port_undef.inc"
22 *
23 * This file is private and must not be included by users!
24 */
25 #include <stdint.h>
26
/* UPB_SIZE(size32, size64): expands to its first argument when UINTPTR_MAX is
 * 0xffffffff and to its second argument on every other target. */
#if UINTPTR_MAX != 0xffffffff
#define UPB_SIZE(size32, size64) size64
#else
#define UPB_SIZE(size32, size64) size32
#endif
32
/* UPB_PTR_AT(msg, ofs, type): pointer to a `type` located `ofs` bytes past
 * `msg`.
 *
 * If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* UPB_READ_ONEOF(): yields the `fieldtype` stored at `offset` when the int
 * stored at `case_offset` equals `case_val`; otherwise yields `default`.
 *
 * The whole expansion and the value arguments are parenthesized so that the
 * macro can be embedded in a larger expression (e.g. `2 * UPB_READ_ONEOF(...)`)
 * without the surrounding operators re-associating with the ternary. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (*UPB_PTR_AT(msg, case_offset, int) == (case_val)                            \
       ? *UPB_PTR_AT(msg, offset, fieldtype)                                   \
       : (default))

/* UPB_WRITE_ONEOF(): stores `case_val` into the int at `case_offset`, then
 * stores `value` into the `fieldtype` at `offset`.
 *
 * NOTE: this deliberately still expands to two complete statements (each with
 * its own semicolon), exactly as before, so existing call sites keep
 * compiling.  Do not use it as the unbraced body of an if/else/loop. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  *UPB_PTR_AT(msg, case_offset, int) = (case_val);                            \
  *UPB_PTR_AT(msg, offset, fieldtype) = (value);
46
#define UPB_MAPTYPE_STRING 0

/* UPB_INLINE: plain `inline` in C++, `static __inline__` on GNU-compatible C
 * compilers, and plain `static` (standalone code) everywhere else. */
#if defined(__cplusplus)
#define UPB_INLINE inline
#elif defined(__GNUC__) || defined(__clang__)
#define UPB_INLINE static __inline__
#else
#define UPB_INLINE static
#endif

/* UPB_LIKELY / UPB_UNLIKELY: branch-probability hints.  They expand to the
 * bare expression when __builtin_expect is not available. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#else
#define UPB_LIKELY(x) __builtin_expect((x),1)
#define UPB_UNLIKELY(x) __builtin_expect((x),0)
#endif

/* UPB_BIG_ENDIAN is defined automatically when the compiler advertises a
 * big-endian byte order.  Define it manually if you're on big endian and your
 * compiler doesn't provide these preprocessor symbols. */
#if defined(__BYTE_ORDER__)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define UPB_BIG_ENDIAN
#endif
#endif

/* Function-attribute macros.  They expand to nothing on compilers that do not
 * define __GNUC__. */
#ifndef __GNUC__
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#else /* defined(__GNUC__) */
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
#endif
83
/* Select implementations of snprintf(), vsnprintf() and va_copy() for the
 * current toolchain and expose them as _upb_snprintf / _upb_vsnprintf /
 * _upb_va_copy. */
#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
/* C99/C++11 versions. */
#include <stdio.h>
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined(_MSC_VER)
/* Microsoft C/C++ versions. */
#include <stdarg.h>
#include <stdio.h>
#if _MSC_VER >= 1900
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#else
/* Older MSVC: use the msvc_* replacements declared here. */
int msvc_snprintf(char* s, size_t n, const char* format, ...);
int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#define UPB_MSVC_VSNPRINTF
#define _upb_snprintf msvc_snprintf
#define _upb_vsnprintf msvc_vsnprintf
#endif
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined(__GNUC__)
/* A few hacky workarounds for functions not in C89.
 * For internal use only!
 * TODO(haberman): fix these by including our own implementations, or finding
 * another workaround.
 */
#define _upb_snprintf __builtin_snprintf
#define _upb_vsnprintf __builtin_vsnprintf
#define _upb_va_copy(a, b) __va_copy(a, b)
#else
#error Need implementations of [v]snprintf and va_copy
#endif

/* When compiled as C++, refuse anything older than C++11. */
#if defined(__cplusplus) &&                                            \
    !(__cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \
      (defined(_MSC_VER) && _MSC_VER >= 1900))
#error upb requires C++11 for C++ support
#endif
126
/* UPB_MAX / UPB_MIN: larger / smaller of two values.  Both arguments may be
 * evaluated more than once, so do not pass expressions with side effects. */
#define UPB_MAX(x, y) ((y) < (x) ? (x) : (y))
#define UPB_MIN(x, y) ((y) > (x) ? (x) : (y))

/* UPB_UNUSED(var): marks `var` as intentionally unused. */
#define UPB_UNUSED(var) (void)var
131
/* UPB_ASSUME(expr): in debug builds this is a regular assert().  In release
 * mode (NDEBUG) we tell the compiler to assume the expression is true when it
 * supports __builtin_unreachable(); otherwise the expression is referenced but
 * never evaluated.
 *
 * Every variant expands to a single statement that needs a trailing
 * semicolon, so the macro is safe as the unbraced body of an if/else. */
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) \
  do { if (!(expr)) __builtin_unreachable(); } while (0)
#else
#define UPB_ASSUME(expr) do {} while (false && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif
143
/* UPB_ASSERT(): a regular assert() in debug builds.  In release mode the
 * expression is still referenced, but never evaluated, which prevents
 * "unused variable" warnings. */
#ifndef NDEBUG
#define UPB_ASSERT(expr) assert(expr)
#else
#define UPB_ASSERT(expr) do {} while (false && (expr))
#endif

/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only
 * exist in debug mode. This turns into regular assert. */
#define UPB_ASSERT_DEBUGVAR(expr) assert(expr)

/* UPB_UNREACHABLE(): asserts, and additionally informs GNU-compatible
 * compilers that control never reaches this point. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#endif

/* UPB_INFINITY representing floating-point positive infinity: the INFINITY
 * macro from <math.h> when it exists, otherwise a division by zero. */
#include <math.h>
#ifndef INFINITY
#define UPB_INFINITY (1.0 / 0.0)
#else
#define UPB_INFINITY INFINITY
#endif
169
170 #include <setjmp.h>
171 #include <string.h>
172
173
174
/* Maps descriptor type -> upb field type.
 *
 * The table is indexed by the descriptor type number, so entry order is
 * significant.  Index 0 is not a valid descriptor type; its -1 initializer is
 * stored as the uint8_t value 255.  Several descriptor types share one field
 * type (e.g. INT32, SFIXED32 and SINT32 all map to UPB_TYPE_INT32). */
static const uint8_t desctype_to_fieldtype[] = {
    -1,               /* invalid descriptor type */
    UPB_TYPE_DOUBLE,  /* DOUBLE */
    UPB_TYPE_FLOAT,   /* FLOAT */
    UPB_TYPE_INT64,   /* INT64 */
    UPB_TYPE_UINT64,  /* UINT64 */
    UPB_TYPE_INT32,   /* INT32 */
    UPB_TYPE_UINT64,  /* FIXED64 */
    UPB_TYPE_UINT32,  /* FIXED32 */
    UPB_TYPE_BOOL,    /* BOOL */
    UPB_TYPE_STRING,  /* STRING */
    UPB_TYPE_MESSAGE, /* GROUP */
    UPB_TYPE_MESSAGE, /* MESSAGE */
    UPB_TYPE_BYTES,   /* BYTES */
    UPB_TYPE_UINT32,  /* UINT32 */
    UPB_TYPE_ENUM,    /* ENUM */
    UPB_TYPE_INT32,   /* SFIXED32 */
    UPB_TYPE_INT64,   /* SFIXED64 */
    UPB_TYPE_INT32,   /* SINT32 */
    UPB_TYPE_INT64,   /* SINT64 */
};
197
/* Maps descriptor type -> upb map size.
 *
 * Indexed by descriptor type number, so entry order is significant.  Numeric
 * entries are a size in bytes (1, 4, 8 or sizeof(void *)); STRING and BYTES use
 * the UPB_MAPTYPE_STRING marker instead of a byte count.  decode_tomap() passes
 * these values to _upb_map_new() as the key and value sizes.  Index 0 is not a
 * valid descriptor type; its -1 initializer is stored as 255. */
static const uint8_t desctype_to_mapsize[] = {
    -1,                 /* invalid descriptor type */
    8,                  /* DOUBLE */
    4,                  /* FLOAT */
    8,                  /* INT64 */
    8,                  /* UINT64 */
    4,                  /* INT32 */
    8,                  /* FIXED64 */
    4,                  /* FIXED32 */
    1,                  /* BOOL */
    UPB_MAPTYPE_STRING, /* STRING */
    sizeof(void *),     /* GROUP */
    sizeof(void *),     /* MESSAGE */
    UPB_MAPTYPE_STRING, /* BYTES */
    4,                  /* UINT32 */
    4,                  /* ENUM */
    4,                  /* SFIXED32 */
    8,                  /* SFIXED64 */
    4,                  /* SINT32 */
    8,                  /* SINT64 */
};
220
221 static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) |
222 (1 << UPB_DTYPE_FIXED32) |
223 (1 << UPB_DTYPE_SFIXED32);
224
225 static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) |
226 (1 << UPB_DTYPE_FIXED64) |
227 (1 << UPB_DTYPE_SFIXED64);
228
/* Op: an action to be performed for a wire-type/field-type combination.
 *
 * The *_LG2(n) forms encode log2 of the element size (n = 0, 2 or 3, i.e.
 * 1, 4 or 8 bytes) into the op code:
 *   OP_SCALAR_LG2(n) -> n       scalar value
 *   OP_STRING        -> 4       string / bytes
 *   OP_SUBMSG        -> 5       submessage / group
 *   OP_FIXPCK_LG2(n) -> n + 4   packed fixed-width values
 *   OP_VARPCK_LG2(n) -> n + 8   packed varints
 * The argument is parenthesized so that any expression may be passed. */
#define OP_SCALAR_LG2(n) (n)
#define OP_FIXPCK_LG2(n) ((n) + 4)
#define OP_VARPCK_LG2(n) ((n) + 8)
#define OP_STRING 4
#define OP_SUBMSG 5
235
/* Op to perform when a field arrives with the VARINT wire type, indexed by the
 * field's descriptor type.  -1 means the field cannot be stored from a varint
 * (decode_msg() then treats it as unknown); index 0 is what the placeholder
 * returned by upb_find_field() for an unknown field number selects. */
static const int8_t varint_ops[19] = {
    -1,               /* field not found */
    -1,               /* DOUBLE */
    -1,               /* FLOAT */
    OP_SCALAR_LG2(3), /* INT64 */
    OP_SCALAR_LG2(3), /* UINT64 */
    OP_SCALAR_LG2(2), /* INT32 */
    -1,               /* FIXED64 */
    -1,               /* FIXED32 */
    OP_SCALAR_LG2(0), /* BOOL */
    -1,               /* STRING */
    -1,               /* GROUP */
    -1,               /* MESSAGE */
    -1,               /* BYTES */
    OP_SCALAR_LG2(2), /* UINT32 */
    OP_SCALAR_LG2(2), /* ENUM */
    -1,               /* SFIXED32 */
    -1,               /* SFIXED64 */
    OP_SCALAR_LG2(2), /* SINT32 */
    OP_SCALAR_LG2(3), /* SINT64 */
};
257
/* Op to perform when a field arrives with the DELIMITED wire type.
 *
 * Entries 0..18 are indexed by descriptor type for non-repeated fields.
 * Entries 19..36 are the repeated variants: decode_msg() adds 18 to the
 * descriptor type (which starts at 1) when _upb_isrepeated() is true.
 * -1 means the combination is not storable and the field is treated as
 * unknown. */
static const int8_t delim_ops[37] = {
    /* For non-repeated field type. */
    -1,        /* field not found */
    -1,        /* DOUBLE */
    -1,        /* FLOAT */
    -1,        /* INT64 */
    -1,        /* UINT64 */
    -1,        /* INT32 */
    -1,        /* FIXED64 */
    -1,        /* FIXED32 */
    -1,        /* BOOL */
    OP_STRING, /* STRING */
    -1,        /* GROUP */
    OP_SUBMSG, /* MESSAGE */
    OP_STRING, /* BYTES */
    -1,        /* UINT32 */
    -1,        /* ENUM */
    -1,        /* SFIXED32 */
    -1,        /* SFIXED64 */
    -1,        /* SINT32 */
    -1,        /* SINT64 */
    /* For repeated field type. */
    OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
    OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
    OP_VARPCK_LG2(3), /* REPEATED INT64 */
    OP_VARPCK_LG2(3), /* REPEATED UINT64 */
    OP_VARPCK_LG2(2), /* REPEATED INT32 */
    OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
    OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
    OP_VARPCK_LG2(0), /* REPEATED BOOL */
    OP_STRING,        /* REPEATED STRING */
    OP_SUBMSG,        /* REPEATED GROUP */
    OP_SUBMSG,        /* REPEATED MESSAGE */
    OP_STRING,        /* REPEATED BYTES */
    OP_VARPCK_LG2(2), /* REPEATED UINT32 */
    OP_VARPCK_LG2(2), /* REPEATED ENUM */
    OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
    OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
    OP_VARPCK_LG2(2), /* REPEATED SINT32 */
    OP_VARPCK_LG2(3), /* REPEATED SINT64 */
};
299
/* Data pertaining to the parse.  One instance lives on the stack of
 * upb_decode() and is threaded through every decode_* helper. */
typedef struct {
  const char *limit;   /* End of delimited region or end of buffer. */
  upb_arena *arena;    /* Arena passed to every allocation made while parsing. */
  int depth;           /* Remaining nesting budget; upb_decode() starts it at
                        * 64 and each submessage/group level decrements it. */
  uint32_t end_group;  /* Set to field number of END_GROUP tag, if any. */
  jmp_buf err;         /* longjmp() target used by decode_err(). */
} upb_decstate;
308
/* A single value as read off the wire.  decode_msg() fills the member that
 * matches the wire type (uint64_val for varints, the leading bytes for fixed
 * 32/64-bit values, str_val for delimited data, int32_val for START_GROUP);
 * decode_munge() may then rewrite it in place as bool_val / int32_val /
 * int64_val.  Consumers copy the leading bytes of the union into the message. */
typedef union {
  bool bool_val;
  int32_t int32_val;
  int64_t int64_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
  upb_strview str_val;
} wireval;
317
318 static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
319 const upb_msglayout *layout);
320
decode_err(upb_decstate * d)321 UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
322
/* Makes sure `arr` can take `elem` more elements.
 *
 * Returns false when the spare capacity was already sufficient, and true when
 * the array had to be reallocated (callers must then re-derive any pointers
 * into the array's storage).  A failed reallocation aborts the parse through
 * decode_err(). */
static bool decode_reserve(upb_decstate *d, upb_array *arr, int elem) {
  if (!(arr->size - arr->len < elem)) {
    return false; /* Enough room already. */
  }
  if (!_upb_array_realloc(arr, arr->len + elem, d->arena)) {
    decode_err(d);
  }
  return true;
}
330
331 UPB_NOINLINE
decode_longvarint64(upb_decstate * d,const char * ptr,const char * limit,uint64_t * val)332 static const char *decode_longvarint64(upb_decstate *d, const char *ptr,
333 const char *limit, uint64_t *val) {
334 uint8_t byte;
335 int bitpos = 0;
336 uint64_t out = 0;
337
338 do {
339 if (bitpos >= 70 || ptr == limit) decode_err(d);
340 byte = *ptr;
341 out |= (uint64_t)(byte & 0x7F) << bitpos;
342 ptr++;
343 bitpos += 7;
344 } while (byte & 0x80);
345
346 *val = out;
347 return ptr;
348 }
349
350 UPB_FORCEINLINE
decode_varint64(upb_decstate * d,const char * ptr,const char * limit,uint64_t * val)351 static const char *decode_varint64(upb_decstate *d, const char *ptr,
352 const char *limit, uint64_t *val) {
353 if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) {
354 *val = (uint8_t)*ptr;
355 return ptr + 1;
356 } else {
357 return decode_longvarint64(d, ptr, limit, val);
358 }
359 }
360
/* Decodes a varint that must fit in 32 bits.  Stores it in *val and returns
 * the position just past it; a value above UINT32_MAX aborts the parse
 * through decode_err(). */
static const char *decode_varint32(upb_decstate *d, const char *ptr,
                                   const char *limit, uint32_t *val) {
  uint64_t wide;
  const char *next = decode_varint64(d, ptr, limit, &wide);
  if (wide > UINT32_MAX) decode_err(d);
  *val = (uint32_t)wide;
  return next;
}
369
/* Post-processes a freshly decoded varint, in place, for the descriptor types
 * whose stored form differs from the raw varint: BOOL is collapsed to 0/1 and
 * SINT32/SINT64 are zigzag-decoded.  All other types are left untouched. */
static void decode_munge(int type, wireval *val) {
  if (type == UPB_DESCRIPTOR_TYPE_BOOL) {
    val->bool_val = val->uint64_val != 0;
  } else if (type == UPB_DESCRIPTOR_TYPE_SINT32) {
    uint32_t raw = val->uint32_val;
    val->int32_val = (raw >> 1) ^ -(int32_t)(raw & 1);
  } else if (type == UPB_DESCRIPTOR_TYPE_SINT64) {
    uint64_t raw = val->uint64_val;
    val->int64_val = (raw >> 1) ^ -(int64_t)(raw & 1);
  }
}
387
upb_find_field(const upb_msglayout * l,uint32_t field_number)388 static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
389 uint32_t field_number) {
390 static upb_msglayout_field none = {0};
391
392 /* Lots of optimization opportunities here. */
393 int i;
394 if (l == NULL) return &none;
395 for (i = 0; i < l->field_count; i++) {
396 if (l->fields[i].number == field_number) {
397 return &l->fields[i];
398 }
399 }
400
401 return &none; /* Unknown field. */
402 }
403
decode_newsubmsg(upb_decstate * d,const upb_msglayout * layout,const upb_msglayout_field * field)404 static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
405 const upb_msglayout_field *field) {
406 const upb_msglayout *subl = layout->submsgs[field->submsg_index];
407 return _upb_msg_new(subl, d->arena);
408 }
409
/* Parses the length-delimited payload `val` into `submsg`, using the
 * sub-layout that `field` refers to.
 *
 * The decoder's limit is narrowed to the payload for the duration of the
 * nested parse and restored afterwards.  Exceeding the nesting budget, or
 * finding an END_GROUP tag inside the payload, aborts the parse through
 * decode_err(). */
static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
                            const upb_msglayout *layout,
                            const upb_msglayout_field *field, upb_strview val) {
  const char *outer_limit = d->limit;
  const upb_msglayout *sublayout = layout->submsgs[field->submsg_index];

  d->depth--;
  if (d->depth < 0) decode_err(d);

  d->limit = val.data + val.size;
  decode_msg(d, val.data, submsg, sublayout);
  d->limit = outer_limit;

  if (d->end_group != 0) decode_err(d);
  d->depth++;
}
422
/* Parses the body of a group starting at `ptr` into `submsg` and returns the
 * position after it.
 *
 * The nested parse must stop on an END_GROUP tag carrying field number
 * `number`; anything else, or exceeding the nesting budget, aborts the parse
 * through decode_err().  The recorded end-group marker is cleared on success. */
static const char *decode_group(upb_decstate *d, const char *ptr,
                                upb_msg *submsg, const upb_msglayout *subl,
                                uint32_t number) {
  const char *after;

  d->depth--;
  if (d->depth < 0) decode_err(d);

  after = decode_msg(d, ptr, submsg, subl);
  if (d->end_group != number) decode_err(d);

  d->end_group = 0;
  d->depth++;
  return after;
}
433
/* Convenience wrapper around decode_group(): resolves the sub-layout and the
 * expected END_GROUP field number from `field`. */
static const char *decode_togroup(upb_decstate *d, const char *ptr,
                                  upb_msg *submsg, const upb_msglayout *layout,
                                  const upb_msglayout_field *field) {
  return decode_group(d, ptr, submsg, layout->submsgs[field->submsg_index],
                      field->number);
}
440
/* Appends the wire value `val` to the repeated field `field` of `msg`,
 * creating the array on first use.
 *
 * `op` selects how `val` is interpreted:
 *   - OP_SCALAR_LG2(n): one scalar of (1 << n) bytes.
 *   - OP_STRING:        one upb_strview.
 *   - OP_SUBMSG:        one new submessage (or group) that is parsed in place.
 *   - OP_FIXPCK_LG2(n): a packed run of fixed-width elements of (1 << n) bytes.
 *   - OP_VARPCK_LG2(n): a packed run of varints stored as (1 << n)-byte elems.
 *
 * Returns the position at which parsing of the enclosing message continues.
 * Allocation failures and malformed packed data abort the parse through
 * decode_err(). */
static const char *decode_toarray(upb_decstate *d, const char *ptr,
                                  upb_msg *msg, const upb_msglayout *layout,
                                  const upb_msglayout_field *field, wireval val,
                                  int op) {
  upb_array **arrp = UPB_PTR_AT(msg, field->offset, void);
  upb_array *arr = *arrp;
  void *mem;

  if (!arr) {
    /* Lazily create the array. */
    upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype];
    arr = _upb_array_new(d->arena, type);
    if (!arr) decode_err(d);
    *arrp = arr;
  }

  decode_reserve(d, arr, 1);

  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3):
      /* Append scalar value.  For scalar ops, op itself is lg2(elem size). */
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
      arr->len++;
      memcpy(mem, &val, 1 << op);
      return ptr;
    case OP_STRING:
      /* Append string. */
      mem =
          UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
      arr->len++;
      memcpy(mem, &val, sizeof(upb_strview));
      return ptr;
    case OP_SUBMSG: {
      /* Append submessage / group. */
      upb_msg *submsg = decode_newsubmsg(d, layout, field);
      *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) =
          submsg;
      arr->len++;
      if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
        ptr = decode_togroup(d, ptr, submsg, layout, field);
      } else {
        decode_tosubmsg(d, submsg, layout, field, val.str_val);
      }
      return ptr;
    }
    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3): {
      /* Fixed packed. */
      int lg2 = op - OP_FIXPCK_LG2(0);
      int mask = (1 << lg2) - 1;
      int count = val.str_val.size >> lg2;
      if ((val.str_val.size & mask) != 0) {
        decode_err(d); /* Length isn't a round multiple of elem size. */
      }
      decode_reserve(d, arr, count);
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
      arr->len += count;
      /* Copy exactly count elements of (1 << lg2) bytes.  The shift must use
       * lg2, not op: op carries the OP_FIXPCK offset, so shifting by it would
       * read far past the input and write far past the reserved space. */
      memcpy(mem, val.str_val.data, count << lg2);
      return ptr;
    }
    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3): {
      /* Varint packed. */
      int lg2 = op - OP_VARPCK_LG2(0);
      int scale = 1 << lg2;
      const char *src = val.str_val.data;
      const char *end = src + val.str_val.size;
      char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
      while (src < end) {
        wireval elem;
        src = decode_varint64(d, src, end, &elem.uint64_val);
        decode_munge(field->descriptortype, &elem);
        if (decode_reserve(d, arr, 1)) {
          /* Storage moved: recompute the write position. */
          out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
        }
        arr->len++;
        memcpy(out, &elem, scale);
        out += scale;
      }
      if (src != end) decode_err(d);
      /* Resume after the packed payload (src == end here). */
      return src;
    }
    default:
      UPB_UNREACHABLE();
  }
}
529
/* Parses one map-entry payload (`val.str_val`) and inserts the resulting
 * key/value pair into the map field `field` of `msg`, creating the map on
 * first use.
 *
 * The entry is parsed as a message whose layout is the field's sub-layout;
 * fields[0] of that layout is the key and fields[1] the value.  Errors abort
 * the parse through decode_err(). */
static void decode_tomap(upb_decstate *d, upb_msg *msg,
                         const upb_msglayout *layout,
                         const upb_msglayout_field *field, wireval val) {
  upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
  upb_map *map = *map_p;
  upb_map_entry ent;
  const upb_msglayout *entry = layout->submsgs[field->submsg_index];

  if (!map) {
    /* Lazily create map. */
    const upb_msglayout_field *key_field = &entry->fields[0];
    const upb_msglayout_field *val_field = &entry->fields[1];
    char key_size = desctype_to_mapsize[key_field->descriptortype];
    char val_size = desctype_to_mapsize[val_field->descriptortype];
    UPB_ASSERT(key_field->offset == 0);
    UPB_ASSERT(val_field->offset == sizeof(upb_strview));
    map = _upb_map_new(d->arena, key_size, val_size);
    /* map is dereferenced below, so a NULL result must abort the parse, the
     * same way decode_toarray() handles a NULL array.  (Presumably NULL means
     * the arena allocation failed -- confirm against _upb_map_new().) */
    if (!map) decode_err(d);
    *map_p = map;
  }

  /* Parse map entry. */
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
      entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
    /* Create proactively to handle the case where it doesn't appear. */
    ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena);
  }

  /* The entry struct itself is used as the message being parsed into. */
  decode_tosubmsg(d, &ent.k, layout, field, val.str_val);

  /* Insert into map.
   * NOTE(review): the result of _upb_map_set() is ignored here; if it reports
   * failure, that failure is currently silent -- confirm and handle. */
  _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena);
}
565
/* Stores the wire value `val` into the non-repeated field `field` of `msg`.
 *
 * First records presence: a negative `presence` is the negated offset of a
 * oneof case (set to the field number), a positive one is a hasbit index.
 * Then writes the value according to `op` (scalar, string, or submessage /
 * group, the latter being parsed into a lazily created submessage).
 *
 * Returns the position at which parsing of the enclosing message continues. */
static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
                                const upb_msglayout *layout,
                                const upb_msglayout_field *field, wireval val,
                                int op) {
  void *dst = UPB_PTR_AT(msg, field->offset, void);
  int desc_type = field->descriptortype;

  /* Set presence if necessary. */
  if (field->presence > 0) {
    /* Hasbit */
    uint32_t bit = field->presence;
    *UPB_PTR_AT(msg, bit / 8, uint8_t) |= (1 << (bit % 8));
  } else if (field->presence < 0) {
    /* Oneof case */
    *UPB_PTR_AT(msg, -field->presence, int32_t) = field->number;
  }

  /* Store into message. */
  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3):
      /* For scalar ops, op is lg2 of the size: 1, 4 or 8 bytes. */
      memcpy(dst, &val, 1 << op);
      break;
    case OP_STRING:
      memcpy(dst, &val, sizeof(upb_strview));
      break;
    case OP_SUBMSG: {
      upb_msg **slot = dst;
      upb_msg *child = *slot;
      if (!child) {
        child = decode_newsubmsg(d, layout, field);
        *slot = child;
      }
      if (UPB_UNLIKELY(desc_type == UPB_DTYPE_GROUP)) {
        ptr = decode_togroup(d, ptr, child, layout, field);
      } else {
        decode_tosubmsg(d, child, layout, field, val.str_val);
      }
      break;
    }
    default:
      UPB_UNREACHABLE();
  }

  return ptr;
}
617
/* Parses fields from `ptr` up to d->limit into `msg` according to `layout`.
 *
 * Each iteration reads one tag, decodes the value for its wire type into a
 * wireval, and picks an "op" describing how to store it.  Fields that cannot
 * be stored (unknown number, or wire type not valid for the field's type) are
 * appended to the message's unknown-field data instead.
 *
 * Returns early, with d->end_group set to the field number, when an END_GROUP
 * tag is read; otherwise returns d->limit.  Malformed input and allocation
 * failures abort the parse through decode_err().  `msg` may be NULL (see the
 * unknown-group skip below), in which case unknown data is discarded. */
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
                              const upb_msglayout *layout) {
  while (ptr < d->limit) {
    uint32_t tag;
    const upb_msglayout_field *field;
    int field_number;
    int wire_type;
    const char *field_start = ptr; /* Start of tag, for unknown-field copy. */
    wireval val;
    int op;

    /* Tag = (field number << 3) | wire type. */
    ptr = decode_varint32(d, ptr, d->limit, &tag);
    field_number = tag >> 3;
    wire_type = tag & 7;

    /* Never NULL: unknown numbers yield a zeroed placeholder entry, whose
     * descriptortype 0 selects the "not found" rows of the op tables. */
    field = upb_find_field(layout, field_number);

    switch (wire_type) {
      case UPB_WIRE_TYPE_VARINT:
        ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val);
        op = varint_ops[field->descriptortype];
        decode_munge(field->descriptortype, &val);
        break;
      case UPB_WIRE_TYPE_32BIT:
        if (d->limit - ptr < 4) decode_err(d);
        memcpy(&val, ptr, 4);
        ptr += 4;
        op = OP_SCALAR_LG2(2);
        /* ptr is already past the value, so the skip path copies all of it. */
        if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
        break;
      case UPB_WIRE_TYPE_64BIT:
        if (d->limit - ptr < 8) decode_err(d);
        memcpy(&val, ptr, 8);
        ptr += 8;
        op = OP_SCALAR_LG2(3);
        if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
        break;
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t size;
        int ndx = field->descriptortype;
        /* Repeated variants live in the second half of delim_ops. */
        if (_upb_isrepeated(field)) ndx += 18;
        ptr = decode_varint32(d, ptr, d->limit, &size);
        if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) {
          decode_err(d); /* Length overflow. */
        }
        /* The value is a view into the input buffer, not a copy. */
        val.str_val.data = ptr;
        val.str_val.size = size;
        ptr += size;
        op = delim_ops[ndx];
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        val.int32_val = field_number;
        op = OP_SUBMSG;
        if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown;
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        /* Let the caller (decode_group / decode_tosubmsg / upb_decode) decide
         * whether this END_GROUP was expected. */
        d->end_group = field_number;
        return ptr;
      default:
        decode_err(d);
    }

    if (op >= 0) {
      /* Parse, using op for dispatch. */
      switch (field->label) {
        case UPB_LABEL_REPEATED:
        case _UPB_LABEL_PACKED:
          ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
          break;
        case _UPB_LABEL_MAP:
          decode_tomap(d, msg, layout, field, val);
          break;
        default:
          ptr = decode_tomsg(d, ptr, msg, layout, field, val, op);
          break;
      }
    } else {
    unknown:
      /* Skip unknown field. */
      if (field_number == 0) decode_err(d);
      if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
        /* Parse the group with no message and no layout just to find its
         * matching END_GROUP. */
        ptr = decode_group(d, ptr, NULL, NULL, field_number);
      }
      if (msg) {
        /* Preserve the raw bytes [field_start, ptr) of the skipped field. */
        if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
                                 d->arena)) {
          decode_err(d);
        }
      }
    }
  }

  if (ptr != d->limit) decode_err(d);
  return ptr;
}
714
/* Decodes the `size` bytes at `buf` into `msg`, which is laid out according to
 * `l`; memory needed during the parse is taken from `arena`.
 *
 * Returns true on success (an empty buffer is trivially successful).  Returns
 * false if the parse was aborted through decode_err(), or if the top-level
 * message was terminated by an END_GROUP tag. */
bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
                upb_arena *arena) {
  upb_decstate state;

  state.end_group = 0;
  state.depth = 64; /* Nesting budget. */
  state.arena = arena;
  state.limit = buf + size;

  /* decode_err() longjmp()s back here with a non-zero value. */
  if (setjmp(state.err) != 0) return false;

  if (size == 0) return true;
  decode_msg(&state, buf, msg, l);

  return state.end_group == 0;
}
730
731 #undef OP_SCALAR_LG2
732 #undef OP_FIXPCK_LG2
733 #undef OP_VARPCK_LG2
734 #undef OP_STRING
735 #undef OP_SUBMSG
736 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
737
738
739 #include <string.h>
740
741
742
/* Size, in bytes, reserved for one varint by upb_put_varint(). */
#define UPB_PB_VARINT_MAX_LEN 10

/* CHK(x): makes the enclosing function return false when `x` is false. */
#define CHK(x)        \
  do {                \
    if (!(x)) {       \
      return false;   \
    }                 \
  } while(0)
745
/* Writes `val` as a varint to `buf`, least-significant 7-bit group first, with
 * the high bit set on every byte except the last.  Returns the number of bytes
 * written (1 for values below 128, including 0). */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t n = 0;
  do {
    uint8_t b = val & 0x7fU;
    val >>= 7;
    if (val) b |= 0x80U; /* More groups follow. */
    buf[n++] = b;
  } while (val);
  return n;
}
758
/* Zigzag-encodes a signed 32-bit value: the value shifted left by one bit,
 * XORed with its sign replicated across all bits (n >> 31). */
static uint32_t upb_zzencode_32(int32_t n) {
  uint32_t sign_mask = (uint32_t)(n >> 31);
  uint32_t doubled = (uint32_t)n << 1;
  return doubled ^ sign_mask;
}
/* Zigzag-encodes a signed 64-bit value: the value shifted left by one bit,
 * XORed with its sign replicated across all bits (n >> 63). */
static uint64_t upb_zzencode_64(int64_t n) {
  uint64_t sign_mask = (uint64_t)(n >> 63);
  uint64_t doubled = (uint64_t)n << 1;
  return doubled ^ sign_mask;
}
761
/* State of an encode in progress.  Output is produced back to front: data is
 * written just below `ptr`, which moves toward `buf` as bytes are reserved. */
typedef struct {
  upb_alloc *alloc;         /* Allocator used to grow the output buffer. */
  char *buf, *ptr, *limit;  /* buf: start of the buffer; ptr: start of the
                             * data written so far; limit: end of the buffer.
                             * Encoded output occupies [ptr, limit). */
} upb_encstate;
766
/* Returns the smallest value of the form 128 * 2^k that is >= `bytes`; the
 * result is therefore never smaller than 128. */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t cap;
  for (cap = 128; cap < bytes; cap *= 2) {
    /* Keep doubling. */
  }
  return cap;
}
774
/* Grows the output buffer so that at least `bytes` more bytes can be written
 * in front of the data already encoded.
 *
 * The new capacity is upb_roundup_pow2(bytes + bytes already written).  The
 * existing contents are moved to the END of the new buffer, because the
 * encoder writes back to front.  Returns false if the allocation failed, in
 * which case the encoder state is left untouched. */
static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
  /* Capture all sizes before reallocating: afterwards the old pointers may be
   * dangling and must not be used, not even for pointer arithmetic. */
  size_t old_size = e->limit - e->buf;
  size_t used = e->limit - e->ptr;
  size_t new_size = upb_roundup_pow2(bytes + used);
  char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  CHK(new_buf);

  /* We want previous data at the end, realloc() put it at the beginning.
   * The source must therefore be new_buf: the old block may have been moved
   * and released by the allocator. */
  if (old_size > 0) {
    memmove(new_buf + new_size - old_size, new_buf, old_size);
  }

  e->ptr = new_buf + new_size - used;
  e->limit = new_buf + new_size;
  e->buf = new_buf;
  return true;
}
791
792 /* Call to ensure that at least "bytes" bytes are available for writing at
793 * e->ptr. Returns false if the bytes could not be allocated. */
upb_encode_reserve(upb_encstate * e,size_t bytes)794 static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
795 CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
796 upb_encode_growbuffer(e, bytes));
797
798 e->ptr -= bytes;
799 return true;
800 }
801
802 /* Writes the given bytes to the buffer, handling reserve/advance. */
upb_put_bytes(upb_encstate * e,const void * data,size_t len)803 static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
804 if (len == 0) return true;
805 CHK(upb_encode_reserve(e, len));
806 memcpy(e->ptr, data, len);
807 return true;
808 }
809
/* Prepends the eight bytes of `val`, in host byte order, to the output. */
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
814
/* Prepends the four bytes of `val`, in host byte order, to the output. */
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
819
/* Prepends `val` to the output as a varint.
 *
 * The maximum varint size is reserved up front; the varint is encoded at the
 * start of that region and then slid to its end, so it sits directly in front
 * of the data already written and the unused bytes are given back. */
static bool upb_put_varint(upb_encstate *e, uint64_t val) {
  char *slot;
  size_t n;

  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) return false;

  slot = e->ptr;
  n = upb_encode_varint(val, slot);
  e->ptr = slot + UPB_PB_VARINT_MAX_LEN - n;
  memmove(e->ptr, slot, n);
  return true;
}
830
/* Prepends a double to the output as a 64-bit fixed value, copying its object
 * representation bit for bit. */
static bool upb_put_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed64(e, bits);
}
837
/* Prepends a float to the output as a 32-bit fixed value, copying its object
 * representation bit for bit. */
static bool upb_put_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed32(e, bits);
}
844
upb_readcase(const char * msg,const upb_msglayout_field * f)845 static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
846 uint32_t ret;
847 memcpy(&ret, msg - f->presence, sizeof(ret));
848 return ret;
849 }
850
upb_readhasbit(const char * msg,const upb_msglayout_field * f)851 static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
852 uint32_t hasbit = f->presence;
853 UPB_ASSERT(f->presence > 0);
854 return (*UPB_PTR_AT(msg, hasbit / 8, uint8_t)) & (1 << (hasbit % 8));
855 }
856
/* Prepends a field tag, (field_number << 3) | wire_type, as a varint. */
static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
  int tag = (field_number << 3) | wire_type;
  return upb_put_varint(e, tag);
}
860
/* Prepends all elements of a fixed-width array to the output.
 *
 * With tag == 0 the raw element bytes are written as one run, preceded by
 * their total byte length as a varint.  With a non-zero tag every element is
 * written individually, each preceded by `tag`; elements are visited from
 * last to first because the output is built back to front.
 *
 * The array must not be empty when `tag` is non-zero. */
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t elem_size, uint32_t tag) {
  const size_t bytes = arr->len * elem_size;
  const char *data = _upb_array_constptr(arr);
  const char *elem;

  if (!tag) {
    return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes);
  }

  elem = data + bytes - elem_size; /* Last element. */
  for (;;) {
    CHK(upb_put_bytes(e, elem, elem_size) && upb_put_varint(e, tag));
    if (elem == data) return true;
    elem -= elem_size;
  }
}
877
878 bool upb_encode_message(upb_encstate *e, const char *msg,
879 const upb_msglayout *m, size_t *size);
880
/* Encodes the single (non-repeated) field `f`, whose storage is at
 * `_field_mem`, into the output.
 *
 * Because the output is built back to front, the payload is written first and
 * the tag last.  When `skip_zero_value` is set, numeric zeros and empty
 * strings produce no output; a NULL submessage or group never produces output.
 * `m` is the layout that owns `f` and is used to resolve submessage layouts.
 *
 * Returns false if any write failed. */
static bool upb_encode_scalarfield(upb_encstate *e, const void *_field_mem,
                                   const upb_msglayout *m,
                                   const upb_msglayout_field *f,
                                   bool skip_zero_value) {
  const char *field_mem = _field_mem;

/* CASE: loads the field as `ctype` into a local named `val` (which `encodeval`
 * may refer to), then returns from the function after writing the value with
 * upb_put_<type>() followed by the tag. */
#define CASE(ctype, type, wire_type, encodeval)                \
  do {                                                         \
    ctype val = *(ctype*)field_mem;                            \
    if (skip_zero_value && val == 0) return true;              \
    if (!upb_put_ ## type(e, encodeval)) return false;         \
    return upb_put_tag(e, f->number, wire_type);               \
  } while(0)

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Sign-extend to 64 bits before encoding. */
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview str = *(upb_strview*)field_mem;
      if (skip_zero_value && str.size == 0) return true;
      /* Bytes, then their length, then the tag. */
      CHK(upb_put_bytes(e, str.data, str.size));
      CHK(upb_put_varint(e, str.size));
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      size_t size;
      void *child = *(void **)field_mem;
      const upb_msglayout *child_layout = m->submsgs[f->submsg_index];
      if (child == NULL) return true;
      /* END_GROUP goes in first so that it ends up last in the output. */
      CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP));
      CHK(upb_encode_message(e, child, child_layout, &size));
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      size_t size;
      void *child = *(void **)field_mem;
      const upb_msglayout *child_layout = m->submsgs[f->submsg_index];
      if (child == NULL) return true;
      /* Body, then its encoded size, then the tag. */
      CHK(upb_encode_message(e, child, child_layout, &size));
      CHK(upb_put_varint(e, size));
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
    }
  }
#undef CASE
  UPB_UNREACHABLE();
}
956
/* Encodes the repeated field `f`, whose `upb_array *` is stored at
 * `field_mem`, into encoder `e`.  `m` is the layout of the containing message
 * and is consulted for the sub-message layout of group/message elements.
 *
 * Returns true on success (a NULL or empty array encodes nothing) and false
 * as soon as any CHK()'d write fails.
 *
 * Elements are visited from last to first and, for each one, the value is
 * written before its tag (and payloads before their length prefix).
 * NOTE(review): together with the `e->limit - e->ptr` size arithmetic this
 * suggests the output buffer is filled back-to-front -- confirm against the
 * upb_put_*() helpers. */
static bool upb_encode_array(upb_encstate *e, const char *field_mem,
                             const upb_msglayout *m,
                             const upb_msglayout_field *f) {
  const upb_array *arr = *(const upb_array**)field_mem;
  bool packed = f->label == _UPB_LABEL_PACKED;

  if (arr == NULL || arr->len == 0) {
    return true;
  }

/* Body of one varint-typed switch case.  `encode` is an expression over the
 * cursor `ptr`.  When the field is not packed every element is followed by its
 * own tag; when packed, `tag` is 0, the per-element tag is skipped, and the
 * number of bytes written by the loop is emitted as a varint afterwards.  The
 * trailing empty do/while only exists to absorb the `;` written after the
 * macro invocation (it follows the `break` and is never executed). */
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype *start = _upb_array_constptr(arr);                       \
    const ctype *ptr = start + arr->len;                                 \
    size_t pre_len = e->limit - e->ptr;                                  \
    uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
    do {                                                                 \
      ptr--;                                                             \
      CHK(upb_put_varint(e, encode));                                    \
      if (tag) CHK(upb_put_varint(e, tag));                              \
    } while (ptr != start);                                              \
    if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len));       \
  }                                                                      \
  break;                                                                 \
  do {                                                                   \
    ;                                                                    \
  } while (0)

/* Tag value handed to upb_put_fixedarray(): 0 for packed fields, otherwise
 * the field number combined with `wire_type`. */
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      VARINT_CASE(uint32_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Widened to int64_t before being passed to upb_put_varint(). */
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, *ptr);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
    /* The remaining cases always write one tag per element and return
     * directly, so they never reach the packed-tag write after the switch. */
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      const upb_strview *start = _upb_array_constptr(arr);
      const upb_strview *ptr = start + arr->len;
      do {
        ptr--;
        CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
            upb_put_varint(e, ptr->size) &&
            upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
      } while (ptr != start);
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      const void *const*start = _upb_array_constptr(arr);
      const void *const*ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      do {
        size_t size;
        ptr--;
        /* END_GROUP is written first and START_GROUP last; `size` is filled
         * in by upb_encode_message() but not used for groups. */
        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
            upb_encode_message(e, *ptr, subm, &size) &&
            upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
      } while (ptr != start);
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const void *const*start = _upb_array_constptr(arr);
      const void *const*ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      do {
        size_t size;
        ptr--;
        CHK(upb_encode_message(e, *ptr, subm, &size) &&
            upb_put_varint(e, size) &&
            upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
      } while (ptr != start);
      return true;
    }
  }
#undef VARINT_CASE

  /* Packed payload and its length are already written; add the single
   * length-delimited tag that covers the whole run. */
  if (packed) {
    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
  }
  return true;
}
1062
/* Encodes the map field `f` whose `upb_map *` lives at `field_mem`.  Every
 * table entry becomes one length-delimited record holding the value and key
 * fields of the map-entry layout (`m->submsgs[f->submsg_index]`, fields[1]
 * and fields[0] respectively).  A NULL map encodes nothing.  Returns false as
 * soon as any write fails. */
static bool upb_encode_map(upb_encstate *e, const char *field_mem,
                           const upb_msglayout *m,
                           const upb_msglayout_field *f) {
  const upb_map *map = *(const upb_map**)field_mem;
  const upb_msglayout *entry = m->submsgs[f->submsg_index];
  const upb_msglayout_field *key_field = &entry->fields[0];
  const upb_msglayout_field *val_field = &entry->fields[1];
  upb_strtable_iter it;

  if (!map) return true;

  upb_strtable_begin(&it, &map->table);
  while (!upb_strtable_done(&it)) {
    size_t before = e->limit - e->ptr;
    upb_strview key = upb_strtable_iter_key(&it);
    const upb_value val = upb_strtable_iter_value(&it);
    const void *keyp;
    const void *valp;

    /* String keys are passed as a pointer to the strview itself; any other
     * key is passed as a pointer to the raw key bytes held by the table. */
    if (map->key_size == UPB_MAPTYPE_STRING) {
      keyp = (void *)&key;
    } else {
      keyp = key.data;
    }

    /* String values are stored in the table as a pointer; any other value is
     * read directly out of the upb_value. */
    if (map->val_size == UPB_MAPTYPE_STRING) {
      valp = upb_value_getptr(val);
    } else {
      valp = &val;
    }

    CHK(upb_encode_scalarfield(e, valp, entry, val_field, false));
    CHK(upb_encode_scalarfield(e, keyp, entry, key_field, false));
    CHK(upb_put_varint(e, (size_t)((e->limit - e->ptr) - before)));
    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));

    upb_strtable_next(&it);
  }

  return true;
}
1095
1096
/* Encodes every field of `msg` (described by layout `m`) into `e` and stores
 * in `*size` the number of bytes this call added to the encoder.
 *
 * Unknown-field bytes are written first, then the fields are visited from the
 * last layout entry to the first.  Returns false as soon as any write fails;
 * `*size` is only assigned on success. */
bool upb_encode_message(upb_encstate *e, const char *msg,
                        const upb_msglayout *m, size_t *size) {
  int i;
  size_t pre_len = e->limit - e->ptr;
  const char *unknown;
  size_t unknown_size;

  unknown = upb_msg_getunknown(msg, &unknown_size);

  if (unknown) {
    /* A failed write must abort the encode like every other write here;
     * otherwise the caller would receive output that silently lacks the
     * unknown fields. */
    CHK(upb_put_bytes(e, unknown, unknown_size));
  }

  for (i = m->field_count - 1; i >= 0; i--) {
    const upb_msglayout_field *f = &m->fields[i];

    if (_upb_isrepeated(f)) {
      CHK(upb_encode_array(e, msg + f->offset, m, f));
    } else if (f->label == _UPB_LABEL_MAP) {
      CHK(upb_encode_map(e, msg + f->offset, m, f));
    } else {
      bool skip_empty = false;
      if (f->presence == 0) {
        /* Proto3 presence: zero values are not emitted. */
        skip_empty = true;
      } else if (f->presence > 0) {
        /* Proto2 presence: hasbit. */
        if (!upb_readhasbit(msg, f)) {
          continue;
        }
      } else {
        /* Field is in a oneof: only emit it if it is the active case. */
        if (upb_readcase(msg, f) != f->number) {
          continue;
        }
      }
      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
    }
  }

  *size = (e->limit - e->ptr) - pre_len;
  return true;
}
1140
/* Serializes `msg` (layout `m`) using memory obtained from `arena`.
 *
 * On success returns a pointer to the encoded bytes and stores their count in
 * `*size`; an empty encoding returns the address of a static byte rather than
 * NULL.  On failure returns NULL with `*size` set to 0. */
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
                 size_t *size) {
  static char empty;
  upb_encstate enc;

  enc.alloc = upb_arena_alloc(arena);
  enc.buf = NULL;
  enc.limit = NULL;
  enc.ptr = NULL;

  if (!upb_encode_message(&enc, msg, m, size)) {
    *size = 0;
    return NULL;
  }

  *size = enc.limit - enc.ptr;
  if (*size != 0) {
    UPB_ASSERT(enc.ptr);
    return enc.ptr;
  }

  /* Nothing was written, so there is no buffer to hand back. */
  return &empty;
}
1164
1165 #undef CHK
1166
1167
1168
1169
1170 /** upb_msg *******************************************************************/
1171
/* Indexed by upb_fieldtype_t: log2 of the size in bytes of one array element
 * of that type (e.g. _upb_array_append_fallback() copies `1 << lg2` bytes per
 * element).  Index 0 is a placeholder entry.  UPB_SIZE(a, b) selects `a` when
 * UINTPTR_MAX is 0xffffffff and `b` otherwise. */
static const char _upb_fieldtype_to_sizelg2[12] = {
  0,
  0,  /* UPB_TYPE_BOOL */
  2,  /* UPB_TYPE_FLOAT */
  2,  /* UPB_TYPE_INT32 */
  2,  /* UPB_TYPE_UINT32 */
  2,  /* UPB_TYPE_ENUM */
  UPB_SIZE(2, 3),  /* UPB_TYPE_MESSAGE */
  3,  /* UPB_TYPE_DOUBLE */
  3,  /* UPB_TYPE_INT64 */
  3,  /* UPB_TYPE_UINT64 */
  UPB_SIZE(3, 4),  /* UPB_TYPE_STRING */
  UPB_SIZE(3, 4),  /* UPB_TYPE_BYTES */
};
1186
/* Packs an array data pointer and its element-size log2 into one word by
 * OR-ing the log2 value into the pointer bits.  The log2 value must not
 * exceed 4. */
static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) {
  uintptr_t bits = (uintptr_t)ptr;
  UPB_ASSERT(elem_size_lg2 <= 4);
  return bits | elem_size_lg2;
}
1191
upb_msg_internalsize(const upb_msglayout * l)1192 static int upb_msg_internalsize(const upb_msglayout *l) {
1193 return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
1194 }
1195
upb_msg_sizeof(const upb_msglayout * l)1196 static size_t upb_msg_sizeof(const upb_msglayout *l) {
1197 return l->size + upb_msg_internalsize(l);
1198 }
1199
upb_msg_getinternal(upb_msg * msg)1200 static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
1201 return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
1202 }
1203
upb_msg_getinternal_const(const upb_msg * msg)1204 static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
1205 return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
1206 }
1207
upb_msg_getinternalwithext(upb_msg * msg,const upb_msglayout * l)1208 static upb_msg_internal_withext *upb_msg_getinternalwithext(
1209 upb_msg *msg, const upb_msglayout *l) {
1210 UPB_ASSERT(l->extendable);
1211 return UPB_PTR_AT(msg, -sizeof(upb_msg_internal_withext),
1212 upb_msg_internal_withext);
1213 }
1214
_upb_msg_new(const upb_msglayout * l,upb_arena * a)1215 upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
1216 void *mem = upb_arena_malloc(a, upb_msg_sizeof(l));
1217 upb_msg_internal *in;
1218 upb_msg *msg;
1219
1220 if (!mem) {
1221 return NULL;
1222 }
1223
1224 msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg);
1225
1226 /* Initialize normal members. */
1227 memset(msg, 0, l->size);
1228
1229 /* Initialize internal members. */
1230 in = upb_msg_getinternal(msg);
1231 in->unknown = NULL;
1232 in->unknown_len = 0;
1233 in->unknown_size = 0;
1234
1235 if (l->extendable) {
1236 upb_msg_getinternalwithext(msg, l)->extdict = NULL;
1237 }
1238
1239 return msg;
1240 }
1241
/* Appends `len` bytes from `data` to the unknown-field buffer of `msg`,
 * growing the buffer through `arena`'s allocator when needed.
 *
 * Returns true on success (appending zero bytes is a no-op).  Returns false,
 * leaving the existing buffer untouched, if the combined length would
 * overflow or the reallocation fails. */
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                         upb_arena *arena) {
  upb_msg_internal *in = upb_msg_getinternal(msg);

  /* Nothing to copy; this also avoids calling memcpy() with the NULL buffer
   * of a message that has never stored unknown bytes. */
  if (len == 0) return true;

  /* Refuse lengths whose sum with the current contents cannot be
   * represented. */
  if (len > SIZE_MAX - in->unknown_len) return false;

  if (len > in->unknown_size - in->unknown_len) {
    upb_alloc *alloc = upb_arena_alloc(arena);
    /* Capacity actually required: bytes already stored plus the new ones. */
    size_t need = in->unknown_len + len;
    /* Grow geometrically, but never below what is required. */
    size_t newsize = UPB_MAX(in->unknown_size * 2, need);
    void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
    if (!mem) return false;
    in->unknown = mem;
    in->unknown_size = newsize;
  }
  memcpy(in->unknown + in->unknown_len, data, len);
  in->unknown_len += len;
  return true;
}
1258
upb_msg_getunknown(const upb_msg * msg,size_t * len)1259 const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
1260 const upb_msg_internal *in = upb_msg_getinternal_const(msg);
1261 *len = in->unknown_len;
1262 return in->unknown;
1263 }
1264
1265 /** upb_array *****************************************************************/
1266
_upb_array_new(upb_arena * a,upb_fieldtype_t type)1267 upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) {
1268 upb_array *arr = upb_arena_malloc(a, sizeof(upb_array));
1269
1270 if (!arr) {
1271 return NULL;
1272 }
1273
1274 arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]);
1275 arr->len = 0;
1276 arr->size = 0;
1277
1278 return arr;
1279 }
1280
/* Grows the storage of `arr` so that it can hold at least `min_size`
 * elements.  The new capacity starts at max(current capacity, 4) and is
 * doubled until it reaches `min_size`.  Returns false, leaving `arr`
 * unchanged, if the arena reallocation fails. */
bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
  /* The element-size log2 lives in the low bits of the tagged data word. */
  int lg2 = arr->data & 7;
  size_t old_bytes = arr->size << lg2;
  size_t capacity;
  size_t new_bytes;
  void *mem = _upb_array_ptr(arr);

  for (capacity = UPB_MAX(arr->size, 4); capacity < min_size; capacity *= 2) {
    /* Keep doubling. */
  }

  new_bytes = capacity << lg2;
  mem = upb_arena_realloc(arena, mem, old_bytes, new_bytes);
  if (mem == NULL) return false;

  arr->data = tag_arrptr(mem, lg2);
  arr->size = capacity;
  return true;
}
1302
/* Returns the array stored in `*arr_ptr`, creating one for `type` (and
 * storing it back through `arr_ptr`) if the slot is NULL.  Returns NULL if a
 * needed allocation fails. */
static upb_array *getorcreate_array(upb_array **arr_ptr, upb_fieldtype_t type,
                                    upb_arena *arena) {
  if (*arr_ptr == NULL) {
    upb_array *created = _upb_array_new(arena, type);
    if (created == NULL) return NULL;
    *arr_ptr = created;
  }
  return *arr_ptr;
}
1313
/* Sets the length of `arr` to `size`, first growing its storage when `size`
 * exceeds the current capacity.  Returns false (length unchanged) if growing
 * fails. */
static bool resize_array(upb_array *arr, size_t size, upb_arena *arena) {
  if (size > arr->size) {
    if (!_upb_array_realloc(arr, size, arena)) return false;
  }

  arr->len = size;
  return true;
}
1322
/* Ensures `*arr_ptr` holds an array of `type`, resizes it to `size` elements
 * and returns its data pointer.  Returns NULL if creating or growing the
 * array fails. */
void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
                                 upb_fieldtype_t type, upb_arena *arena) {
  upb_array *arr = getorcreate_array(arr_ptr, type, arena);

  if (!arr) return NULL;
  if (!resize_array(arr, size, arena)) return NULL;
  return _upb_array_ptr(arr);
}
1328
/* Appends one element of `type`, copied from `value`, to the array in
 * `*arr_ptr`, creating the array if the slot is NULL.
 *
 * Returns false if the array cannot be created or grown.  The array pointer
 * is checked before its length is read: previously `arr->len` was loaded
 * ahead of the NULL test, so a failed creation dereferenced NULL. */
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
                                upb_fieldtype_t type, upb_arena *arena) {
  upb_array *arr = getorcreate_array(arr_ptr, type, arena);
  int lg2 = _upb_fieldtype_to_sizelg2[type];
  size_t elem;
  char *data;

  if (!arr) return false;

  /* Index of the new element is the current length. */
  elem = arr->len;
  if (!resize_array(arr, elem + 1, arena)) return false;

  data = _upb_array_ptr(arr);
  memcpy(data + (elem << lg2), value, 1 << lg2);
  return true;
}
1342
1343 /** upb_map *******************************************************************/
1344
_upb_map_new(upb_arena * a,size_t key_size,size_t value_size)1345 upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
1346 upb_map *map = upb_arena_malloc(a, sizeof(upb_map));
1347
1348 if (!map) {
1349 return NULL;
1350 }
1351
1352 upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
1353 map->key_size = key_size;
1354 map->val_size = value_size;
1355
1356 return map;
1357 }
1358 /*
1359 ** upb_table Implementation
1360 **
1361 ** Implementation is heavily inspired by Lua's ltable.c.
1362 */
1363
1364
1365 #include <string.h>
1366
1367
/* Upper bound on the value returned by log2ceil(); as a log2 size this is
 * 2^16 = 64k. */
#define UPB_MAXARRSIZE 16  /* 64k. */

/* From Chromium.  Number of elements in the array `x`.  The second divisor is
 * 1 when sizeof(x) is an exact multiple of the element size and 0 otherwise,
 * so an argument that does not divide evenly yields a division by zero
 * instead of a wrong count. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* isfull() reports a table as full once (count + 1) / size would exceed this
 * ratio. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
1380
/* Returns true when `v` has at most one bit set, i.e. `v` is zero or an exact
 * power of two. */
bool is_pow2(uint64_t v) {
  /* Clearing the lowest set bit leaves zero exactly when no other bit is set;
   * for v == 0 the AND is zero as well. */
  uint64_t without_lowest_bit = v & (v - 1);
  return without_lowest_bit == 0;
}
1382
log2ceil(uint64_t v)1383 int log2ceil(uint64_t v) {
1384 int ret = 0;
1385 bool pow2 = is_pow2(v);
1386 while (v >>= 1) ret++;
1387 ret = pow2 ? ret : ret + 1; /* Ceiling. */
1388 return UPB_MIN(UPB_MAXARRSIZE, ret);
1389 }
1390
/* Duplicates the NUL-terminated string `s` using allocator `a`; see
 * upb_strdup2() for the result. */
char *upb_strdup(const char *s, upb_alloc *a) {
  size_t len = strlen(s);
  return upb_strdup2(s, len, a);
}
1394
/* Copies `len` bytes from `s` into a new buffer of `len + 1` bytes obtained
 * from `a` and appends a terminating zero byte.  The input is treated as raw
 * bytes and is not required to be NUL-terminated.  Returns NULL if `len` is
 * SIZE_MAX (the +1 would overflow) or if the allocation fails. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  char *copy;

  /* Prevent overflow errors. */
  if (len == SIZE_MAX) return NULL;

  copy = upb_malloc(a, len + 1);
  if (!copy) return NULL;

  memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
1411
/* Lookup key for either table flavor: `num` is used for integer-keyed tables,
 * `str` (pointer plus explicit length) for string-keyed tables. */
typedef union {
  uintptr_t num;
  struct {
    const char *str;
    size_t len;
  } str;
} lookupkey_t;

/* Builds a string lookup key that refers to (does not copy) `len` bytes at
 * `str`. */
static lookupkey_t strkey2(const char *str, size_t len) {
  lookupkey_t result;
  result.str.len = len;
  result.str.str = str;
  return result;
}

/* Builds an integer lookup key. */
static lookupkey_t intkey(uintptr_t key) {
  lookupkey_t result;
  result.num = key;
  return result;
}
1433
/* Computes the hash of a key as stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Compares a stored entry key (`k1`) against a lookup key (`k2`). */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
1436
1437 /* Base table (shared code) ***************************************************/
1438
1439 /* For when we need to cast away const. */
mutable_entries(upb_table * t)1440 static upb_tabent *mutable_entries(upb_table *t) {
1441 return (upb_tabent*)t->entries;
1442 }
1443
/* Returns true if the table has no slots at all, or if adding one more entry
 * would push its load factor above MAX_LOAD. */
static bool isfull(upb_table *t) {
  size_t size = upb_table_size(t);

  if (size == 0) return true;
  return ((double)(t->count + 1) / size) > MAX_LOAD;
}
1451
/* Initializes `t` as an empty table with the given size log2, allocating a
 * zero-filled entry array from `a`.  A table whose size works out to zero
 * slots gets a NULL entry array.  Returns false if the allocation fails. */
static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
  size_t slots;
  size_t bytes;

  t->count = 0;
  t->size_lg2 = size_lg2;

  /* upb_table_size() is read only after size_lg2 has been stored. */
  slots = upb_table_size(t);
  t->mask = slots ? slots - 1 : 0;
  bytes = slots * sizeof(upb_tabent);

  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_malloc(a, bytes);
  if (!t->entries) return false;
  memset(mutable_entries(t), 0, bytes);
  return true;
}
1468
/* Releases the table's entry array back to allocator `a`. */
static void uninit(upb_table *t, upb_alloc *a) {
  upb_tabent *entries = mutable_entries(t);
  upb_free(a, entries);
}
1472
emptyent(upb_table * t)1473 static upb_tabent *emptyent(upb_table *t) {
1474 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
1475 while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
1476 }
1477
getentry_mutable(upb_table * t,uint32_t hash)1478 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
1479 return (upb_tabent*)upb_getentry(t, hash);
1480 }
1481
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)1482 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
1483 uint32_t hash, eqlfunc_t *eql) {
1484 const upb_tabent *e;
1485
1486 if (t->size_lg2 == 0) return NULL;
1487 e = upb_getentry(t, hash);
1488 if (upb_tabent_isempty(e)) return NULL;
1489 while (1) {
1490 if (eql(e->key, key)) return e;
1491 if ((e = e->next) == NULL) return NULL;
1492 }
1493 }
1494
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)1495 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
1496 uint32_t hash, eqlfunc_t *eql) {
1497 return (upb_tabent*)findentry(t, key, hash, eql);
1498 }
1499
/* Looks `key` up in `t`.  Returns true if it is present, additionally copying
 * the stored value into `*v` when `v` is non-NULL. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *e = findentry(t, key, hash, eql);

  if (!e) return false;
  if (v) {
    _upb_value_setval(v, e->val.val);
  }
  return true;
}
1512
/* Inserts (`tabkey`, `val`) into `t`.  The given key must not already exist
 * in the table (asserted), and the table must contain an empty slot whenever
 * the main position is occupied, since emptyent() is used to find one.
 *
 * `key`/`hash`/`eql` describe the key for lookup purposes; `hashfunc` is used
 * to find the main position of an entry that is already occupying ours. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e;  /* copies next. */
      /* Find the predecessor of the evicted entry so it can be relinked to
       * the entry's new location. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
1558
/* Removes the entry matching `key` from `t`.
 *
 * Returns true if an entry was removed.  In that case the stored value is
 * copied to `*val` and the stored key to `*removed` (each only when the
 * pointer is non-NULL), and the count is decremented.  Returns false if no
 * entry matches. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the second element of the chain into the head slot (the copy
       * carries its `next` link along) and free the slot it came from. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove; `chain` is its predecessor. */
      upb_tabent *rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0;  /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
1597
next(const upb_table * t,size_t i)1598 static size_t next(const upb_table *t, size_t i) {
1599 do {
1600 if (++i >= upb_table_size(t))
1601 return SIZE_MAX;
1602 } while(upb_tabent_isempty(&t->entries[i]));
1603
1604 return i;
1605 }
1606
/* Returns the index of the first non-empty slot, or SIZE_MAX if the table has
 * none.  Starting from (size_t)-1 makes next() examine slot 0 first. */
static size_t begin(const upb_table *t) {
  size_t before_first = (size_t)-1;
  return next(t, before_first);
}
1610
1611
1612 /* upb_strtable ***************************************************************/
1613
1614 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
1615
strcopy(lookupkey_t k2,upb_alloc * a)1616 static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
1617 uint32_t len = (uint32_t) k2.str.len;
1618 char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
1619 if (str == NULL) return 0;
1620 memcpy(str, &len, sizeof(uint32_t));
1621 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
1622 str[sizeof(uint32_t) + k2.str.len] = '\0';
1623 return (uintptr_t)str;
1624 }
1625
/* Hash of a stored string key: upb_murmur_hash2() with seed 0 over the bytes
 * that upb_tabstr() extracts from the key. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t len;
  char *bytes = upb_tabstr(key, &len);
  uint32_t hash = upb_murmur_hash2(bytes, len, 0);
  return hash;
}
1631
/* Returns true if the stored string key `k1` has the same length and bytes as
 * the lookup key `k2`. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t len;
  char *stored = upb_tabstr(k1, &len);

  if (len != k2.str.len) return false;
  return memcmp(stored, k2.str.str, len) == 0;
}
1637
/* Initializes `t` as an empty string table with an initial size log2 of 2,
 * allocating from `a`.  `ctype` is accepted but not used here.  Returns false
 * if the allocation fails. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
  upb_table *base = &t->t;
  (void)ctype;
  return init(base, 2, a);
}
1641
/* Empties the table in place: resets the count and zero-fills every entry
 * slot.  The key buffers the entries referred to are not freed here. */
void upb_strtable_clear(upb_strtable *t) {
  upb_table *base = &t->t;
  size_t bytes = upb_table_size(base) * sizeof(upb_tabent);

  base->count = 0;
  memset((char*)base->entries, 0, bytes);
}
1647
/* Frees every slot's key buffer (upb_free() is called for all slots, empty
 * ones included) and then the entry array itself. */
void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  size_t slots = upb_table_size(&t->t);
  size_t i;

  for (i = 0; i < slots; i++) {
    upb_free(a, (void*)t->t.entries[i].key);
  }
  uninit(&t->t, a);
}
1654
/* Rebuilds `t` with `size_lg2` as its size log2 by inserting every existing
 * entry into a fresh table and then replacing `*t` with it.
 *
 * Returns false if any allocation fails; in that case the partially built
 * table is released and `*t` is left exactly as it was.  Previously a failed
 * insert was ignored and the old table destroyed anyway, silently dropping
 * entries. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  if (!init(&new_table.t, size_lg2, a))
    return false;
  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    upb_strview key = upb_strtable_iter_key(&i);
    if (!upb_strtable_insert3(&new_table, key.data, key.size,
                              upb_strtable_iter_value(&i), a)) {
      /* Discard the copies made so far; the original table is still intact. */
      upb_strtable_uninit2(&new_table, a);
      return false;
    }
  }
  upb_strtable_uninit2(t, a);
  *t = new_table;
  return true;
}
1672
/* Inserts a copy of the `len`-byte key `k` with value `v`.  The key must not
 * already be present (see insert()).  When the table is full it is first
 * resized to the next size log2.  Returns false if resizing or copying the
 * key fails. */
bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
                          upb_value v, upb_alloc *a) {
  lookupkey_t key;
  upb_tabkey stored_key;
  uint32_t hash;

  /* Need to resize?  New table of double the size, add old elements to it. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  key = strkey2(k, len);
  stored_key = strcopy(key, a);
  if (stored_key == 0) return false;

  hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
  insert(&t->t, key, stored_key, v, hash, &strhash, &streql);
  return true;
}
1694
/* Looks up the `len`-byte key `key`.  Returns true if present, copying the
 * value into `*v` when `v` is non-NULL. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  lookupkey_t wanted = strkey2(key, len);
  uint32_t hash = upb_murmur_hash2(key, len, 0);
  return lookup(&t->t, wanted, v, hash, &streql);
}
1700
/* Removes the `len`-byte key `key`.  Returns false if it is not present.  On
 * success the removed value is copied to `*val` (when non-NULL) and, if
 * `alloc` is non-NULL, the stored key buffer is freed through it. */
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
                          upb_value *val, upb_alloc *alloc) {
  uint32_t hash = upb_murmur_hash2(key, len, 0);
  upb_tabkey stored_key;

  if (!rm(&t->t, strkey2(key, len), val, &stored_key, hash, &streql)) {
    return false;
  }

  /* Arena-based allocs don't need to free and won't pass this. */
  if (alloc) {
    upb_free(alloc, (void*)stored_key);
  }
  return true;
}
1715
1716 /* Iteration */
1717
/* Points iterator `i` at the first occupied slot of `t` (or past the end if
 * there is none). */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  size_t first = begin(&t->t);
  i->t = t;
  i->index = first;
}
1722
/* Advances iterator `i` to the next occupied slot of its table. */
void upb_strtable_next(upb_strtable_iter *i) {
  const upb_table *base = &i->t->t;
  i->index = next(base, i->index);
}
1726
upb_strtable_done(const upb_strtable_iter * i)1727 bool upb_strtable_done(const upb_strtable_iter *i) {
1728 if (!i->t) return true;
1729 return i->index >= upb_table_size(&i->t->t) ||
1730 upb_tabent_isempty(str_tabent(i));
1731 }
1732
upb_strtable_iter_key(const upb_strtable_iter * i)1733 upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) {
1734 upb_strview key;
1735 uint32_t len;
1736 UPB_ASSERT(!upb_strtable_done(i));
1737 key.data = upb_tabstr(str_tabent(i)->key, &len);
1738 key.size = len;
1739 return key;
1740 }
1741
upb_strtable_iter_value(const upb_strtable_iter * i)1742 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
1743 UPB_ASSERT(!upb_strtable_done(i));
1744 return _upb_value_val(str_tabent(i)->val.val);
1745 }
1746
/* Puts iterator `i` into the done state: no table and an index of
 * SIZE_MAX. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  i->index = SIZE_MAX;
  i->t = NULL;
}
1751
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)1752 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
1753 const upb_strtable_iter *i2) {
1754 if (upb_strtable_done(i1) && upb_strtable_done(i2))
1755 return true;
1756 return i1->t == i2->t && i1->index == i2->index;
1757 }
1758
1759
1760 /* upb_inttable ***************************************************************/
1761
1762 /* For inttables we use a hybrid structure where small keys are kept in an
1763 * array and large keys are put in the hash table. */
1764
/* hashfunc_t adapter for integer keys: forwards to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  uint32_t hash = upb_inthash(key);
  return hash;
}
1766
/* eqlfunc_t for integer keys: the stored key must equal the lookup number. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  bool same = (k1 == k2.num);
  return same;
}
1770
mutable_array(upb_inttable * t)1771 static upb_tabval *mutable_array(upb_inttable *t) {
1772 return (upb_tabval*)t->array;
1773 }
1774
inttable_val(upb_inttable * t,uintptr_t key)1775 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
1776 if (key < t->array_size) {
1777 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
1778 } else {
1779 upb_tabent *e =
1780 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
1781 return e ? &e->val : NULL;
1782 }
1783 }
1784
inttable_val_const(const upb_inttable * t,uintptr_t key)1785 static const upb_tabval *inttable_val_const(const upb_inttable *t,
1786 uintptr_t key) {
1787 return inttable_val((upb_inttable*)t, key);
1788 }
1789
upb_inttable_count(const upb_inttable * t)1790 size_t upb_inttable_count(const upb_inttable *t) {
1791 return t->t.count + t->array_count;
1792 }
1793
/* Consistency check that compiles to nothing unless UPB_DEBUG_TABLE is
 * defined and NDEBUG is not.  When enabled it iterates the whole table and
 * asserts that every key can be looked up and that the number of entries
 * visited matches upb_inttable_count(). */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter i;

    upb_inttable_begin(&i, t);
    while (!upb_inttable_done(&i)) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
      upb_inttable_next(&i);
      seen++;
    }
    UPB_ASSERT(seen == upb_inttable_count(t));
  }
#endif
}
1809
/* Initializes `t` with a hash part of size log2 `hsize_lg2` and an array part
 * of max(1, asize) slots, every byte of which is set to 0xff.  Returns false
 * if either allocation fails; the hash part is released again if the array
 * allocation is the one that fails. */
bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2,
                            upb_alloc *a) {
  size_t array_bytes;

  if (!init(&t->t, hsize_lg2, a)) return false;

  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  array_bytes = t->array_size * sizeof(upb_value);
  t->array = upb_malloc(a, array_bytes);

  if (t->array) {
    memset(mutable_array(t), 0xff, array_bytes);
    check(t);
    return true;
  }

  uninit(&t->t, a);
  return false;
}
1829
/* Initializes `t` with the default sizes: a requested array size of 0 and a
 * hash size log2 of 4.  `ctype` is accepted but not used here. */
bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  const size_t default_asize = 0;
  const int default_hsize_lg2 = 4;
  (void)ctype;
  return upb_inttable_sizedinit(t, default_asize, default_hsize_lg2, a);
}
1833
/* Releases the hash part and then the array part of `t` through `a`. */
void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  upb_tabval *array;

  uninit(&t->t, a);
  array = mutable_array(t);
  upb_free(a, array);
}
1838
/* Inserts `key` -> `val`.  Keys below array_size go into the array part
 * (the slot must currently be empty, asserted); all other keys go into the
 * hash part, which is first rebuilt at the next size log2 if it is full.
 * Returns false only if allocating the larger hash part fails. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table bigger;
    size_t i;

    if (!init(&bigger, t->t.size_lg2 + 1, a)) {
      return false;
    }

    /* Re-insert every existing hash entry into the larger table. */
    i = begin(&t->t);
    while (i < upb_table_size(&t->t)) {
      const upb_tabent *e = &t->t.entries[i];
      upb_value v;
      uint32_t hash;

      _upb_value_setval(&v, e->val.val);
      hash = upb_inthash(e->key);
      insert(&bigger, intkey(e->key), e->key, v, hash, &inthash, &inteql);
      i = next(&t->t, i);
    }

    UPB_ASSERT(t->t.count == bigger.count);

    uninit(&t->t, a);
    t->t = bigger;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
1879
/* Returns true if `key` is present, copying its value into `*v` when `v` is
 * non-NULL. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *stored = inttable_val_const(t, key);

  if (stored == NULL) return false;
  if (v != NULL) {
    _upb_value_setval(v, stored->val);
  }
  return true;
}
1886
/* Overwrites the value stored for an existing `key`.  Returns false, changing
 * nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *stored = inttable_val(t, key);

  if (stored == NULL) return false;
  stored->val = val.val;
  return true;
}
1893
/* Removes |key| from |t|.  Returns true if an entry was removed; when |val|
 * is non-NULL the removed value is stored there.  Keys below t->array_size
 * are cleared in the array part, all others are removed from the hash part. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool removed = false;

  if (key >= t->array_size) {
    removed = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;

    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val);
    }
    mutable_array(t)[key] = empty;
    removed = true;
  }

  check(t);
  return removed;
}
1914
/* Appends |val| using the current element count as its key. */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
  uintptr_t next_key = upb_inttable_count(t);

  return upb_inttable_insert2(t, next_key, val, a);
}
1918
upb_inttable_pop(upb_inttable * t)1919 upb_value upb_inttable_pop(upb_inttable *t) {
1920 upb_value val;
1921 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
1922 UPB_ASSERT(ok);
1923 return val;
1924 }
1925
/* Pointer-keyed convenience wrapper: inserts using the pointer's integer
 * value as the key. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  uintptr_t int_key = (uintptr_t)key;

  return upb_inttable_insert2(t, int_key, val, a);
}
1930
/* Pointer-keyed convenience wrapper: looks up using the pointer's integer
 * value as the key. */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  uintptr_t int_key = (uintptr_t)key;

  return upb_inttable_lookup(t, int_key, v);
}
1935
/* Pointer-keyed convenience wrapper: removes using the pointer's integer
 * value as the key. */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  uintptr_t int_key = (uintptr_t)key;

  return upb_inttable_remove(t, int_key, val);
}
1939
/* Rebuilds |t| into a freshly sized table: an array part chosen from a
 * power-of-two histogram of the keys (subject to MIN_DENSITY) and a hash part
 * sized for the remaining keys (subject to MAX_LOAD).
 *
 * The function cannot report failure.  If building the replacement table
 * fails, |t| is left exactly as it was, since compaction is only an
 * optimization. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    if (!upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a)) {
      /* Allocation failed: keep the existing table.
       * NOTE(review): assumes a failed upb_inttable_sizedinit() leaves
       * nothing behind that needs freeing -- confirm. */
      return;
    }

    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      if (!upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a)) {
        /* Could not copy every entry: drop the partial copy rather than
         * replacing |t| with a table that is missing entries. */
        upb_inttable_uninit2(&new_t, a);
        return;
      }
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  upb_inttable_uninit2(t, a);
  *t = new_t;
}
1996
1997 /* Iteration. */
1998
int_tabent(const upb_inttable_iter * i)1999 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
2000 UPB_ASSERT(!i->array_part);
2001 return &i->t->t.entries[i->index];
2002 }
2003
int_arrent(const upb_inttable_iter * i)2004 static upb_tabval int_arrent(const upb_inttable_iter *i) {
2005 UPB_ASSERT(i->array_part);
2006 return i->t->array[i->index];
2007 }
2008
/* Positions |i| on the first entry of |t| (array part first, then the hash
 * part).  The index starts at -1 so that the pre-increment inside
 * upb_inttable_next() lands on slot 0. */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
2015
/* Advances |iter| to the next entry.  While in the array part it skips empty
 * slots; once the array part is exhausted it switches to the first entry of
 * the hash part. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *table = iter->t;

  if (!iter->array_part) {
    iter->index = next(&table->t, iter->index);
    return;
  }

  while (++iter->index < table->array_size) {
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Array part exhausted: continue in the hash part. */
  iter->array_part = false;
  iter->index = begin(&table->t);
}
2030
upb_inttable_done(const upb_inttable_iter * i)2031 bool upb_inttable_done(const upb_inttable_iter *i) {
2032 if (!i->t) return true;
2033 if (i->array_part) {
2034 return i->index >= i->t->array_size ||
2035 !upb_arrhas(int_arrent(i));
2036 } else {
2037 return i->index >= upb_table_size(&i->t->t) ||
2038 upb_tabent_isempty(int_tabent(i));
2039 }
2040 }
2041
upb_inttable_iter_key(const upb_inttable_iter * i)2042 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
2043 UPB_ASSERT(!upb_inttable_done(i));
2044 return i->array_part ? i->index : int_tabent(i)->key;
2045 }
2046
upb_inttable_iter_value(const upb_inttable_iter * i)2047 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
2048 UPB_ASSERT(!upb_inttable_done(i));
2049 return _upb_value_val(
2050 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val);
2051 }
2052
/* Puts |i| into the "done" state: no table, index SIZE_MAX, hash part.
 * upb_inttable_done() reports true for an iterator with no table. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
  i->t = NULL;
}
2058
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)2059 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
2060 const upb_inttable_iter *i2) {
2061 if (upb_inttable_done(i1) && upb_inttable_done(i2))
2062 return true;
2063 return i1->t == i2->t && i1->index == i2->index &&
2064 i1->array_part == i2->array_part;
2065 }
2066
2067 #if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
2068 /* -----------------------------------------------------------------------------
2069 * MurmurHash2, by Austin Appleby (released as public domain).
2070 * Reformatted and C99-ified by Joshua Haberman.
2071 * Note - This code makes a few assumptions about how your machine behaves -
2072 * 1. We can read a 4-byte value from any address without crashing
 *    2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
2074 * And it has a few limitations -
2075 * 1. It will not work incrementally.
2076 * 2. It will not produce the same results on little-endian and big-endian
2077 * machines. */
/* MurmurHash2 over |len| bytes at |key|, seeded with |seed|.
 *
 * Whole 32-bit words are loaded with memcpy(), so |key| needs no particular
 * alignment.  Words are interpreted in host byte order, so results differ
 * between little-endian and big-endian machines. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
  /* Mixing constants from the reference implementation. */
  const uint32_t mul = 0x5bd1e995;
  const int32_t rot = 24;

  const uint8_t *p = (const uint8_t *)key;
  size_t remaining = len;

  /* Seed the state; only the low 32 bits of the length take part. */
  uint32_t h = (uint32_t)(seed ^ len);

  /* Body: fold in one 32-bit word per iteration. */
  for (; remaining >= 4; p += 4, remaining -= 4) {
    uint32_t word;
    memcpy(&word, p, sizeof(word));

    word *= mul;
    word ^= word >> rot;
    word *= mul;

    h *= mul;
    h ^= word;
  }

  /* Tail: at most three bytes are left, folded in from the highest down. */
  if (remaining >= 3) {
    h ^= (uint32_t)p[2] << 16;
  }
  if (remaining >= 2) {
    h ^= (uint32_t)p[1] << 8;
  }
  if (remaining >= 1) {
    h ^= p[0];
    h *= mul;
  }

  /* Final avalanche so the last bytes are well mixed in. */
  h ^= h >> 13;
  h *= mul;
  h ^= h >> 15;

  return h;
}
2119
2120 #else /* !UPB_UNALIGNED_READS_OK */
2121
2122 /* -----------------------------------------------------------------------------
2123 * MurmurHashAligned2, by Austin Appleby
2124 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
2125 * on certain platforms.
2126 * Performance will be lower than MurmurHash2 */
2127
/* One MurmurHash2 mixing round: scrambles word |k| and folds it into |h|.
 * Note that the shift amount |r| is not a macro argument; it is picked up
 * from the scope where MIX is expanded.  |k| is modified in place. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* Hashes |len| bytes at |key| with |seed|, issuing 32-bit loads only from
 * 4-byte-aligned addresses.
 *
 * If |key| is misaligned and at least 4 bytes long, the leading bytes are
 * gathered one at a time, after which whole aligned words are read and
 * stitched together with shifts.  Otherwise (already aligned, or fewer than
 * 4 bytes) the plain word-at-a-time loop is used. */
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;  /* Used implicitly by MIX(). */
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = (uint32_t)(seed ^ len);
  uint8_t align = (uintptr_t)data & 3;  /* Byte offset within a 4-byte word. */

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Each case intentionally falls through to the ones below it. */
    switch(align) {
      case 1: t |= data[2] << 16;
      case 2: t |= data[1] << 8;
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    /* Skip to the next 4-byte boundary; |data| is aligned from here on. */
    data += 4-align;
    len -= 4-align;

    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      /* Aligned load; combine the carried bytes in |t| with the new word. */
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      /* Carry the just-read word into the next iteration. */
      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      uint32_t k;

      /* Intentional fallthrough: gather |align| more bytes to finish a word. */
      switch(align) {
        case 3: d |= data[2] << 16;
        case 2: d |= data[1] << 8;
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      /* Intentional fallthrough. */
      switch(len) {
        case 3: h ^= data[2] << 16;
        case 2: h ^= data[1] << 8;
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      /* Fewer than |align| bytes remain: fold them together with the carried
       * bytes in a single step.  Intentional fallthrough. */
      switch(len) {
        case 3: d |= data[2] << 16;
        case 2: d |= data[1] << 8;
        case 1: d |= data[0];
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* |data| is either 4-byte aligned, or shorter than 4 bytes (in which case
     * this loop does not run and no word load is performed). */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    /* Intentional fallthrough. */
    switch(len) {
      case 3: h ^= data[2] << 16;
      case 2: h ^= data[1] << 8;
      case 1: h ^= data[0]; h *= m;
    };

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
2243
2244 #endif /* UPB_UNALIGNED_READS_OK */
2245
2246
2247 #include <errno.h>
2248 #include <stdarg.h>
2249 #include <stddef.h>
2250 #include <stdint.h>
2251 #include <stdio.h>
2252 #include <stdlib.h>
2253 #include <string.h>
2254
2255
2256 /* upb_status *****************************************************************/
2257
/* Resets |status| to "ok" with an empty message.  A NULL |status| is
 * ignored. */
void upb_status_clear(upb_status *status) {
  if (status != NULL) {
    status->msg[0] = '\0';
    status->ok = true;
  }
}
2263
upb_ok(const upb_status * status)2264 bool upb_ok(const upb_status *status) { return status->ok; }
2265
upb_status_errmsg(const upb_status * status)2266 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
2267
/* Marks |status| as failed and copies |msg| into it, truncating to
 * UPB_STATUS_MAX_MESSAGE - 1 characters.  The stored message is always
 * NUL-terminated.  A NULL |status| is ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  const size_t last = UPB_STATUS_MAX_MESSAGE - 1;

  if (status == NULL) {
    return;
  }

  status->ok = false;
  strncpy(status->msg, msg, last);
  status->msg[last] = '\0';  /* strncpy() does not terminate on truncation. */
}
2274
/* printf-style variant of upb_status_seterrmsg(): packages the variadic
 * arguments and forwards them to upb_status_vseterrf(). */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
2281
/* Marks |status| as failed and formats the message into its buffer from
 * |fmt| and |args|.  The last byte of the buffer is forced to NUL afterwards.
 * A NULL |status| is ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status == NULL) {
    return;
  }

  status->ok = false;
  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
}
2288
2289 /* upb_alloc ******************************************************************/
2290
/* Allocation callback backed by the C heap.  A |size| of zero frees |ptr| and
 * returns NULL; any other size is forwarded to realloc().  |alloc| and
 * |oldsize| are not used. */
static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
                                  size_t size) {
  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size != 0) {
    return realloc(ptr, size);
  }

  free(ptr);
  return NULL;
}
2302
/* Global allocator instance whose callback is upb_global_allocfunc(), i.e.
 * allocations are served by realloc()/free(). */
upb_alloc upb_alloc_global = {&upb_global_allocfunc};
2304
2305 /* upb_arena ******************************************************************/
2306
2307 /* Be conservative and choose 16 in case anyone is using SSE. */
2308
/* Arena state.  upb_arena_init() places this struct at the end of the
 * arena's first block. */
struct upb_arena {
  /* Must stay the first member: upb_arena_doalloc() casts the upb_alloc
   * pointer it receives back to upb_arena. */
  _upb_arena_head head;

  /* Start of the usable region of the current block; set by
   * upb_arena_addblock(). */
  char *start;

  /* Allocator to allocate arena blocks.  We are responsible for freeing these
   * when we are destroyed. */
  upb_alloc *block_alloc;

  /* Bytes consumed in blocks that are no longer current.  Usage of the
   * current block is added on top by upb_arena_bytesallocated(). */
  size_t bytes_allocated;

  /* Minimum payload size requested for the next block, and the cap applied
   * when that size is grown after each allocation. */
  size_t next_block_size;
  size_t max_block_size;

  /* Linked list of blocks, most recently added first.  Points to a
   * mem_block (declared below in this file). */
  void *block_head;

  /* Cleanup entries, most recently added first.  Points to a cleanup_ent
   * (declared below in this file). */
  void *cleanup_head;
};
2327
/* Header stored at the start of every arena block. */
typedef struct mem_block {
  /* Next block in the arena's list (the block that was current before this
   * one), or NULL. */
  struct mem_block *next;
  /* True if upb_arena_free() must release this block through the arena's
   * block allocator. */
  bool owned;
  /* Data follows. */
} mem_block;
2333
/* One registered cleanup callback.  Entries are allocated from the arena
 * itself and run by upb_arena_free() before any block is released. */
typedef struct cleanup_ent {
  struct cleanup_ent *next;   /* Next entry to run, or NULL. */
  upb_cleanup_func *cleanup;  /* Callback, invoked as cleanup(ud). */
  void *ud;                   /* Opaque argument passed to the callback. */
} cleanup_ent;
2339
/* Makes the |size|-byte region at |ptr| the arena's current block.
 *
 * A mem_block header is written at the start of the region and linked in
 * front of the block list.  The allocatable range then runs from just past
 * the (aligned) header to the end of the region.  |owned| records whether
 * upb_arena_free() should release the block. */
static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
                               bool owned) {
  mem_block *block = ptr;
  char *base = (char*)block;

  /* Bank the bytes used in the outgoing block before switching away. */
  if (a->block_head) {
    a->bytes_allocated += a->head.ptr - a->start;
  }

  block->owned = owned;
  block->next = a->block_head;
  a->block_head = block;

  a->start = base + _upb_arena_alignup(sizeof(mem_block));
  a->head.ptr = a->start;
  a->head.end = base + size;

  /* TODO(haberman): ASAN poison. */
}
2358
upb_arena_allocblock(upb_arena * a,size_t size)2359 static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
2360 size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
2361 mem_block *block = upb_malloc(a->block_alloc, block_size);
2362
2363 if (!block) {
2364 return NULL;
2365 }
2366
2367 upb_arena_addblock(a, block, block_size, true);
2368 a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
2369
2370 return block;
2371 }
2372
/* Slow allocation path: adds a fresh block large enough for |size| and then
 * retries the allocation.  Returns NULL if the block cannot be obtained. */
void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
  if (upb_arena_allocblock(a, size) == NULL) {
    return NULL;  /* Out of memory. */
  }
  return upb_arena_malloc(a, size);
}
2378
/* upb_alloc callback for arenas.
 *
 * - size == 0 is a free request: nothing is done and NULL is returned,
 *   because arena memory is released all at once.
 * - Otherwise a new region of |size| bytes is taken from the arena.  If
 *   |oldsize| is non-zero the existing contents are carried over, copying
 *   min(oldsize, size) bytes so that a shrinking reallocation never writes
 *   past the end of the new region.
 *
 * Returns the new region, or NULL if the arena cannot satisfy the request. */
static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
                               size_t size) {
  upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
  void *ret;

  if (size == 0) {
    return NULL;  /* We are an arena, don't need individual frees. */
  }

  ret = upb_arena_malloc(a, size);
  if (!ret) return NULL;

  /* TODO(haberman): special-case if this is a realloc of the last alloc? */

  if (oldsize > 0) {
    /* Preserve existing data, but never more than the new region holds. */
    memcpy(ret, ptr, UPB_MIN(oldsize, size));
  }

  /* TODO(haberman): ASAN unpoison. */
  return ret;
}
2400
2401 /* Public Arena API ***********************************************************/
2402
2403 #define upb_alignof(type) offsetof (struct { char c; type member; }, member)
2404
upb_arena_init(void * mem,size_t n,upb_alloc * alloc)2405 upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) {
2406 const size_t first_block_overhead = sizeof(upb_arena) + sizeof(mem_block);
2407 upb_arena *a;
2408 bool owned = false;
2409
2410 /* Round block size down to alignof(*a) since we will allocate the arena
2411 * itself at the end. */
2412 n &= ~(upb_alignof(upb_arena) - 1);
2413
2414 if (n < first_block_overhead) {
2415 /* We need to malloc the initial block. */
2416 n = first_block_overhead + 256;
2417 owned = true;
2418 if (!alloc || !(mem = upb_malloc(alloc, n))) {
2419 return NULL;
2420 }
2421 }
2422
2423 a = (void*)((char*)mem + n - sizeof(*a));
2424 n -= sizeof(*a);
2425
2426 a->head.alloc.func = &upb_arena_doalloc;
2427 a->head.ptr = NULL;
2428 a->head.end = NULL;
2429 a->start = NULL;
2430 a->block_alloc = &upb_alloc_global;
2431 a->bytes_allocated = 0;
2432 a->next_block_size = 256;
2433 a->max_block_size = 16384;
2434 a->cleanup_head = NULL;
2435 a->block_head = NULL;
2436 a->block_alloc = alloc;
2437
2438 upb_arena_addblock(a, mem, n, owned);
2439
2440 return a;
2441 }
2442
2443 #undef upb_alignof
2444
/* Destroys the arena: runs every registered cleanup callback (most recently
 * added first), then releases every block marked as owned. */
void upb_arena_free(upb_arena *a) {
  cleanup_ent *ent = a->cleanup_head;
  mem_block *block = a->block_head;

  for (; ent != NULL; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* Blocks go only after the callbacks have run, because the cleanup entries
   * themselves live in arena memory. */
  while (block != NULL) {
    mem_block *doomed = block;

    /* Advance first; |doomed| may be released below. */
    block = block->next;

    if (doomed->owned) {
      upb_free(a->block_alloc, doomed);
    }
  }
}
2467
/* Registers |func| to be called with |ud| when the arena is freed.  The entry
 * is allocated from the arena and pushed onto the front of the cleanup list.
 * Returns false if the entry cannot be allocated. */
bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
  cleanup_ent *entry = upb_malloc(&a->head.alloc, sizeof(cleanup_ent));

  if (entry == NULL) {
    return false;  /* Out of memory. */
  }

  entry->next = a->cleanup_head;
  entry->ud = ud;
  entry->cleanup = func;
  a->cleanup_head = entry;

  return true;
}
2481
upb_arena_bytesallocated(const upb_arena * a)2482 size_t upb_arena_bytesallocated(const upb_arena *a) {
2483 return a->bytes_allocated + (a->head.ptr - a->start);
2484 }
2485 /* This file was generated by upbc (the upb compiler) from the input
2486 * file:
2487 *
2488 * google/protobuf/descriptor.proto
2489 *
2490 * Do not edit -- your changes will be discarded when the file is
2491 * regenerated. */
2492
2493 #include <stddef.h>
2494
2495
/* Layout tables for FileDescriptorSet: sub-message layouts, one field entry,
 * and the exported layout that points at both.  UPB_SIZE(a, b) selects the
 * 32-bit or 64-bit variant of a value. */
static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
  &google_protobuf_FileDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
  &google_protobuf_FileDescriptorSet_submsgs[0],
  &google_protobuf_FileDescriptorSet__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2509
/* Layout tables for FileDescriptorProto: six sub-message layouts, twelve
 * field entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
  &google_protobuf_DescriptorProto_msginit,
  &google_protobuf_EnumDescriptorProto_msginit,
  &google_protobuf_FieldDescriptorProto_msginit,
  &google_protobuf_FileOptions_msginit,
  &google_protobuf_ServiceDescriptorProto_msginit,
  &google_protobuf_SourceCodeInfo_msginit,
};

static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
  {3, UPB_SIZE(36, 72), 0, 0, 9, 3},
  {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
  {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
  {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
  {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
  {8, UPB_SIZE(28, 56), 4, 3, 11, 1},
  {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
  {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
  {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
  {12, UPB_SIZE(20, 40), 3, 0, 9, 1},
};

const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
  &google_protobuf_FileDescriptorProto_submsgs[0],
  &google_protobuf_FileDescriptorProto__fields[0],
  UPB_SIZE(64, 128), 12, false,
};
2539
/* Layout tables for DescriptorProto: sub-message layouts, ten field entries,
 * and the exported layout that points at both.
 *
 * The sub-message array is declared with 8 slots but has only 7
 * initializers, so the last slot is implicitly NULL.  The fourth column of
 * the field entries (presumably the sub-message index -- confirm against
 * upb_msglayout_field) never exceeds 6 here. */
static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
  &google_protobuf_DescriptorProto_msginit,
  &google_protobuf_DescriptorProto_ExtensionRange_msginit,
  &google_protobuf_DescriptorProto_ReservedRange_msginit,
  &google_protobuf_EnumDescriptorProto_msginit,
  &google_protobuf_FieldDescriptorProto_msginit,
  &google_protobuf_MessageOptions_msginit,
  &google_protobuf_OneofDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
  {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
  {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
  {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
  {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
  {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
  {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
  {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
  {10, UPB_SIZE(44, 88), 0, 0, 9, 3},
};

const upb_msglayout google_protobuf_DescriptorProto_msginit = {
  &google_protobuf_DescriptorProto_submsgs[0],
  &google_protobuf_DescriptorProto__fields[0],
  UPB_SIZE(48, 96), 10, false,
};
2568
/* Layout tables for DescriptorProto.ExtensionRange: one sub-message layout,
 * three field entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
  &google_protobuf_ExtensionRangeOptions_msginit,
};

static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
  {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
};

const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
  &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
  &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
  UPB_SIZE(16, 24), 3, false,
};
2584
/* Layout tables for DescriptorProto.ReservedRange: two field entries and the
 * exported layout.  There is no sub-message array, so the layout's first
 * member is NULL. */
static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
  NULL,
  &google_protobuf_DescriptorProto_ReservedRange__fields[0],
  UPB_SIZE(12, 12), 2, false,
};
2595
/* Layout tables for ExtensionRangeOptions: one sub-message layout, one field
 * entry, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
  {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
  &google_protobuf_ExtensionRangeOptions_submsgs[0],
  &google_protobuf_ExtensionRangeOptions__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2609
/* Layout tables for FieldDescriptorProto: one sub-message layout, eleven
 * field entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
  &google_protobuf_FieldOptions_msginit,
};

static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
  {1, UPB_SIZE(36, 40), 6, 0, 9, 1},
  {2, UPB_SIZE(44, 56), 7, 0, 9, 1},
  {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
  {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
  {6, UPB_SIZE(52, 72), 8, 0, 9, 1},
  {7, UPB_SIZE(60, 88), 9, 0, 9, 1},
  {8, UPB_SIZE(76, 120), 11, 0, 11, 1},
  {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
  {10, UPB_SIZE(68, 104), 10, 0, 9, 1},
  {17, UPB_SIZE(32, 32), 5, 0, 8, 1},
};

const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
  &google_protobuf_FieldDescriptorProto_submsgs[0],
  &google_protobuf_FieldDescriptorProto__fields[0],
  UPB_SIZE(80, 128), 11, false,
};
2633
/* Layout tables for OneofDescriptorProto: one sub-message layout, two field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
  &google_protobuf_OneofOptions_msginit,
};

static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
};

const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
  &google_protobuf_OneofDescriptorProto_submsgs[0],
  &google_protobuf_OneofDescriptorProto__fields[0],
  UPB_SIZE(16, 32), 2, false,
};
2648
/* Layout tables for EnumDescriptorProto: three sub-message layouts, five
 * field entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
  &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
  &google_protobuf_EnumOptions_msginit,
  &google_protobuf_EnumValueDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
  {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
  {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
  {5, UPB_SIZE(24, 48), 0, 0, 9, 3},
};

const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
  &google_protobuf_EnumDescriptorProto_submsgs[0],
  &google_protobuf_EnumDescriptorProto__fields[0],
  UPB_SIZE(32, 64), 5, false,
};
2668
/* Layout tables for EnumDescriptorProto.EnumReservedRange: two field entries
 * and the exported layout.  There is no sub-message array, so the layout's
 * first member is NULL. */
static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
  NULL,
  &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
  UPB_SIZE(12, 12), 2, false,
};
2679
/* Layout tables for EnumValueDescriptorProto: one sub-message layout, three
 * field entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
  &google_protobuf_EnumValueOptions_msginit,
};

static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
  {1, UPB_SIZE(8, 8), 2, 0, 9, 1},
  {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
};

const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
  &google_protobuf_EnumValueDescriptorProto_submsgs[0],
  &google_protobuf_EnumValueDescriptorProto__fields[0],
  UPB_SIZE(24, 32), 3, false,
};
2695
/* Layout tables for ServiceDescriptorProto: two sub-message layouts, three
 * field entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
  &google_protobuf_MethodDescriptorProto_msginit,
  &google_protobuf_ServiceOptions_msginit,
};

static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
  {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
};

const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
  &google_protobuf_ServiceDescriptorProto_submsgs[0],
  &google_protobuf_ServiceDescriptorProto__fields[0],
  UPB_SIZE(24, 48), 3, false,
};
2712
/* Layout tables for MethodDescriptorProto: one sub-message layout, six field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
  &google_protobuf_MethodOptions_msginit,
};

static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
  {1, UPB_SIZE(4, 8), 3, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 4, 0, 9, 1},
  {3, UPB_SIZE(20, 40), 5, 0, 9, 1},
  {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
  {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
};

const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
  &google_protobuf_MethodDescriptorProto_submsgs[0],
  &google_protobuf_MethodDescriptorProto__fields[0],
  UPB_SIZE(32, 64), 6, false,
};
2731
/* Layout tables for FileOptions: one sub-message layout, twenty-one field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
  {1, UPB_SIZE(28, 32), 11, 0, 9, 1},
  {8, UPB_SIZE(36, 48), 12, 0, 9, 1},
  {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
  {11, UPB_SIZE(44, 64), 13, 0, 9, 1},
  {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
  {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
  {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
  {20, UPB_SIZE(20, 20), 6, 0, 8, 1},
  {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
  {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
  {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
  {36, UPB_SIZE(52, 80), 14, 0, 9, 1},
  {37, UPB_SIZE(60, 96), 15, 0, 9, 1},
  {39, UPB_SIZE(68, 112), 16, 0, 9, 1},
  {40, UPB_SIZE(76, 128), 17, 0, 9, 1},
  {41, UPB_SIZE(84, 144), 18, 0, 9, 1},
  {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
  {44, UPB_SIZE(92, 160), 19, 0, 9, 1},
  {45, UPB_SIZE(100, 176), 20, 0, 9, 1},
  {999, UPB_SIZE(108, 192), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FileOptions_msginit = {
  &google_protobuf_FileOptions_submsgs[0],
  &google_protobuf_FileOptions__fields[0],
  UPB_SIZE(112, 208), 21, false,
};
2765
/* Layout tables for MessageOptions: one sub-message layout, five field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
  {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
  {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
  {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
  {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_MessageOptions_msginit = {
  &google_protobuf_MessageOptions_submsgs[0],
  &google_protobuf_MessageOptions__fields[0],
  UPB_SIZE(12, 16), 5, false,
};
2783
/* Layout tables for FieldOptions: one sub-message layout, seven field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
  {1, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {2, UPB_SIZE(24, 24), 3, 0, 8, 1},
  {3, UPB_SIZE(25, 25), 4, 0, 8, 1},
  {5, UPB_SIZE(26, 26), 5, 0, 8, 1},
  {6, UPB_SIZE(16, 16), 2, 0, 14, 1},
  {10, UPB_SIZE(27, 27), 6, 0, 8, 1},
  {999, UPB_SIZE(28, 32), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FieldOptions_msginit = {
  &google_protobuf_FieldOptions_submsgs[0],
  &google_protobuf_FieldOptions__fields[0],
  UPB_SIZE(32, 40), 7, false,
};
2803
/* Layout tables for OneofOptions: one sub-message layout, one field entry,
 * and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
  {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_OneofOptions_msginit = {
  &google_protobuf_OneofOptions_submsgs[0],
  &google_protobuf_OneofOptions__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2817
/* Layout tables for EnumOptions: one sub-message layout, three field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
  {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_EnumOptions_msginit = {
  &google_protobuf_EnumOptions_submsgs[0],
  &google_protobuf_EnumOptions__fields[0],
  UPB_SIZE(8, 16), 3, false,
};
2833
/* Layout tables for EnumValueOptions: one sub-message layout, two field
 * entries, and the exported layout that points at both. */
static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
  {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
  &google_protobuf_EnumValueOptions_submsgs[0],
  &google_protobuf_EnumValueOptions__fields[0],
  UPB_SIZE(8, 16), 2, false,
};
2848
/* Submessage layouts referenced by the ServiceOptions field table. */
static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

/* Field layout table for the ServiceOptions message (2 entries).
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
  {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

/* Message layout for ServiceOptions; UPB_SIZE picks the 32- or 64-bit
 * value. */
const upb_msglayout google_protobuf_ServiceOptions_msginit = {
  &google_protobuf_ServiceOptions_submsgs[0],
  &google_protobuf_ServiceOptions__fields[0],
  UPB_SIZE(8, 16), 2, false,
};
2863
/* Submessage layouts referenced by the MethodOptions field table. */
static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

/* Field layout table for the MethodOptions message (3 entries).
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
  {33, UPB_SIZE(16, 16), 2, 0, 8, 1},
  {34, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {999, UPB_SIZE(20, 24), 0, 0, 11, 3},
};

/* Message layout for MethodOptions; UPB_SIZE picks the 32- or 64-bit value. */
const upb_msglayout google_protobuf_MethodOptions_msginit = {
  &google_protobuf_MethodOptions_submsgs[0],
  &google_protobuf_MethodOptions__fields[0],
  UPB_SIZE(24, 32), 3, false,
};
2879
/* Submessage layouts referenced by the UninterpretedOption field table. */
static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
  &google_protobuf_UninterpretedOption_NamePart_msginit,
};

/* Field layout table for the UninterpretedOption message (7 entries).
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
  {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
  {3, UPB_SIZE(32, 32), 4, 0, 9, 1},
  {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
  {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
  {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
  {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
  {8, UPB_SIZE(48, 64), 6, 0, 9, 1},
};

/* Message layout for UninterpretedOption; UPB_SIZE picks the 32- or 64-bit
 * value. */
const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
  &google_protobuf_UninterpretedOption_submsgs[0],
  &google_protobuf_UninterpretedOption__fields[0],
  UPB_SIZE(64, 96), 7, false,
};
2899
/* Field layout table for the UninterpretedOption.NamePart message
 * (2 entries).
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
  {1, UPB_SIZE(4, 8), 2, 0, 9, 2},
  {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
};

/* Message layout for NamePart.  The submessage table pointer is NULL. */
const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
  NULL,
  &google_protobuf_UninterpretedOption_NamePart__fields[0],
  UPB_SIZE(16, 32), 2, false,
};
2910
/* Submessage layouts referenced by the SourceCodeInfo field table. */
static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
  &google_protobuf_SourceCodeInfo_Location_msginit,
};

/* Field layout table for the SourceCodeInfo message (1 entry).
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

/* Message layout for SourceCodeInfo; UPB_SIZE picks the 32- or 64-bit
 * value. */
const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
  &google_protobuf_SourceCodeInfo_submsgs[0],
  &google_protobuf_SourceCodeInfo__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2924
/* Field layout table for the SourceCodeInfo.Location message (5 entries).
 * The first two entries use _UPB_LABEL_PACKED in the last column.
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
  {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
  {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
  {3, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {4, UPB_SIZE(12, 24), 2, 0, 9, 1},
  {6, UPB_SIZE(28, 56), 0, 0, 9, 3},
};

/* Message layout for Location.  The submessage table pointer is NULL. */
const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
  NULL,
  &google_protobuf_SourceCodeInfo_Location__fields[0],
  UPB_SIZE(32, 64), 5, false,
};
2938
/* Submessage layouts referenced by the GeneratedCodeInfo field table. */
static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
  &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
};

/* Field layout table for the GeneratedCodeInfo message (1 entry).
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

/* Message layout for GeneratedCodeInfo; UPB_SIZE picks the 32- or 64-bit
 * value. */
const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
  &google_protobuf_GeneratedCodeInfo_submsgs[0],
  &google_protobuf_GeneratedCodeInfo__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2952
/* Field layout table for the GeneratedCodeInfo.Annotation message
 * (4 entries).  The first entry uses _UPB_LABEL_PACKED in the last column.
 * NOTE(review): columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
  {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
  {2, UPB_SIZE(12, 16), 3, 0, 9, 1},
  {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

/* Message layout for Annotation.  The submessage table pointer is NULL. */
const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
  NULL,
  &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
  UPB_SIZE(24, 48), 4, false,
};
2965
2966
2967
2968
2969 #include <ctype.h>
2970 #include <errno.h>
2971 #include <stdlib.h>
2972 #include <string.h>
2973
2974
/* Length-prefixed string buffer.  `str` is declared with a single element,
 * but newstr() allocates sizeof(str_t) + len bytes so that the copied data
 * and its terminating NUL extend past the declared array. */
typedef struct {
  size_t len;   /* Number of data bytes in str, excluding the trailing NUL. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
2979
newstr(upb_alloc * alloc,const char * data,size_t len)2980 static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) {
2981 str_t *ret = upb_malloc(alloc, sizeof(*ret) + len);
2982 if (!ret) return NULL;
2983 ret->len = len;
2984 memcpy(ret->str, data, len);
2985 ret->str[len] = '\0';
2986 return ret;
2987 }
2988
/* Definition of a single message field (or extension). */
struct upb_fielddef {
  const upb_filedef *file;      /* File this field belongs to. */
  const upb_msgdef *msgdef;     /* Containing message. */
  const char *full_name;        /* Dotted name; short name follows last '.'. */
  const char *json_name;
  /* Default value; which member is read depends on the accessor used
   * (upb_fielddef_defaultint64(), upb_fielddef_defaultstr(), ...). */
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t *str;
  } defaultval;
  const upb_oneofdef *oneof;    /* Containing oneof, or NULL. */
  /* Sub-definition for message and enum fields. */
  union {
    const upb_msgdef *msgdef;
    const upb_enumdef *enumdef;
    /* NOTE(review): presumably holds the descriptor until the subdef is
     * resolved -- confirm against the symtab building code. */
    const google_protobuf_FieldDescriptorProto *unresolved;
  } sub;
  uint32_t number_;
  uint16_t index_;              /* Position after sorting; see assign_msg_indices(). */
  uint16_t layout_index;        /* Index into msgdef->layout->fields. */
  uint32_t selector_base;       /* Used to index into a upb::Handlers table. */
  bool is_extension_;
  bool lazy_;
  bool packed_;
  bool proto3_optional_;
  upb_descriptortype_t type_;
  upb_label_t label_;
};
3019
/* Definition of a message type. */
struct upb_msgdef {
  const upb_msglayout *layout;
  const upb_filedef *file;
  const char *full_name;
  /* Both counters are computed by assign_msg_indices(). */
  uint32_t selector_count;
  uint32_t submsg_field_count;

  /* Tables for looking up fields by number and name.  ntof stores tagged
   * pointers (see pack_def()/unpack_def()) and also holds oneofs and JSON
   * names. */
  upb_inttable itof;
  upb_strtable ntof;

  const upb_fielddef *fields;   /* Array of field_count entries. */
  const upb_oneofdef *oneofs;   /* Array of oneof_count entries. */
  int field_count;
  int oneof_count;
  int real_oneof_count;         /* Non-synthetic oneofs; set by check_oneofs(). */

  /* Is this a map-entry message? */
  bool map_entry;
  upb_wellknowntype_t well_known_type;

  /* TODO(haberman): proper extension ranges (there can be multiple). */
};
3043
/* Definition of an enum type. */
struct upb_enumdef {
  const upb_filedef *file;
  const char *full_name;
  upb_strtable ntoi;    /* Value name -> number; see upb_enumdef_ntoi(). */
  upb_inttable iton;    /* Number -> value name; see upb_enumdef_iton(). */
  int32_t defaultval;   /* Returned by upb_enumdef_default(). */
};
3051
/* Definition of a oneof inside a message. */
struct upb_oneofdef {
  const upb_msgdef *parent;
  const char *full_name;
  uint32_t index;       /* Position in parent->oneofs; set by check_oneofs(). */
  /* NOTE(review): presumably the oneof's member fields keyed by name and by
   * number -- confirm against the code that populates them. */
  upb_strtable ntof;
  upb_inttable itof;
};
3059
/* Definition of a .proto file: its metadata plus arrays of the definitions
 * it contains, each paired with an element count below. */
struct upb_filedef {
  const char *name;
  const char *package;
  const char *phpprefix;
  const char *phpnamespace;
  upb_syntax_t syntax;          /* Compared against UPB_SYNTAX_PROTO2 for field presence. */

  const upb_filedef **deps;     /* dep_count entries. */
  const upb_msgdef *msgs;       /* msg_count entries. */
  const upb_enumdef *enums;     /* enum_count entries. */
  const upb_fielddef *exts;     /* ext_count entries. */

  int dep_count;
  int msg_count;
  int enum_count;
  int ext_count;
};
3077
/* Symbol table: indexes definitions by full name and files by file name. */
struct upb_symtab {
  /* NOTE(review): presumably owns the memory of all defs in the table --
   * confirm against the symtab constructor/destructor. */
  upb_arena *arena;
  upb_strtable syms;  /* full_name -> packed def ptr */
  upb_strtable files;  /* file_name -> upb_filedef* */
};
3083
3084 /* Inside a symtab we store tagged pointers to specific def types. */
/* Inside a symtab we store tagged pointers to specific def types.
 *
 * The tag is kept in the low two bits of the pointer (see pack_def() and
 * unpack_def()).  The numeric values overlap on purpose: the symtab table and
 * the per-message table are separate namespaces, so the same tag value means
 * different things depending on which table the entry came from. */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
3096
/* Decodes a tagged pointer stored in a table value.  The low two bits hold
 * the upb_deftype_t tag: returns the untagged pointer when the tag equals
 * `type`, and NULL otherwise. */
static const void *unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);

  if ((tagged & 3) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~3);
}
3101
pack_def(const void * ptr,upb_deftype_t type)3102 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
3103 uintptr_t num = (uintptr_t)ptr | type;
3104 return upb_value_constptr((const void*)num);
3105 }
3106
/* The <ctype.h> classifiers (isalpha() and friends) depend on the current
 * locale, which we don't want, so plain comparisons are used instead.
 *
 * Returns true when `c` lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return c <= high;
}
3111
/* Returns true for ASCII letters and '_', independent of locale. */
static bool upb_isletter(char c) {
  if (upb_isbetween(c, 'A', 'Z')) return true;
  if (upb_isbetween(c, 'a', 'z')) return true;
  return c == '_';
}
3115
/* Returns true for characters accepted by upb_isletter() and for the ASCII
 * digits '0'..'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
3119
/* Validates `name` as an identifier.
 *
 * Every component must start with a letter or '_' and continue with letters,
 * digits or '_'.  When `full` is true, components may be joined by single
 * '.' separators; otherwise any '.' is rejected.  Empty names, empty
 * components and a trailing '.' are invalid.
 *
 * Returns true when the name is valid.  On failure returns false and records
 * an error message in `s`. */
static bool upb_isident(upb_strview name, bool full, upb_status *s) {
  const char *str = name.data;
  size_t len = name.size;
  /* A upb_strview carries an explicit size, so it must not be assumed to be
   * NUL-terminated: print it with "%.*s".  The precision argument must be an
   * int; 32767 is the smallest INT_MAX the C standard permits. */
  int print_len = len > 32767 ? 32767 : (int)len;
  bool start = true;  /* True while positioned at the start of a component. */
  size_t i;

  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%.*s)", print_len,
                           str);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s, "invalid name: path components must start with a letter (%.*s)",
            print_len, str);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(
            s, "invalid name: non-alphanumeric character (%.*s)", print_len,
            str);
        return false;
      }
    }
  }

  /* Still expecting the start of a component: the name was empty or ended
   * with '.'.  Report it instead of failing without a message. */
  if (start) {
    upb_status_seterrf(s, "invalid name: empty part (%.*s)", print_len, str);
    return false;
  }
  return true;
}
3151
/* Returns the part of `fullname` after its last '.', or `fullname` itself
 * when it contains no '.'.  A NULL input yields NULL.  The result points
 * into the caller's string; nothing is allocated. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) return NULL;

  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
3165
3166 /* All submessage fields are lower than all other fields.
3167 * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)3168 uint32_t field_rank(const upb_fielddef *f) {
3169 uint32_t ret = upb_fielddef_number(f);
3170 const uint32_t high_bit = 1 << 30;
3171 UPB_ASSERT(ret < high_bit);
3172 if (!upb_fielddef_issubmsg(f))
3173 ret |= high_bit;
3174 return ret;
3175 }
3176
cmp_fields(const void * p1,const void * p2)3177 int cmp_fields(const void *p1, const void *p2) {
3178 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
3179 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
3180 return field_rank(f1) - field_rank(f2);
3181 }
3182
3183 /* A few implementation details of handlers. We put these here to avoid
3184 * a def -> handlers dependency. */
3185
3186 #define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */
3187
/* Offset added to a field's first selector to obtain its selector base:
 * 2 for repeated fields, 0 otherwise. */
static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
3191
/* Number of handler selectors a field occupies.  Every field uses one; the
 * additions below depend on the field's label and type. */
static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  /* STARTSEQ/ENDSEQ */
  if (upb_fielddef_isseq(f)) count += 2;

  /* [STRING]/STARTSTR/ENDSTR */
  if (upb_fielddef_isstring(f)) count += 2;

  /* A submessage field adds nothing by itself (STARTSUBMSG is at the table
   * beginning); a lazy one adds STARTSTR/ENDSTR/STRING. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) count += 3;

  return count;
}
3206
/* Records an out-of-memory error in `status`. */
static void upb_status_setoom(upb_status *status) {
  static const char oom_message[] = "out of memory";

  upb_status_seterrmsg(status, oom_message);
}
3210
/* Assigns index_ and selector_base to every field of `m`, and computes the
 * message's selector_count and submsg_field_count.
 *
 * Fields are sorted with cmp_fields() first: upb internally relies on
 * UPB_TYPE_MESSAGE fields having the lowest indexes, but we do not publicly
 * guarantee this.
 *
 * Returns false and sets an out-of-memory error in `s` if the temporary sort
 * buffer cannot be allocated; returns true otherwise. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  const int field_total = upb_msgdef_numfields(m);
  upb_fielddef **sorted;
  upb_msg_field_iter it;
  uint32_t next_selector;
  int pos;

  /* A message without fields only has the static selectors. */
  if (field_total == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return true;
  }

  sorted = upb_gmalloc(field_total * sizeof(*sorted));
  if (!sorted) {
    upb_status_setoom(s);
    return false;
  }

  /* Collect every field into the scratch array, counting submessage fields
   * along the way. */
  m->submsg_field_count = 0;
  pos = 0;
  upb_msg_field_begin(&it, m);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);
    UPB_ASSERT(f->msgdef == m);
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    sorted[pos] = f;
    upb_msg_field_next(&it);
    pos++;
  }

  qsort(sorted, field_total, sizeof(*sorted), cmp_fields);

  /* Selectors for fields start after the static ones and one slot per
   * submessage field. */
  next_selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (pos = 0; pos < field_total; pos++) {
    upb_fielddef *f = sorted[pos];
    f->index_ = pos;
    f->selector_base = next_selector + upb_handlers_selectorbaseoffset(f);
    next_selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = next_selector;

  upb_gfree(sorted);
  return true;
}
3258
/* Numbers the oneofs of `m` by their position in m->oneofs, and verifies
 * that no real oneof follows a synthetic one.  On success, real_oneof_count
 * is set to the number of oneofs before the first synthetic one (or to
 * oneof_count when there is none).
 *
 * Returns false and sets an error in `s` when the ordering rule is
 * violated. */
static bool check_oneofs(upb_msgdef *m, upb_status *s) {
  upb_oneofdef *oneofs = (upb_oneofdef*)m->oneofs;
  int first_synthetic = -1;
  int idx;

  for (idx = 0; idx < m->oneof_count; idx++) {
    upb_oneofdef *o = &oneofs[idx];
    o->index = idx;

    if (upb_oneofdef_issynthetic(o)) {
      if (first_synthetic == -1) {
        first_synthetic = idx;
      }
      continue;
    }

    /* A real oneof: it must not come after any synthetic one. */
    if (first_synthetic != -1) {
      upb_status_seterrf(
          s, "Synthetic oneofs must be after all other oneofs: %s",
          upb_oneofdef_name(o));
      return false;
    }
  }

  m->real_oneof_count =
      (first_synthetic == -1) ? m->oneof_count : first_synthetic;
  return true;
}
3289
/* Sets m->well_known_type by matching the message's full name against the
 * list of well-known types below.  A NULL name or a name that is not in the
 * list yields UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *full_name;
    upb_wellknowntype_t type;
  } known_types[] = {
    {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
    {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
    {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
    {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
    {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
    {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
    {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
    {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
    {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
    {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
    {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
    {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
    {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
    {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
    {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
    {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT},
  };
  const char *name = upb_msgdef_fullname(m);
  size_t i;

  /* Default for a missing or unrecognized name. */
  m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
  if (name == NULL) {
    return;
  }

  for (i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++) {
    if (strcmp(name, known_types[i].full_name) == 0) {
      m->well_known_type = known_types[i].type;
      return;
    }
  }
}
3332
3333
3334 /* upb_enumdef ****************************************************************/
3335
upb_enumdef_fullname(const upb_enumdef * e)3336 const char *upb_enumdef_fullname(const upb_enumdef *e) {
3337 return e->full_name;
3338 }
3339
upb_enumdef_name(const upb_enumdef * e)3340 const char *upb_enumdef_name(const upb_enumdef *e) {
3341 return shortdefname(e->full_name);
3342 }
3343
upb_enumdef_file(const upb_enumdef * e)3344 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
3345 return e->file;
3346 }
3347
upb_enumdef_default(const upb_enumdef * e)3348 int32_t upb_enumdef_default(const upb_enumdef *e) {
3349 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
3350 return e->defaultval;
3351 }
3352
upb_enumdef_numvals(const upb_enumdef * e)3353 int upb_enumdef_numvals(const upb_enumdef *e) {
3354 return (int)upb_strtable_count(&e->ntoi);
3355 }
3356
/* Positions `i` at the first value of `e`.  Iteration walks the
 * name-to-number table, to account for duplicate numbers. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *names = &e->ntoi;

  upb_strtable_begin(i, names);
}
3361
/* Advances the enum value iterator to the next entry. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once the enum value iterator has passed the last entry. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
3364
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)3365 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
3366 size_t len, int32_t *num) {
3367 upb_value v;
3368 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
3369 return false;
3370 }
3371 if (num) *num = upb_value_getint32(v);
3372 return true;
3373 }
3374
upb_enumdef_iton(const upb_enumdef * def,int32_t num)3375 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
3376 upb_value v;
3377 return upb_inttable_lookup32(&def->iton, num, &v) ?
3378 upb_value_getcstr(v) : NULL;
3379 }
3380
/* Returns the name of the enum value the iterator currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const upb_strview key = upb_strtable_iter_key(iter);
  return key.data;
}
3384
/* Returns the number of the enum value the iterator currently points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  const upb_value entry = upb_strtable_iter_value(iter);
  return upb_value_getint32(entry);
}
3388
3389
3390 /* upb_fielddef ***************************************************************/
3391
upb_fielddef_fullname(const upb_fielddef * f)3392 const char *upb_fielddef_fullname(const upb_fielddef *f) {
3393 return f->full_name;
3394 }
3395
upb_fielddef_type(const upb_fielddef * f)3396 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
3397 switch (f->type_) {
3398 case UPB_DESCRIPTOR_TYPE_DOUBLE:
3399 return UPB_TYPE_DOUBLE;
3400 case UPB_DESCRIPTOR_TYPE_FLOAT:
3401 return UPB_TYPE_FLOAT;
3402 case UPB_DESCRIPTOR_TYPE_INT64:
3403 case UPB_DESCRIPTOR_TYPE_SINT64:
3404 case UPB_DESCRIPTOR_TYPE_SFIXED64:
3405 return UPB_TYPE_INT64;
3406 case UPB_DESCRIPTOR_TYPE_INT32:
3407 case UPB_DESCRIPTOR_TYPE_SFIXED32:
3408 case UPB_DESCRIPTOR_TYPE_SINT32:
3409 return UPB_TYPE_INT32;
3410 case UPB_DESCRIPTOR_TYPE_UINT64:
3411 case UPB_DESCRIPTOR_TYPE_FIXED64:
3412 return UPB_TYPE_UINT64;
3413 case UPB_DESCRIPTOR_TYPE_UINT32:
3414 case UPB_DESCRIPTOR_TYPE_FIXED32:
3415 return UPB_TYPE_UINT32;
3416 case UPB_DESCRIPTOR_TYPE_ENUM:
3417 return UPB_TYPE_ENUM;
3418 case UPB_DESCRIPTOR_TYPE_BOOL:
3419 return UPB_TYPE_BOOL;
3420 case UPB_DESCRIPTOR_TYPE_STRING:
3421 return UPB_TYPE_STRING;
3422 case UPB_DESCRIPTOR_TYPE_BYTES:
3423 return UPB_TYPE_BYTES;
3424 case UPB_DESCRIPTOR_TYPE_GROUP:
3425 case UPB_DESCRIPTOR_TYPE_MESSAGE:
3426 return UPB_TYPE_MESSAGE;
3427 }
3428 UPB_UNREACHABLE();
3429 }
3430
upb_fielddef_descriptortype(const upb_fielddef * f)3431 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
3432 return f->type_;
3433 }
3434
upb_fielddef_index(const upb_fielddef * f)3435 uint32_t upb_fielddef_index(const upb_fielddef *f) {
3436 return f->index_;
3437 }
3438
upb_fielddef_label(const upb_fielddef * f)3439 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
3440 return f->label_;
3441 }
3442
upb_fielddef_number(const upb_fielddef * f)3443 uint32_t upb_fielddef_number(const upb_fielddef *f) {
3444 return f->number_;
3445 }
3446
upb_fielddef_isextension(const upb_fielddef * f)3447 bool upb_fielddef_isextension(const upb_fielddef *f) {
3448 return f->is_extension_;
3449 }
3450
upb_fielddef_lazy(const upb_fielddef * f)3451 bool upb_fielddef_lazy(const upb_fielddef *f) {
3452 return f->lazy_;
3453 }
3454
upb_fielddef_packed(const upb_fielddef * f)3455 bool upb_fielddef_packed(const upb_fielddef *f) {
3456 return f->packed_;
3457 }
3458
upb_fielddef_name(const upb_fielddef * f)3459 const char *upb_fielddef_name(const upb_fielddef *f) {
3460 return shortdefname(f->full_name);
3461 }
3462
upb_fielddef_jsonname(const upb_fielddef * f)3463 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
3464 return f->json_name;
3465 }
3466
upb_fielddef_selectorbase(const upb_fielddef * f)3467 uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
3468 return f->selector_base;
3469 }
3470
upb_fielddef_file(const upb_fielddef * f)3471 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
3472 return f->file;
3473 }
3474
upb_fielddef_containingtype(const upb_fielddef * f)3475 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
3476 return f->msgdef;
3477 }
3478
upb_fielddef_containingoneof(const upb_fielddef * f)3479 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
3480 return f->oneof;
3481 }
3482
upb_fielddef_realcontainingoneof(const upb_fielddef * f)3483 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
3484 if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
3485 return f->oneof;
3486 }
3487
/* Placeholder hook called by the default-value accessors.  It currently
 * performs no check; both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(ctype);
  UPB_UNUSED(f);
}
3492
upb_fielddef_defaultint64(const upb_fielddef * f)3493 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
3494 chkdefaulttype(f, UPB_TYPE_INT64);
3495 return f->defaultval.sint;
3496 }
3497
upb_fielddef_defaultint32(const upb_fielddef * f)3498 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
3499 chkdefaulttype(f, UPB_TYPE_INT32);
3500 return (int32_t)f->defaultval.sint;
3501 }
3502
upb_fielddef_defaultuint64(const upb_fielddef * f)3503 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
3504 chkdefaulttype(f, UPB_TYPE_UINT64);
3505 return f->defaultval.uint;
3506 }
3507
upb_fielddef_defaultuint32(const upb_fielddef * f)3508 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
3509 chkdefaulttype(f, UPB_TYPE_UINT32);
3510 return (uint32_t)f->defaultval.uint;
3511 }
3512
upb_fielddef_defaultbool(const upb_fielddef * f)3513 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
3514 chkdefaulttype(f, UPB_TYPE_BOOL);
3515 return f->defaultval.boolean;
3516 }
3517
upb_fielddef_defaultfloat(const upb_fielddef * f)3518 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
3519 chkdefaulttype(f, UPB_TYPE_FLOAT);
3520 return f->defaultval.flt;
3521 }
3522
upb_fielddef_defaultdouble(const upb_fielddef * f)3523 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
3524 chkdefaulttype(f, UPB_TYPE_DOUBLE);
3525 return f->defaultval.dbl;
3526 }
3527
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)3528 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
3529 str_t *str = f->defaultval.str;
3530 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
3531 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
3532 upb_fielddef_type(f) == UPB_TYPE_ENUM);
3533 if (str) {
3534 if (len) *len = str->len;
3535 return str->str;
3536 } else {
3537 if (len) *len = 0;
3538 return NULL;
3539 }
3540 }
3541
upb_fielddef_msgsubdef(const upb_fielddef * f)3542 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
3543 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
3544 return f->sub.msgdef;
3545 }
3546
upb_fielddef_enumsubdef(const upb_fielddef * f)3547 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
3548 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_ENUM);
3549 return f->sub.enumdef;
3550 }
3551
upb_fielddef_layout(const upb_fielddef * f)3552 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
3553 return &f->msgdef->layout->fields[f->layout_index];
3554 }
3555
upb_fielddef_issubmsg(const upb_fielddef * f)3556 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
3557 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
3558 }
3559
upb_fielddef_isstring(const upb_fielddef * f)3560 bool upb_fielddef_isstring(const upb_fielddef *f) {
3561 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
3562 upb_fielddef_type(f) == UPB_TYPE_BYTES;
3563 }
3564
upb_fielddef_isseq(const upb_fielddef * f)3565 bool upb_fielddef_isseq(const upb_fielddef *f) {
3566 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
3567 }
3568
/* Returns true if the field is neither a string/bytes field nor a
 * submessage field. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  if (upb_fielddef_isstring(f)) {
    return false;
  }
  return !upb_fielddef_issubmsg(f);
}
3572
/* Returns true if the field is a map: a repeated submessage field whose
 * message type is flagged as a map entry. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
3577
upb_fielddef_hassubdef(const upb_fielddef * f)3578 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
3579 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
3580 }
3581
upb_fielddef_haspresence(const upb_fielddef * f)3582 bool upb_fielddef_haspresence(const upb_fielddef *f) {
3583 if (upb_fielddef_isseq(f)) return false;
3584 return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
3585 f->file->syntax == UPB_SYNTAX_PROTO2;
3586 }
3587
/* Returns true when `x` lies in the inclusive range [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
3591
/* Returns true if `label` is in the accepted range 1..3. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* Returns true if `type` is in the accepted range 1..11. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* Returns true if `fmt` is in the accepted range 1..3. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
3595
/* Returns true if `type` is in the accepted descriptor-type range 1..18. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
3599
3600 /* upb_msgdef *****************************************************************/
3601
upb_msgdef_fullname(const upb_msgdef * m)3602 const char *upb_msgdef_fullname(const upb_msgdef *m) {
3603 return m->full_name;
3604 }
3605
upb_msgdef_file(const upb_msgdef * m)3606 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
3607 return m->file;
3608 }
3609
upb_msgdef_name(const upb_msgdef * m)3610 const char *upb_msgdef_name(const upb_msgdef *m) {
3611 return shortdefname(m->full_name);
3612 }
3613
upb_msgdef_syntax(const upb_msgdef * m)3614 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
3615 return m->file->syntax;
3616 }
3617
upb_msgdef_selectorcount(const upb_msgdef * m)3618 size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
3619 return m->selector_count;
3620 }
3621
upb_msgdef_submsgfieldcount(const upb_msgdef * m)3622 uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
3623 return m->submsg_field_count;
3624 }
3625
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)3626 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
3627 upb_value val;
3628 return upb_inttable_lookup32(&m->itof, i, &val) ?
3629 upb_value_getconstptr(val) : NULL;
3630 }
3631
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)3632 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
3633 size_t len) {
3634 upb_value val;
3635
3636 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3637 return NULL;
3638 }
3639
3640 return unpack_def(val, UPB_DEFTYPE_FIELD);
3641 }
3642
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)3643 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
3644 size_t len) {
3645 upb_value val;
3646
3647 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3648 return NULL;
3649 }
3650
3651 return unpack_def(val, UPB_DEFTYPE_ONEOF);
3652 }
3653
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)3654 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
3655 const upb_fielddef **f, const upb_oneofdef **o) {
3656 upb_value val;
3657
3658 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3659 return false;
3660 }
3661
3662 *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
3663 *f = unpack_def(val, UPB_DEFTYPE_FIELD);
3664 return *o || *f; /* False if this was a JSON name. */
3665 }
3666
upb_msgdef_lookupjsonname(const upb_msgdef * m,const char * name,size_t len)3667 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
3668 const char *name, size_t len) {
3669 upb_value val;
3670 const upb_fielddef* f;
3671
3672 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3673 return NULL;
3674 }
3675
3676 f = unpack_def(val, UPB_DEFTYPE_FIELD);
3677 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
3678
3679 return f;
3680 }
3681
upb_msgdef_numfields(const upb_msgdef * m)3682 int upb_msgdef_numfields(const upb_msgdef *m) {
3683 return m->field_count;
3684 }
3685
upb_msgdef_numoneofs(const upb_msgdef * m)3686 int upb_msgdef_numoneofs(const upb_msgdef *m) {
3687 return m->oneof_count;
3688 }
3689
upb_msgdef_numrealoneofs(const upb_msgdef * m)3690 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
3691 return m->real_oneof_count;
3692 }
3693
upb_msgdef_layout(const upb_msgdef * m)3694 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
3695 return m->layout;
3696 }
3697
_upb_msgdef_field(const upb_msgdef * m,int i)3698 const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) {
3699 if (i >= m->field_count) return NULL;
3700 return &m->fields[i];
3701 }
3702
upb_msgdef_mapentry(const upb_msgdef * m)3703 bool upb_msgdef_mapentry(const upb_msgdef *m) {
3704 return m->map_entry;
3705 }
3706
upb_msgdef_wellknowntype(const upb_msgdef * m)3707 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
3708 return m->well_known_type;
3709 }
3710
upb_msgdef_isnumberwrapper(const upb_msgdef * m)3711 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
3712 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
3713 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
3714 type <= UPB_WELLKNOWN_UINT32VALUE;
3715 }
3716
/* Positions `iter` at the first field of `m`.  Iteration walks the
 * message's number-to-field table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  const upb_inttable *by_number = &m->itof;

  upb_inttable_begin(iter, by_number);
}
3720
/* Advances the field iterator to the next entry. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
3722
/* Returns true once the field iterator has passed the last entry. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
3726
upb_msg_iter_field(const upb_msg_field_iter * iter)3727 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
3728 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
3729 }
3730
/* Marks the field iterator as finished by delegating to
 * upb_inttable_iter_setdone(). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
3734
/* Compares two field iterators using upb_inttable_iter_isequal(). */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
                                const upb_msg_field_iter * iter2) {
  const bool same = upb_inttable_iter_isequal(iter1, iter2);
  return same;
}
3739
/* Starts |iter| over the name table (ntof) of |m| and positions it on the
 * first entry that unpacks as a oneof.  Entries of any other kind at the
 * front of the table are skipped. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  for (upb_strtable_begin(iter, &m->ntof); !upb_strtable_done(iter);
       upb_strtable_next(iter)) {
    if (unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
      break;
    }
  }
}
3748
/* Advances |iter| to the next table entry that unpacks as a oneof, or to the
 * end of the table if there is none.  Always steps at least once. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  for (;;) {
    upb_strtable_next(iter);
    if (upb_strtable_done(iter)) return;
    if (unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) return;
  }
}
3756
/* Reports whether |iter| is exhausted, as determined by upb_strtable_done(). */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  const bool finished = upb_strtable_done(iter);
  return finished;
}
3760
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)3761 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
3762 return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
3763 }
3764
/* Marks |iter| as done by forwarding to upb_strtable_iter_setdone(). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_msg_oneof_iter *target = iter;
  upb_strtable_iter_setdone(target);
}
3768
/* Compares two oneof iterators using upb_strtable_iter_isequal(). */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  const bool same = upb_strtable_iter_isequal(iter1, iter2);
  return same;
}
3773
3774 /* upb_oneofdef ***************************************************************/
3775
upb_oneofdef_name(const upb_oneofdef * o)3776 const char *upb_oneofdef_name(const upb_oneofdef *o) {
3777 return shortdefname(o->full_name);
3778 }
3779
upb_oneofdef_containingtype(const upb_oneofdef * o)3780 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
3781 return o->parent;
3782 }
3783
upb_oneofdef_numfields(const upb_oneofdef * o)3784 int upb_oneofdef_numfields(const upb_oneofdef *o) {
3785 return (int)upb_strtable_count(&o->ntof);
3786 }
3787
upb_oneofdef_index(const upb_oneofdef * o)3788 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
3789 return o->index;
3790 }
3791
upb_oneofdef_issynthetic(const upb_oneofdef * o)3792 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
3793 upb_inttable_iter iter;
3794 const upb_fielddef *f;
3795 upb_inttable_begin(&iter, &o->itof);
3796 if (upb_oneofdef_numfields(o) != 1) return false;
3797 f = upb_value_getptr(upb_inttable_iter_value(&iter));
3798 UPB_ASSERT(f);
3799 return f->proto3_optional_;
3800 }
3801
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)3802 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
3803 const char *name, size_t length) {
3804 upb_value val;
3805 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
3806 upb_value_getptr(val) : NULL;
3807 }
3808
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)3809 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
3810 upb_value val;
3811 return upb_inttable_lookup32(&o->itof, num, &val) ?
3812 upb_value_getptr(val) : NULL;
3813 }
3814
/* Starts |iter| over the number->field table (itof) of oneof |o|. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *by_number = &o->itof;
  upb_inttable_begin(iter, by_number);
}
3818
/* Advances |iter| by forwarding to upb_inttable_next(). */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_oneof_iter *cursor = iter;
  upb_inttable_next(cursor);
}
3822
/* Reports whether |iter| is exhausted, as determined by upb_inttable_done(). */
bool upb_oneof_done(upb_oneof_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
3826
upb_oneof_iter_field(const upb_oneof_iter * iter)3827 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
3828 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
3829 }
3830
/* Marks |iter| as done by forwarding to upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_oneof_iter *target = iter;
  upb_inttable_iter_setdone(target);
}
3834
3835 /* Dynamic Layout Generation. *************************************************/
3836
/* Returns true iff |val| is a power of two.  Zero is not a power of two: the
 * bit trick below would otherwise accept it, which would let a zero alignment
 * slip through the assertion in align_up(). */
static bool is_power_of_two(size_t val) {
  return val != 0 && (val & (val - 1)) == 0;
}
3840
/* Rounds |val| up to the next multiple of |align|.  |align| is asserted to be
 * a power of two, which is what makes the mask arithmetic valid. */
static size_t align_up(size_t val, size_t align) {
  const size_t mask = align - 1;
  UPB_ASSERT(is_power_of_two(align));
  return (val + mask) & ~mask;
}
3846
/* Returns ceil(n / d) for d != 0.  Written as quotient plus a remainder test
 * rather than (n + d - 1) / d so that the result is still correct when
 * n + d - 1 would wrap around. */
static size_t div_round_up(size_t n, size_t d) {
  return n / d + (n % d != 0 ? 1 : 0);
}
3850
/* Returns the number of bytes used to store one value of the given field
 * type: 1 for bool, 4 or 8 for the fixed-width scalars, a pointer for
 * messages and a upb_strview for strings/bytes. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
  }
  UPB_UNREACHABLE();
}
3872
upb_msg_fielddefsize(const upb_fielddef * f)3873 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
3874 if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
3875 upb_map_entry ent;
3876 UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
3877 return sizeof(ent.k);
3878 } else if (upb_fielddef_isseq(f)) {
3879 return sizeof(void*);
3880 } else {
3881 return upb_msgval_sizeof(upb_fielddef_type(f));
3882 }
3883 }
3884
/* Reserves |size| bytes at the end of layout |l|.  The current size is first
 * aligned up to |size| (so |size| doubles as the alignment), then grown by
 * |size|.  Returns the aligned offset at which the new slot starts. */
static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  uint32_t offset;

  l->size = align_up(l->size, size);
  offset = l->size;
  l->size += size;

  return offset;
}
3893
3894 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
3895 * It computes a dynamic layout for all of the fields in |m|. */
make_layout(const upb_symtab * symtab,const upb_msgdef * m)3896 static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
3897 upb_msglayout *l = (upb_msglayout*)m->layout;
3898 upb_msg_field_iter it;
3899 upb_msg_oneof_iter oit;
3900 size_t hasbit;
3901 size_t submsg_count = m->submsg_field_count;
3902 const upb_msglayout **submsgs;
3903 upb_msglayout_field *fields;
3904 upb_alloc *alloc = upb_arena_alloc(symtab->arena);
3905
3906 memset(l, 0, sizeof(*l));
3907
3908 fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields));
3909 submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs));
3910
3911 if ((!fields && upb_msgdef_numfields(m)) ||
3912 (!submsgs && submsg_count)) {
3913 /* OOM. */
3914 return false;
3915 }
3916
3917 l->field_count = upb_msgdef_numfields(m);
3918 l->fields = fields;
3919 l->submsgs = submsgs;
3920
3921 if (upb_msgdef_mapentry(m)) {
3922 /* TODO(haberman): refactor this method so this special case is more
3923 * elegant. */
3924 const upb_fielddef *key = upb_msgdef_itof(m, 1);
3925 const upb_fielddef *val = upb_msgdef_itof(m, 2);
3926 fields[0].number = 1;
3927 fields[1].number = 2;
3928 fields[0].label = UPB_LABEL_OPTIONAL;
3929 fields[1].label = UPB_LABEL_OPTIONAL;
3930 fields[0].presence = 0;
3931 fields[1].presence = 0;
3932 fields[0].descriptortype = upb_fielddef_descriptortype(key);
3933 fields[1].descriptortype = upb_fielddef_descriptortype(val);
3934 fields[0].offset = 0;
3935 fields[1].offset = sizeof(upb_strview);
3936 fields[1].submsg_index = 0;
3937
3938 if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
3939 submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
3940 }
3941
3942 l->field_count = 2;
3943 l->size = 2 * sizeof(upb_strview);align_up(l->size, 8);
3944 return true;
3945 }
3946
3947 /* Allocate data offsets in three stages:
3948 *
3949 * 1. hasbits.
3950 * 2. regular fields.
3951 * 3. oneof fields.
3952 *
3953 * OPT: There is a lot of room for optimization here to minimize the size.
3954 */
3955
3956 /* Allocate hasbits and set basic field attributes. */
3957 submsg_count = 0;
3958 for (upb_msg_field_begin(&it, m), hasbit = 0;
3959 !upb_msg_field_done(&it);
3960 upb_msg_field_next(&it)) {
3961 upb_fielddef* f = upb_msg_iter_field(&it);
3962 upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
3963
3964 field->number = upb_fielddef_number(f);
3965 field->descriptortype = upb_fielddef_descriptortype(f);
3966 field->label = upb_fielddef_label(f);
3967
3968 if (upb_fielddef_ismap(f)) {
3969 field->label = _UPB_LABEL_MAP;
3970 } else if (upb_fielddef_packed(f)) {
3971 field->label = _UPB_LABEL_PACKED;
3972 }
3973
3974 /* TODO: we probably should sort the fields by field number to match the
3975 * output of upbc, and to improve search speed for the table parser. */
3976 f->layout_index = f->index_;
3977
3978 if (upb_fielddef_issubmsg(f)) {
3979 const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
3980 field->submsg_index = submsg_count++;
3981 submsgs[field->submsg_index] = subm->layout;
3982 }
3983
3984 if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
3985 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
3986 * table. This wastes one hasbit, but we don't worry about it for now. */
3987 field->presence = ++hasbit;
3988 } else {
3989 field->presence = 0;
3990 }
3991 }
3992
3993 /* Account for space used by hasbits. */
3994 l->size = div_round_up(hasbit, 8);
3995
3996 /* Allocate non-oneof fields. */
3997 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
3998 upb_msg_field_next(&it)) {
3999 const upb_fielddef* f = upb_msg_iter_field(&it);
4000 size_t field_size = upb_msg_fielddefsize(f);
4001 size_t index = upb_fielddef_index(f);
4002
4003 if (upb_fielddef_realcontainingoneof(f)) {
4004 /* Oneofs are handled separately below. */
4005 continue;
4006 }
4007
4008 fields[index].offset = upb_msglayout_place(l, field_size);
4009 }
4010
4011 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
4012 * and space for the actual data. */
4013 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
4014 upb_msg_oneof_next(&oit)) {
4015 const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
4016 upb_oneof_iter fit;
4017
4018 if (upb_oneofdef_issynthetic(o)) continue;
4019
4020 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
4021 size_t field_size = 0;
4022 uint32_t case_offset;
4023 uint32_t data_offset;
4024
4025 /* Calculate field size: the max of all field sizes. */
4026 for (upb_oneof_begin(&fit, o);
4027 !upb_oneof_done(&fit);
4028 upb_oneof_next(&fit)) {
4029 const upb_fielddef* f = upb_oneof_iter_field(&fit);
4030 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
4031 }
4032
4033 /* Align and allocate case offset. */
4034 case_offset = upb_msglayout_place(l, case_size);
4035 data_offset = upb_msglayout_place(l, field_size);
4036
4037 for (upb_oneof_begin(&fit, o);
4038 !upb_oneof_done(&fit);
4039 upb_oneof_next(&fit)) {
4040 const upb_fielddef* f = upb_oneof_iter_field(&fit);
4041 fields[upb_fielddef_index(f)].offset = data_offset;
4042 fields[upb_fielddef_index(f)].presence = ~case_offset;
4043 }
4044 }
4045
4046 /* Size of the entire structure should be a multiple of its greatest
4047 * alignment. TODO: track overall alignment for real? */
4048 l->size = align_up(l->size, 8);
4049
4050 return true;
4051 }
4052
4053 /* Code to build defs from descriptor protos. *********************************/
4054
4055 /* There is a question of how much validation to do here. It will be difficult
4056 * to perfectly match the amount of validation performed by proto2. But since
4057 * this code is used to directly build defs from Ruby (for example) we do need
4058 * to validate important constraints like uniqueness of names and numbers. */
4059
/* Early-exit helpers for the bool-returning builder functions below.
 *
 *   CHK(x):     return false from the enclosing function if |x| is falsy.
 *   CHK_OOM(x): same, but first records an out-of-memory error on
 *               ctx->status; a variable named |ctx| must be in scope.
 *
 * Both expand to a bare `if` statement (not wrapped in do/while), so do not
 * place them directly in front of an `else`. */
#define CHK(x) if (!(x)) { return false; }
#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; }
4062
/* Context passed (as |ctx|) to the functions below that build defs from
 * descriptor protos.  It bundles the target symtab and file, the allocators
 * to use, and the status object on which errors are reported. */
typedef struct {
  const upb_symtab *symtab;
  upb_filedef *file;              /* File we are building. */
  upb_alloc *alloc;               /* Allocate defs here. */
  upb_alloc *tmp;                 /* Alloc for addtab and any other tmp data. */
  upb_strtable *addtab;           /* full_name -> packed def ptr for new defs */
  const upb_msglayout **layouts;  /* NULL if we should build layouts. */
  upb_status *status;             /* Record errors here. */
} symtab_addctx;
4072
/* Duplicates the bytes of |view| via upb_strdup2(), allocating from
 * ctx->alloc, and returns the result of that call. */
static char* strviewdup(const symtab_addctx *ctx, upb_strview view) {
  upb_alloc *dest = ctx->alloc;
  return upb_strdup2(view.data, view.size, dest);
}
4076
/* Returns true when the |n| bytes at |a| are exactly the NUL-terminated
 * string |b| (same length, same bytes).  |a| need not be NUL-terminated. */
static bool streql2(const char *a, size_t n, const char *b) {
  if (strlen(b) != n) return false;
  return memcmp(a, b, n) == 0;
}
4080
/* Returns true when the bytes of |view| are exactly the NUL-terminated
 * string |b|; delegates to streql2(). */
static bool streql_view(upb_strview view, const char *b) {
  const char *bytes = view.data;
  return streql2(bytes, view.size, b);
}
4084
/* Builds a NUL-terminated full name from ctx->alloc:
 *   - with a prefix:    "<prefix>.<name>"
 *   - without a prefix: a copy of |name|
 *
 * Returns NULL on allocation failure, after recording OOM on ctx->status.
 * (Previously the failure path returned `false` from this pointer-returning
 * function, and the no-prefix path did not record the OOM at all.) */
static const char *makefullname(const symtab_addctx *ctx, const char *prefix,
                                upb_strview name) {
  size_t n;
  char *ret;

  if (!prefix) {
    ret = strviewdup(ctx, name);
    if (!ret) upb_status_setoom(ctx->status);
    return ret;
  }

  /* ret = prefix + '.' + name; */
  n = strlen(prefix);
  ret = upb_malloc(ctx->alloc, n + name.size + 2);
  if (!ret) {
    upb_status_setoom(ctx->status);
    return NULL;
  }

  memcpy(ret, prefix, n);
  ret[n] = '.';
  /* Skip the copy for an empty name so a NULL data pointer never reaches
   * memcpy. */
  if (name.size) memcpy(&ret[n + 1], name.data, name.size);
  ret[n + 1 + name.size] = '\0';
  return ret;
}
4101
/* Writes the JSON form of field name |name| into |buf| (capacity |len|).
 *
 * The transformation drops every underscore and upper-cases the character
 * that follows one.  Output is truncated to fit: when |len| > 0 the buffer is
 * always NUL-terminated.  |buf| may be NULL when |len| is 0.
 *
 * Returns the number of bytes the full result needs, including the
 * terminating NUL, so a caller can size a buffer by first calling with
 * (NULL, 0).  A NULL |name| yields an empty string and a return value of 0. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

  /* Counts one output byte and stores it if it fits; the last slot of the
   * buffer is reserved for the terminator. */
#define WRITE(byte)                            \
  do {                                         \
    ++dst;                                     \
    if (dst < len) buf[dst - 1] = (byte);      \
    else if (dst == len) buf[dst - 1] = '\0';  \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char (or EOF);
       * passing a negative plain char is undefined behavior. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
4139
/* Returns a newly allocated (from |alloc|) JSON name for |name|, as produced
 * by getjsonname().  Returns NULL if the allocation fails; previously a
 * failed allocation was handed to getjsonname(), which would then write
 * through the NULL buffer. */
static char* makejsonname(const char* name, upb_alloc *alloc) {
  /* First pass computes the required size, second pass fills the buffer. */
  size_t size = getjsonname(name, NULL, 0);
  char* json_name = upb_malloc(alloc, size);
  if (!json_name) return NULL;
  getjsonname(name, json_name, size);
  return json_name;
}
4146
/* Registers |name| -> |v| in the table of pending defs (ctx->addtab).
 *
 * Fails with a "duplicate symbol" error if |name| is already present either
 * in ctx->addtab or in the symtab's own table, and with an OOM error if the
 * insertion fails.  Returns true on success. */
static bool symtab_add(const symtab_addctx *ctx, const char *name,
                       upb_value v) {
  upb_value existing;
  const bool taken =
      upb_strtable_lookup(ctx->addtab, name, &existing) ||
      upb_strtable_lookup(&ctx->symtab->syms, name, &existing);

  if (taken) {
    upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name);
    return false;
  }

  if (!upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)) {
    upb_status_setoom(ctx->status);
    return false;
  }

  return true;
}
4159
4160 /* Given a symbol and the base symbol inside which it is defined, find the
4161 * symbol's definition in t. */
resolvename(const upb_strtable * t,const upb_fielddef * f,const char * base,upb_strview sym,upb_deftype_t type,upb_status * status,const void ** def)4162 static bool resolvename(const upb_strtable *t, const upb_fielddef *f,
4163 const char *base, upb_strview sym,
4164 upb_deftype_t type, upb_status *status,
4165 const void **def) {
4166 if(sym.size == 0) return NULL;
4167 if(sym.data[0] == '.') {
4168 /* Symbols starting with '.' are absolute, so we do a single lookup.
4169 * Slice to omit the leading '.' */
4170 upb_value v;
4171 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
4172 return false;
4173 }
4174
4175 *def = unpack_def(v, type);
4176
4177 if (!*def) {
4178 upb_status_seterrf(status,
4179 "type mismatch when resolving field %s, name %s",
4180 f->full_name, sym.data);
4181 return false;
4182 }
4183
4184 return true;
4185 } else {
4186 /* Remove components from base until we find an entry or run out.
4187 * TODO: This branch is totally broken, but currently not used. */
4188 (void)base;
4189 UPB_ASSERT(false);
4190 return false;
4191 }
4192 }
4193
/* Resolves |sym| to a def of the given |type|, looking first among the defs
 * being added (ctx->addtab) and then in the symtab itself.
 *
 * Returns the def, or NULL on failure.  On failure an error is recorded on
 * ctx->status unless resolvename() already recorded one. */
const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f,
                           const char *base, upb_strview sym,
                           upb_deftype_t type) {
  const void *ret;
  if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) &&
      !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) {
    if (upb_ok(ctx->status)) {
      /* |sym| is a length-delimited view, so print it with an explicit
       * precision instead of assuming NUL termination. */
      upb_status_seterrf(ctx->status, "couldn't resolve name '%.*s'",
                         (int)sym.size, sym.data);
    }
    return NULL;
  }
  return ret;
}
4207
/* Initializes the next free slot of m->oneofs from |oneof_proto|: sets the
 * parent and full name, registers the oneof both as a pending symbol and in
 * the message's name table, and initializes its own (empty) field tables.
 *
 * Returns false with an error on ctx->status on failure. */
static bool create_oneofdef(
    const symtab_addctx *ctx, upb_msgdef *m,
    const google_protobuf_OneofDescriptorProto *oneof_proto) {
  upb_oneofdef *o;
  upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
  upb_value v;

  o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  /* makefullname() yields NULL on allocation failure; symtab_add() would
   * otherwise call strlen() on it. */
  CHK_OOM(o->full_name);

  v = pack_def(o, UPB_DEFTYPE_ONEOF);
  /* symtab_add() records its own error (duplicate symbol or OOM), so use CHK
   * rather than CHK_OOM to avoid replacing that message. */
  CHK(symtab_add(ctx, o->full_name, v));
  CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));

  CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  return true;
}
4228
/* Parses the textual default value |str| (length |len|, not necessarily
 * NUL-terminated) according to the type of |f| and stores the result in
 * f->defaultval.
 *
 * Returns false if the text is not a valid value for the field type; message
 * fields never accept a default.  No error message is recorded here.
 *
 * Numeric text must be non-empty, shorter than the local 64-byte buffer, and
 * consumed in full by the C parsing function. */
static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len,
                          upb_fielddef *f) {
  char *end;
  char nullz[64];
  errno = 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      /* Standard C number parsing functions expect null-terminated strings.
       * An empty string is not a number; reject it up front (strto*() would
       * report "no conversion" with *end == '\0', which used to be accepted
       * as the value 0). */
      if (len == 0 || len >= sizeof(nullz) - 1) {
        return false;
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32: {
      long val = strtol(str, &end, 0);
      CHK(end != str && !*end && errno != ERANGE &&
          val <= INT32_MAX && val >= INT32_MIN);
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_ENUM: {
      const upb_enumdef *e = f->sub.enumdef;
      int32_t val;
      CHK(upb_enumdef_ntoi(e, str, len, &val));
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_INT64: {
      /* XXX: Need to write our own strtoll, since it's not available in c89.
       * Until then the accepted range is that of `long`. */
      int64_t val = strtol(str, &end, 0);
      CHK(end != str && !*end && errno != ERANGE);
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(str, &end, 0);
      CHK(end != str && !*end && errno != ERANGE && val <= UINT32_MAX);
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_UINT64: {
      /* XXX: Need to write our own strtoull, since it's not available in c89.
       * Until then the accepted range is that of `unsigned long`. */
      uint64_t val = strtoul(str, &end, 0);
      CHK(end != str && !*end && errno != ERANGE);
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(str, &end);
      CHK(end != str && !*end && errno != ERANGE);
      f->defaultval.dbl = val;
      break;
    }
    case UPB_TYPE_FLOAT: {
      /* XXX: Need to write our own strtof, since it's not available in c89. */
      float val = strtod(str, &end);
      CHK(end != str && !*end && errno != ERANGE);
      f->defaultval.flt = val;
      break;
    }
    case UPB_TYPE_BOOL: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        return false;
      }
      break;
    }
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx->alloc, str, len);
      break;
    case UPB_TYPE_BYTES:
      /* XXX: need to interpret the C-escaped value. */
      f->defaultval.str = newstr(ctx->alloc, str, len);
      break;
    case UPB_TYPE_MESSAGE:
      /* Should not have a default value. */
      return false;
  }
  return true;
}
4324
/* Stores the implicit default for the type of |f| in f->defaultval: zero for
 * numeric and enum fields, false for bool, and a newstr() built from a NULL
 * pointer and length 0 for string/bytes.  Message fields are left untouched. */
static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_MESSAGE:
      /* Nothing to set. */
      break;
    case UPB_TYPE_BOOL:
      f->defaultval.boolean = false;
      break;
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx->alloc, NULL, 0);
      break;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT64:
    case UPB_TYPE_INT32:
      f->defaultval.sint = 0;
      break;
  }
}
4351
/* Builds one field def from |field_proto|.
 *
 * When |m| is non-NULL the field is a direct member of that message: it takes
 * the next free slot of m->fields and is entered into the message's name and
 * number tables (plus its JSON name when that differs from the short name).
 * When |m| is NULL the field is an extension: it takes the next free slot of
 * ctx->file->exts and is registered as a pending symbol under its full name.
 *
 * The sub-def (and, for extensions, the extendee) is not resolved here; the
 * proto pointer is stashed in f->sub.unresolved for a later pass.
 *
 * Returns false with an error on ctx->status on failure. */
static bool create_fielddef(
    const symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
    const google_protobuf_FieldDescriptorProto *field_proto) {
  upb_alloc *alloc = ctx->alloc;
  upb_fielddef *f;
  const google_protobuf_FieldOptions *options;
  upb_strview name;
  const char *full_name;
  const char *json_name;
  const char *shortname;
  uint32_t field_number;

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    upb_status_seterrmsg(ctx->status, "field has no name");
    return false;
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  CHK(upb_isident(name, false, ctx->status));
  full_name = makefullname(ctx, prefix, name);
  /* NULL means allocation failure; everything below dereferences it. */
  CHK_OOM(full_name);
  shortname = shortdefname(full_name);

  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
  } else {
    json_name = makejsonname(shortname, ctx->alloc);
  }
  CHK_OOM(json_name);

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
    upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number);
    return false;
  }

  if (m) {
    /* direct message field. */
    upb_value v, field_v, json_v;
    size_t json_size;

    f = (upb_fielddef*)&m->fields[m->field_count++];
    f->msgdef = m;
    f->is_extension_ = false;

    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname);
      return false;
    }

    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name);
      return false;
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field number (%u)",
                         field_number);
      return false;
    }

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    CHK_OOM(
        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));

    /* The JSON name gets its own table entry only when it differs from the
     * short name.  The insert can fail like any other, so check it. */
    if (strcmp(shortname, json_name) != 0) {
      CHK_OOM(
          upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc));
    }

    if (ctx->layouts) {
      /* A layout was supplied for this message: find this field's slot in it
       * by field number. */
      const upb_msglayout_field *fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      int i;
      for (i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
    f->is_extension_ = true;
    /* symtab_add() records its own error (duplicate symbol or OOM), so use
     * CHK rather than CHK_OOM to avoid replacing that message. */
    CHK(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)));
  }

  f->full_name = full_name;
  f->json_name = json_name;
  f->file = ctx->file;
  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->oneof = NULL;
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)",
                       f->full_name);
    return false;
  }

  /* f->oneof stays NULL (set above) unless the proto names a oneof. */
  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    int oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_oneofdef *oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
      upb_status_seterrf(ctx->status,
                         "fields in oneof must have OPTIONAL label (%s)",
                         f->full_name);
      return false;
    }

    if (!m) {
      upb_status_seterrf(ctx->status,
                         "oneof_index provided for extension field (%s)",
                         f->full_name);
      return false;
    }

    /* The index comes straight from the descriptor, so reject negative
     * values as well as values past the end of m->oneofs. */
    if (oneof_index < 0 || oneof_index >= m->oneof_count) {
      upb_status_seterrf(ctx->status, "oneof_index out of range (%s)",
                         f->full_name);
      return false;
    }

    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
    f->oneof = oneof;

    /* These inserts previously failed without recording any error. */
    CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
    CHK_OOM(
        upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  }

  if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) {
    options = google_protobuf_FieldDescriptorProto_options(field_proto);
    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
    f->packed_ = google_protobuf_FieldOptions_packed(options);
  } else {
    f->lazy_ = false;
    f->packed_ = false;
  }

  return true;
}
4514
create_enumdef(const symtab_addctx * ctx,const char * prefix,const google_protobuf_EnumDescriptorProto * enum_proto)4515 static bool create_enumdef(
4516 const symtab_addctx *ctx, const char *prefix,
4517 const google_protobuf_EnumDescriptorProto *enum_proto) {
4518 upb_enumdef *e;
4519 const google_protobuf_EnumValueDescriptorProto *const *values;
4520 upb_strview name;
4521 size_t i, n;
4522
4523 name = google_protobuf_EnumDescriptorProto_name(enum_proto);
4524 CHK(upb_isident(name, false, ctx->status));
4525
4526 e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
4527 e->full_name = makefullname(ctx, prefix, name);
4528 CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
4529
4530 CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
4531 CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
4532
4533 e->file = ctx->file;
4534 e->defaultval = 0;
4535
4536 values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
4537
4538 if (n == 0) {
4539 upb_status_seterrf(ctx->status,
4540 "enums must contain at least one value (%s)",
4541 e->full_name);
4542 return false;
4543 }
4544
4545 for (i = 0; i < n; i++) {
4546 const google_protobuf_EnumValueDescriptorProto *value = values[i];
4547 upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
4548 char *name2 = strviewdup(ctx, name);
4549 int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
4550 upb_value v = upb_value_int32(num);
4551
4552 if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
4553 upb_status_seterrf(ctx->status,
4554 "for proto3, the first enum value must be zero (%s)",
4555 e->full_name);
4556 return false;
4557 }
4558
4559 if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
4560 upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2);
4561 return false;
4562 }
4563
4564 CHK_OOM(name2)
4565 CHK_OOM(
4566 upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
4567
4568 if (!upb_inttable_lookup(&e->iton, num, NULL)) {
4569 upb_value v = upb_value_cstr(name2);
4570 CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
4571 }
4572 }
4573
4574 upb_inttable_compact2(&e->iton, ctx->alloc);
4575
4576 return true;
4577 }
4578
/* Builds one message def from |msg_proto| in the next free slot of
 * ctx->file->msgs, then recursively builds its nested enums and messages.
 *
 * If ctx->layouts is set, the next layout from that list is consumed for
 * this message; otherwise an empty layout object is allocated now so other
 * defs can point at it, to be populated later.
 *
 * Returns false with an error on ctx->status on failure. */
static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
                          const google_protobuf_DescriptorProto *msg_proto) {
  upb_msgdef *m;
  const google_protobuf_MessageOptions *options;
  const google_protobuf_OneofDescriptorProto *const *oneofs;
  const google_protobuf_FieldDescriptorProto *const *fields;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_DescriptorProto *const *msgs;
  size_t i, n;
  upb_strview name;

  name = google_protobuf_DescriptorProto_name(msg_proto);
  CHK(upb_isident(name, false, ctx->status));

  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
  m->full_name = makefullname(ctx, prefix, name);
  /* NULL means allocation failure; symtab_add() would call strlen() on it. */
  CHK_OOM(m->full_name);
  /* symtab_add() records its own error (duplicate symbol or OOM), so use CHK
   * rather than CHK_OOM to avoid replacing that message. */
  CHK(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));

  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  m->file = ctx->file;
  m->map_entry = false;

  options = google_protobuf_DescriptorProto_options(msg_proto);

  if (options) {
    m->map_entry = google_protobuf_MessageOptions_map_entry(options);
  }

  if (ctx->layouts) {
    m->layout = *ctx->layouts;
    ctx->layouts++;
  } else {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
    CHK_OOM(m->layout);
  }

  /* For the two arrays below a NULL result is only an error when a non-empty
   * array was requested (same convention as make_layout()). */
  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
  m->oneof_count = 0;
  m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
  CHK_OOM(m->oneofs || n == 0);
  for (i = 0; i < n; i++) {
    CHK(create_oneofdef(ctx, m, oneofs[i]));
  }

  fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
  m->field_count = 0;
  m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
  CHK_OOM(m->fields || n == 0);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
  }

  CHK(assign_msg_indices(m, ctx->status));
  CHK(check_oneofs(m, ctx->status));
  assign_msg_wellknowntype(m);
  upb_inttable_compact2(&m->itof, ctx->alloc);

  /* This message is built.  Now build nested messages and enums. */

  enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, m->full_name, enums[i]));
  }

  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, m->full_name, msgs[i]));
  }

  return true;
}
4650
/* Running totals of declarations found in a file descriptor, accumulated by
 * count_types_in_file() and count_types_in_msg(). */
typedef struct {
  int msg_count;   /* Messages, including nested ones. */
  int enum_count;  /* Enums at file scope and inside messages. */
  int ext_count;   /* Extensions at file scope and inside messages. */
} decl_counts;
4656
/* Adds the definitions declared by msg_proto to *counts: one message for
 * msg_proto itself, its directly declared enums and extensions, and then
 * (recursively) everything declared by its nested message types. */
static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
                               decl_counts *counts) {
  const google_protobuf_DescriptorProto *const *nested;
  size_t nested_n, enum_n, ext_n;
  size_t idx;

  /* Contributions of this message itself.  Only the element counts are
   * needed here, so the returned arrays are ignored. */
  google_protobuf_DescriptorProto_enum_type(msg_proto, &enum_n);
  google_protobuf_DescriptorProto_extension(msg_proto, &ext_n);
  counts->msg_count += 1;
  counts->enum_count += enum_n;
  counts->ext_count += ext_n;

  /* Contributions of every nested message. */
  nested = google_protobuf_DescriptorProto_nested_type(msg_proto, &nested_n);
  for (idx = 0; idx != nested_n; idx++) {
    count_types_in_msg(nested[idx], counts);
  }
}
4675
/* Adds every definition declared in file_proto to *counts: the file-scope
 * enums and extensions, plus whatever count_types_in_msg() reports for each
 * top-level message.  The caller is expected to have initialized *counts. */
static void count_types_in_file(
    const google_protobuf_FileDescriptorProto *file_proto,
    decl_counts *counts) {
  const google_protobuf_DescriptorProto *const *top_msgs;
  size_t msg_n, enum_n, ext_n;
  size_t idx;

  /* File-scope enums and extensions: only the counts matter here. */
  google_protobuf_FileDescriptorProto_enum_type(file_proto, &enum_n);
  google_protobuf_FileDescriptorProto_extension(file_proto, &ext_n);
  counts->enum_count += enum_n;
  counts->ext_count += ext_n;

  top_msgs = google_protobuf_FileDescriptorProto_message_type(file_proto,
                                                              &msg_n);
  for (idx = 0; idx != msg_n; idx++) {
    count_types_in_msg(top_msgs[idx], counts);
  }
}
4693
/* Second-pass fixup for a field, run once all names are in the table.
 *
 * Resolves the extendee (extensions only) and the message/enum sub-definition
 * by name via symtab_resolve(), then establishes the field's default value.
 * "prefix" is the scope passed through to symtab_resolve().
 *
 * Returns false on failure.  The failures detected directly in this function
 * record a message in ctx->status before returning. */
static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix,
                             upb_fielddef *f) {
  /* Read the descriptor out of f->sub before f->sub.msgdef / f->sub.enumdef
   * are assigned further down. */
  const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
  upb_strview name;
  upb_strview defaultval;

  if (f->is_extension_) {
    if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
      upb_status_seterrf(ctx->status,
                         "extension for field '%s' had no extendee",
                         f->full_name);
      return false;
    }

    name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
    f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
    CHK(f->msgdef);
  }

  /* Message, group and enum fields must name their type. */
  if (!google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
    if (upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
      upb_status_seterrf(ctx->status, "field '%s' is missing type name",
                         f->full_name);
      return false;
    }
  }

  name = google_protobuf_FieldDescriptorProto_type_name(field_proto);

  if (upb_fielddef_issubmsg(f)) {
    f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
    CHK(f->sub.msgdef);
  } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM);
    CHK(f->sub.enumdef);
  }

  /* The default is resolved only now because enum defaults are specified with
   * a label, which needs the enum definition resolved above. */
  if (!google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
    set_default_default(ctx, f);
    return true;
  }

  defaultval = google_protobuf_FieldDescriptorProto_default_value(field_proto);

  if (f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status,
                       "proto3 fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (upb_fielddef_issubmsg(f)) {
    upb_status_seterrf(ctx->status,
                       "message fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (!parse_default(ctx, defaultval.data, defaultval.size, f)) {
    upb_status_seterrf(ctx->status,
                       "couldn't parse default '" UPB_STRVIEW_FORMAT
                       "' for field (%s)",
                       UPB_STRVIEW_ARGS(defaultval), f->full_name);
    return false;
  }

  return true;
}
4762
/* Populates "file" from file_proto and creates all of the defs it declares.
 *
 * Steps, in order:
 *   1. Count the declared messages/enums/extensions and allocate the per-file
 *      arrays from ctx->alloc.
 *   2. Copy name, package, syntax and the PHP file options.
 *   3. Look up every dependency in ctx->symtab->files; each must already be
 *      loaded.
 *   4. Create message, enum and file-scope extension defs.
 *   5. Resolve cross references for every extension and message field, then
 *      build layouts unless ctx->layouts was supplied.
 *
 * Returns false on failure; the failures detected directly in this function
 * record a message (or OOM) in ctx->status. */
static bool build_filedef(
    symtab_addctx *ctx, upb_filedef *file,
    const google_protobuf_FileDescriptorProto *file_proto) {
  upb_alloc *alloc = ctx->alloc;
  const google_protobuf_FileOptions *file_options_proto;
  const google_protobuf_DescriptorProto *const *msgs;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_FieldDescriptorProto *const *exts;
  const upb_strview* strs;
  size_t i, n;
  decl_counts counts = {0};

  count_types_in_file(file_proto, &counts);

  file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count);
  file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count);
  file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count);

  CHK_OOM(counts.msg_count == 0 || file->msgs);
  CHK_OOM(counts.enum_count == 0 || file->enums);
  CHK_OOM(counts.ext_count == 0 || file->exts);

  /* We increment these as defs are added. */
  file->msg_count = 0;
  file->enum_count = 0;
  file->ext_count = 0;

  /* Give dep_count a defined value on every path; it is set to the real
   * number of dependencies once the deps array has been allocated. */
  file->dep_count = 0;

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    upb_status_seterrmsg(ctx->status, "File has no name");
    return false;
  }

  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  /* file->name is later passed to strlen() when the file is registered, so a
   * failed copy must not go unnoticed.  NOTE(review): assumes strviewdup()
   * reports allocation failure by returning NULL -- confirm. */
  CHK_OOM(file->name);
  file->phpprefix = NULL;
  file->phpnamespace = NULL;

  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
    upb_strview package =
        google_protobuf_FileDescriptorProto_package(file_proto);
    CHK(upb_isident(package, true, ctx->status));
    file->package = strviewdup(ctx, package);
    /* NULL means "no package" below, so do not let a failed copy pass for
     * that (same strviewdup() assumption as above). */
    CHK_OOM(file->package);
  } else {
    file->package = NULL;
  }

  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_strview syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = UPB_SYNTAX_PROTO2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = UPB_SYNTAX_PROTO3;
    } else {
      upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
                         UPB_STRVIEW_ARGS(syntax));
      return false;
    }
  } else {
    /* No syntax statement: treated as proto2. */
    file->syntax = UPB_SYNTAX_PROTO2;
  }

  /* Read options. */
  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  if (file_options_proto) {
    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
      file->phpprefix = strviewdup(
          ctx,
          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
    }
    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
      file->phpnamespace = strviewdup(
          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
    }
  }

  /* Verify dependencies. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->deps = upb_malloc(alloc, sizeof(*file->deps) * n);
  CHK_OOM(n == 0 || file->deps);

  /* Record the dependency count: upb_filedef_depcount() and
   * upb_filedef_dep() read it, and nothing else in this function sets it. */
  file->dep_count = (int)n;

  for (i = 0; i < n; i++) {
    upb_strview dep_name = strs[i];
    upb_value v;
    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
                              dep_name.size, &v)) {
      upb_status_seterrf(ctx->status,
                         "Depends on file '" UPB_STRVIEW_FORMAT
                         "', but it has not been loaded",
                         UPB_STRVIEW_ARGS(dep_name));
      return false;
    }
    file->deps[i] = upb_value_getconstptr(v);
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, file->package, msgs[i]));
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, file->package, enums[i]));
  }

  /* Create extensions.  file->exts was already allocated above with room for
   * counts.ext_count entries, which is at least the n file-scope extensions
   * created here, so it is not allocated a second time. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, file->package, NULL, exts[i]));
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->ext_count; i++) {
    CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]));
  }

  for (i = 0; i < (size_t)file->msg_count; i++) {
    const upb_msgdef *m = &file->msgs[i];
    int j;
    for (j = 0; j < m->field_count; j++) {
      CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]));
    }
  }

  if (!ctx->layouts) {
    for (i = 0; i < (size_t)file->msg_count; i++) {
      const upb_msgdef *m = &file->msgs[i];
      make_layout(ctx->symtab, m);
    }
  }

  return true;
}
4901
/* Publishes a successfully built file: registers ctx->file in s->files under
 * its name, then copies every entry of ctx->addtab into s->syms.  Insertions
 * allocate from the symtab's arena.  Returns false if an insertion fails. */
static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx,
                                 upb_status *status) {
  upb_alloc *alloc = upb_arena_alloc(s->arena);
  const upb_filedef *file = ctx->file;
  upb_strtable_iter it;

  CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name),
                               upb_value_constptr(file), alloc));

  upb_strtable_begin(&it, ctx->addtab);
  while (!upb_strtable_done(&it)) {
    upb_strview sym_name = upb_strtable_iter_key(&it);
    upb_value sym_def = upb_strtable_iter_value(&it);
    CHK_OOM(upb_strtable_insert3(&s->syms, sym_name.data, sym_name.size,
                                 sym_def, alloc));
    upb_strtable_next(&it);
  }

  return true;
}
4920
4921 /* upb_filedef ****************************************************************/
4922
upb_filedef_name(const upb_filedef * f)4923 const char *upb_filedef_name(const upb_filedef *f) {
4924 return f->name;
4925 }
4926
upb_filedef_package(const upb_filedef * f)4927 const char *upb_filedef_package(const upb_filedef *f) {
4928 return f->package;
4929 }
4930
upb_filedef_phpprefix(const upb_filedef * f)4931 const char *upb_filedef_phpprefix(const upb_filedef *f) {
4932 return f->phpprefix;
4933 }
4934
upb_filedef_phpnamespace(const upb_filedef * f)4935 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
4936 return f->phpnamespace;
4937 }
4938
upb_filedef_syntax(const upb_filedef * f)4939 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
4940 return f->syntax;
4941 }
4942
upb_filedef_msgcount(const upb_filedef * f)4943 int upb_filedef_msgcount(const upb_filedef *f) {
4944 return f->msg_count;
4945 }
4946
upb_filedef_depcount(const upb_filedef * f)4947 int upb_filedef_depcount(const upb_filedef *f) {
4948 return f->dep_count;
4949 }
4950
upb_filedef_enumcount(const upb_filedef * f)4951 int upb_filedef_enumcount(const upb_filedef *f) {
4952 return f->enum_count;
4953 }
4954
upb_filedef_dep(const upb_filedef * f,int i)4955 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
4956 return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
4957 }
4958
upb_filedef_msg(const upb_filedef * f,int i)4959 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
4960 return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
4961 }
4962
upb_filedef_enum(const upb_filedef * f,int i)4963 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
4964 return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
4965 }
4966
/* Destroys a symtab: frees its arena, then the symtab object itself. */
void upb_symtab_free(upb_symtab *s) {
  upb_arena *arena = s->arena;

  upb_arena_free(arena);
  upb_gfree(s);
}
4971
upb_symtab_new(void)4972 upb_symtab *upb_symtab_new(void) {
4973 upb_symtab *s = upb_gmalloc(sizeof(*s));
4974 upb_alloc *alloc;
4975
4976 if (!s) {
4977 return NULL;
4978 }
4979
4980 s->arena = upb_arena_new();
4981 alloc = upb_arena_alloc(s->arena);
4982
4983 if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
4984 !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
4985 upb_arena_free(s->arena);
4986 upb_gfree(s);
4987 s = NULL;
4988 }
4989 return s;
4990 }
4991
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)4992 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
4993 upb_value v;
4994 return upb_strtable_lookup(&s->syms, sym, &v) ?
4995 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
4996 }
4997
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)4998 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
4999 size_t len) {
5000 upb_value v;
5001 return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
5002 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
5003 }
5004
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)5005 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
5006 upb_value v;
5007 return upb_strtable_lookup(&s->syms, sym, &v) ?
5008 unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
5009 }
5010
upb_symtab_lookupfile(const upb_symtab * s,const char * name)5011 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
5012 upb_value v;
5013 return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
5014 : NULL;
5015 }
5016
upb_symtab_filecount(const upb_symtab * s)5017 int upb_symtab_filecount(const upb_symtab *s) {
5018 return (int)upb_strtable_count(&s->files);
5019 }
5020
_upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_msglayout ** layouts,upb_status * status)5021 static const upb_filedef *_upb_symtab_addfile(
5022 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
5023 const upb_msglayout **layouts, upb_status *status) {
5024 upb_arena *tmparena = upb_arena_new();
5025 upb_strtable addtab;
5026 upb_alloc *alloc = upb_arena_alloc(s->arena);
5027 upb_filedef *file = upb_malloc(alloc, sizeof(*file));
5028 bool ok;
5029 symtab_addctx ctx;
5030
5031 ctx.file = file;
5032 ctx.symtab = s;
5033 ctx.alloc = alloc;
5034 ctx.tmp = upb_arena_alloc(tmparena);
5035 ctx.addtab = &addtab;
5036 ctx.layouts = layouts;
5037 ctx.status = status;
5038
5039 ok = file &&
5040 upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
5041 build_filedef(&ctx, file, file_proto) &&
5042 upb_symtab_addtotabs(s, &ctx, status);
5043
5044 upb_arena_free(tmparena);
5045 return ok ? file : NULL;
5046 }
5047
upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,upb_status * status)5048 const upb_filedef *upb_symtab_addfile(
5049 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
5050 upb_status *status) {
5051 return _upb_symtab_addfile(s, file_proto, NULL, status);
5052 }
5053
5054 /* Include here since we want most of this file to be stdio-free. */
5055 #include <stdio.h>
5056
/* Loads a compiled-in descriptor (and, recursively, its dependencies) into s.
 *
 * Returns true immediately if a file with init->filename is already present.
 * Otherwise every entry of the NULL-terminated init->deps list is loaded
 * first, then the serialized descriptor is parsed into a temporary arena and
 * added together with init->layouts.
 *
 * Since this function should never fail (it would indicate a bug in upb),
 * errors are printed to stderr instead of being returned to the user as a
 * status. */
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  upb_def_init **dep;
  google_protobuf_FileDescriptorProto *file;
  upb_arena *arena;
  upb_status status;
  bool loaded = false;

  upb_status_clear(&status);

  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
    return true;
  }

  arena = upb_arena_new();

  for (dep = init->deps; *dep != NULL; dep++) {
    if (!_upb_symtab_loaddefinit(s, *dep)) goto done;
  }

  file = google_protobuf_FileDescriptorProto_parse(
      init->descriptor.data, init->descriptor.size, arena);

  if (file == NULL) {
    upb_status_seterrf(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto done;
  }

  loaded = _upb_symtab_addfile(s, file, init->layouts, &status) != NULL;

done:
  if (!loaded) {
    fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
            upb_status_errmsg(&status));
  }
  upb_arena_free(arena);
  return loaded;
}
5100
5101 #undef CHK
5102 #undef CHK_OOM
5103
5104
5105 #include <string.h>
5106
5107
5108 static char field_size[] = {
5109 0,/* 0 */
5110 8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */
5111 4, /* UPB_DESCRIPTOR_TYPE_FLOAT */
5112 8, /* UPB_DESCRIPTOR_TYPE_INT64 */
5113 8, /* UPB_DESCRIPTOR_TYPE_UINT64 */
5114 4, /* UPB_DESCRIPTOR_TYPE_INT32 */
5115 8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */
5116 4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */
5117 1, /* UPB_DESCRIPTOR_TYPE_BOOL */
5118 sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */
5119 sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */
5120 sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */
5121 sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */
5122 4, /* UPB_DESCRIPTOR_TYPE_UINT32 */
5123 4, /* UPB_DESCRIPTOR_TYPE_ENUM */
5124 4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */
5125 8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */
5126 4, /* UPB_DESCRIPTOR_TYPE_SINT32 */
5127 8, /* UPB_DESCRIPTOR_TYPE_SINT64 */
5128 };
5129
/* Size, in bytes, handed to _upb_map_new() for a map key or value of each
 * field type, indexed by upb_fieldtype_t.  Strings/bytes are special-cased in
 * maps and therefore use 0.  The table is only ever read, so it is declared
 * const. */
static const char _upb_fieldtype_to_mapsize[12] = {
  0,
  1,  /* UPB_TYPE_BOOL */
  4,  /* UPB_TYPE_FLOAT */
  4,  /* UPB_TYPE_INT32 */
  4,  /* UPB_TYPE_UINT32 */
  4,  /* UPB_TYPE_ENUM */
  sizeof(void*),  /* UPB_TYPE_MESSAGE */
  8,  /* UPB_TYPE_DOUBLE */
  8,  /* UPB_TYPE_INT64 */
  8,  /* UPB_TYPE_UINT64 */
  0,  /* UPB_TYPE_STRING */
  0,  /* UPB_TYPE_BYTES */
};
5145
5146 /** upb_msg *******************************************************************/
5147
upb_msg_new(const upb_msgdef * m,upb_arena * a)5148 upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) {
5149 return _upb_msg_new(upb_msgdef_layout(m), a);
5150 }
5151
in_oneof(const upb_msglayout_field * field)5152 static bool in_oneof(const upb_msglayout_field *field) {
5153 return field->presence < 0;
5154 }
5155
oneofcase(const upb_msg * msg,const upb_msglayout_field * field)5156 static uint32_t *oneofcase(const upb_msg *msg,
5157 const upb_msglayout_field *field) {
5158 UPB_ASSERT(in_oneof(field));
5159 return UPB_PTR_AT(msg, -field->presence, uint32_t);
5160 }
5161
_upb_msg_getraw(const upb_msg * msg,const upb_fielddef * f)5162 static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) {
5163 const upb_msglayout_field *field = upb_fielddef_layout(f);
5164 const char *mem = UPB_PTR_AT(msg, field->offset, char);
5165 upb_msgval val = {0};
5166 int size = upb_fielddef_isseq(f) ? sizeof(void *)
5167 : field_size[field->descriptortype];
5168 memcpy(&val, mem, size);
5169 return val;
5170 }
5171
upb_msg_has(const upb_msg * msg,const upb_fielddef * f)5172 bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) {
5173 const upb_msglayout_field *field = upb_fielddef_layout(f);
5174 if (in_oneof(field)) {
5175 return *oneofcase(msg, field) == field->number;
5176 } else if (field->presence > 0) {
5177 uint32_t hasbit = field->presence;
5178 return *UPB_PTR_AT(msg, hasbit / 8, uint8_t) & (1 << (hasbit % 8));
5179 } else {
5180 UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
5181 field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
5182 return _upb_msg_getraw(msg, f).msg_val != NULL;
5183 }
5184 }
5185
upb_msg_hasoneof(const upb_msg * msg,const upb_oneofdef * o)5186 bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) {
5187 upb_oneof_iter i;
5188 const upb_fielddef *f;
5189 const upb_msglayout_field *field;
5190
5191 upb_oneof_begin(&i, o);
5192 if (upb_oneof_done(&i)) return false;
5193 f = upb_oneof_iter_field(&i);
5194 field = upb_fielddef_layout(f);
5195 return *oneofcase(msg, field) != 0;
5196 }
5197
upb_msg_get(const upb_msg * msg,const upb_fielddef * f)5198 upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) {
5199 if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) {
5200 return _upb_msg_getraw(msg, f);
5201 } else {
5202 /* TODO(haberman): change upb_fielddef to not require this switch(). */
5203 upb_msgval val = {0};
5204 switch (upb_fielddef_type(f)) {
5205 case UPB_TYPE_INT32:
5206 case UPB_TYPE_ENUM:
5207 val.int32_val = upb_fielddef_defaultint32(f);
5208 break;
5209 case UPB_TYPE_INT64:
5210 val.int64_val = upb_fielddef_defaultint64(f);
5211 break;
5212 case UPB_TYPE_UINT32:
5213 val.uint32_val = upb_fielddef_defaultuint32(f);
5214 break;
5215 case UPB_TYPE_UINT64:
5216 val.uint64_val = upb_fielddef_defaultuint64(f);
5217 break;
5218 case UPB_TYPE_FLOAT:
5219 val.float_val = upb_fielddef_defaultfloat(f);
5220 break;
5221 case UPB_TYPE_DOUBLE:
5222 val.double_val = upb_fielddef_defaultdouble(f);
5223 break;
5224 case UPB_TYPE_BOOL:
5225 val.double_val = upb_fielddef_defaultbool(f);
5226 break;
5227 case UPB_TYPE_STRING:
5228 case UPB_TYPE_BYTES:
5229 val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size);
5230 break;
5231 case UPB_TYPE_MESSAGE:
5232 val.msg_val = NULL;
5233 break;
5234 }
5235 return val;
5236 }
5237 }
5238
upb_msg_mutable(upb_msg * msg,const upb_fielddef * f,upb_arena * a)5239 upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f,
5240 upb_arena *a) {
5241 const upb_msglayout_field *field = upb_fielddef_layout(f);
5242 upb_mutmsgval ret;
5243 char *mem = UPB_PTR_AT(msg, field->offset, char);
5244 bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number;
5245
5246 memcpy(&ret, mem, sizeof(void*));
5247
5248 if (a && (!ret.msg || wrong_oneof)) {
5249 if (upb_fielddef_ismap(f)) {
5250 const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
5251 const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY);
5252 const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE);
5253 ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value));
5254 } else if (upb_fielddef_isseq(f)) {
5255 ret.array = upb_array_new(a, upb_fielddef_type(f));
5256 } else {
5257 UPB_ASSERT(upb_fielddef_issubmsg(f));
5258 ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a);
5259 }
5260
5261 memcpy(mem, &ret, sizeof(void*));
5262
5263 if (wrong_oneof) {
5264 *oneofcase(msg, field) = field->number;
5265 }
5266 }
5267 return ret;
5268 }
5269
/* Stores "val" into field "f" of "msg" and marks the field as present.
 *
 * Repeated fields store one pointer; other fields store
 * field_size[descriptortype] bytes.  Presence is recorded the same way
 * upb_msg_has() reads it: the hasbit is set when presence > 0, and the
 * oneof-case word is set to the field number for oneof members.  The arena
 * parameter is currently unused. */
void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val,
                 upb_arena *a) {
  const upb_msglayout_field *field = upb_fielddef_layout(f);
  char *mem = UPB_PTR_AT(msg, field->offset, char);
  int size = upb_fielddef_isseq(f) ? sizeof(void *)
                                   : field_size[field->descriptortype];
  (void)a;

  memcpy(mem, &val, size);

  if (field->presence > 0) {
    /* Without this, upb_msg_has() keeps reporting the field as absent and
     * upb_msg_get() returns the default instead of the value just stored. */
    uint32_t hasbit = field->presence;
    *UPB_PTR_AT(msg, hasbit / 8, uint8_t) |= (uint8_t)(1 << (hasbit % 8));
  } else if (in_oneof(field)) {
    *oneofcase(msg, field) = field->number;
  }
}
5281
/* Advances *iter to the next field of "msg" that is set/non-empty.
 *
 * Candidate fields are fetched with _upb_msgdef_field(m, index), starting at
 * the index after *iter.  A field with presence tracking is skipped when not
 * present.  A field without it is skipped when its stored value is all-zero
 * (for non-repeated strings only the size counts) or when it is an empty map
 * or array.
 *
 * On success stores the field and its value in *out_f / *out_val, updates
 * *iter and returns true.  Returns false once the fields are exhausted.
 * "ext_pool" is not used by this implementation. */
bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m,
                  const upb_symtab *ext_pool, const upb_fielddef **out_f,
                  upb_msgval *out_val, size_t *iter) {
  const upb_msgval zero = {0};
  const upb_fielddef *f;
  size_t pos = *iter;

  for (;;) {
    upb_msgval val;

    pos++;
    f = _upb_msgdef_field(m, (int)pos);
    if (f == NULL) break;

    val = _upb_msg_getraw(msg, f);

    /* Skip field if unset or empty. */
    if (upb_fielddef_haspresence(f)) {
      if (!upb_msg_has(msg, f)) continue;
    } else {
      upb_msgval probe = val;

      if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) {
        /* Clear string pointer, only size matters (ptr could be non-NULL). */
        probe.str_val.data = NULL;
      }

      /* NULL pointer or zero scalar. */
      if (memcmp(&probe, &zero, sizeof(probe)) == 0) continue;

      /* Empty map or array. */
      if (upb_fielddef_ismap(f)) {
        if (upb_map_size(probe.map_val) == 0) continue;
      } else if (upb_fielddef_isseq(f)) {
        if (upb_array_size(probe.array_val) == 0) continue;
      }
    }

    *out_val = val;
    *out_f = f;
    *iter = pos;
    return true;
  }

  *iter = pos;
  return false;
}
5319
5320 /** upb_array *****************************************************************/
5321
upb_array_new(upb_arena * a,upb_fieldtype_t type)5322 upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) {
5323 return _upb_array_new(a, type);
5324 }
5325
upb_array_size(const upb_array * arr)5326 size_t upb_array_size(const upb_array *arr) {
5327 return arr->len;
5328 }
5329
upb_array_get(const upb_array * arr,size_t i)5330 upb_msgval upb_array_get(const upb_array *arr, size_t i) {
5331 upb_msgval ret;
5332 const char* data = _upb_array_constptr(arr);
5333 int lg2 = arr->data & 7;
5334 UPB_ASSERT(i < arr->len);
5335 memcpy(&ret, data + (i << lg2), 1 << lg2);
5336 return ret;
5337 }
5338
/* Overwrites element "i" of the array with "val"; "i" must be below the array
 * length (asserted).  The low three bits of arr->data hold log2 of the element
 * size, which determines both the element's offset and how many bytes of
 * "val" are copied. */
void upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
  char *base = _upb_array_ptr(arr);
  int elem_lg2 = arr->data & 7;

  UPB_ASSERT(i < arr->len);
  memcpy(base + (i << elem_lg2), &val, 1 << elem_lg2);
}
5345
/* Appends "val" to the array, growing its storage from "arena" if needed.
 * Returns false (leaving the length unchanged) if storage could not be
 * obtained. */
bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) {
  size_t slot;

  if (!_upb_array_realloc(arr, arr->len + 1, arena)) {
    return false;
  }

  slot = arr->len;
  arr->len = slot + 1;
  upb_array_set(arr, slot, val);
  return true;
}
5354
5355 /* Resizes the array to the given size, reallocating if necessary, and returns a
5356 * pointer to the new array elements. */
upb_array_resize(upb_array * arr,size_t size,upb_arena * arena)5357 bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) {
5358 return _upb_array_realloc(arr, size, arena);
5359 }
5360
5361 /** upb_map *******************************************************************/
5362
upb_map_new(upb_arena * a,upb_fieldtype_t key_type,upb_fieldtype_t value_type)5363 upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type,
5364 upb_fieldtype_t value_type) {
5365 return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type],
5366 _upb_fieldtype_to_mapsize[value_type]);
5367 }
5368
/* Returns the number of entries in the map. */
size_t upb_map_size(const upb_map *map) {
  size_t count = _upb_map_size(map);
  return count;
}
5372
/* Looks up "key" in the map, using the key and value sizes recorded in the
 * map.  Returns the result of _upb_map_get(), which receives "val" as its
 * output location. */
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  bool found = _upb_map_get(map, &key, map->key_size, val, map->val_size);
  return found;
}
5376
/* Stores "val" under "key" in the map, using the key and value sizes recorded
 * in the map; "arena" is passed through to _upb_map_set().  Returns that
 * call's result. */
bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
                 upb_arena *arena) {
  bool stored =
      _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena);
  return stored;
}
5381
/* Deletes "key" from the map, using the key size recorded in the map.
 * Returns the result of _upb_map_delete(). */
bool upb_map_delete(upb_map *map, upb_msgval key) {
  bool removed = _upb_map_delete(map, &key, map->key_size);
  return removed;
}
5385
/* Advances the map iterator *iter; returns the result of _upb_map_next(). */
bool upb_mapiter_next(const upb_map *map, size_t *iter) {
  bool advanced = _upb_map_next(map, iter);
  return advanced;
}
5389
5390 /* Returns the key and value for this entry of the map. */
upb_mapiter_key(const upb_map * map,size_t iter)5391 upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) {
5392 upb_strtable_iter i;
5393 upb_msgval ret;
5394 i.t = &map->table;
5395 i.index = iter;
5396 _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
5397 return ret;
5398 }
5399
upb_mapiter_value(const upb_map * map,size_t iter)5400 upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) {
5401 upb_strtable_iter i;
5402 upb_msgval ret;
5403 i.t = &map->table;
5404 i.index = iter;
5405 _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
5406 return ret;
5407 }
5408
5409 /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */
5410 /*
5411 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
5412 ** UPB_ASSERT() or return false.
5413 */
5414
5415
5416 #include <string.h>
5417
5418
5419
/* Handler table for one message type.  Handler slots live in "table" and are
 * indexed by selector. */
struct upb_handlers {
  upb_handlercache *cache;
  const upb_msgdef *msg;
  /* Sub-handlers for submessage fields; accessed through SUBH()/SUBH_F(). */
  const upb_handlers **sub;
  /* Closure type for handlers that are not inside a sequence or string
   * context; NULL until a handler specifying one is registered. */
  const void *top_closure_type;
  upb_handlers_tabent table[1];  /* Dynamically-sized field handler array. */
};
5427
/* Allocates "size" bytes from "arena" and zero-fills them.  Returns NULL if
 * the allocation fails. */
static void *upb_calloc(upb_arena *arena, size_t size) {
  upb_alloc *alloc = upb_arena_alloc(arena);
  void *mem = upb_malloc(alloc, size);

  if (mem == NULL) {
    return NULL;
  }

  memset(mem, 0, size);
  return mem;
}
5435
/* Defined for the sole purpose of having a unique pointer value for
 * UPB_NO_CLOSURE: it is the object's address, not its contents, that is
 * used. */
char _upb_noclosure;
5439
/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
 * subhandlers for this submessage field.  The result is an lvalue.  Arguments
 * are parenthesized so that expressions such as "&obj" or "a + b" expand
 * correctly. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f))
5446
/* Returns the selector for handler "type" on field "f".  Asserts that "f"
 * belongs to the message these handlers were created for and that a selector
 * exists for this field/type combination. */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel;
  bool found = upb_handlers_getselector(f, type, &sel);

  UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f));
  UPB_ASSERT(found);

  return sel;
}
5459
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)5460 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
5461 upb_handlertype_t type) {
5462 int32_t sel = trygetsel(h, f, type);
5463 UPB_ASSERT(sel >= 0);
5464 return sel;
5465 }
5466
/* Returns the address of the return-closure-type slot stored in the handler
 * table entry for handler "type" on field "f". */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  return &h->table[sel].attr.return_closure_type;
}
5471
/* Installs "func" (with attributes "attr", or defaults when attr is NULL) in
 * the handler slot "sel".  The slot must be empty (asserted).
 *
 * Before installing, closure types are reconciled:
 *   - The handler's closure type must agree with the one already established
 *     for its context: the STARTSTR return type for STRING handlers, the
 *     STARTSEQ return type for handlers inside a repeated field, and the
 *     top-level closure type otherwise.  An unset context type is set from
 *     this handler.
 *   - For STARTSEQ/STARTSTR handlers, the return closure type must agree with
 *     any type already recorded in the slot; if the handler does not specify
 *     one, the recorded type is carried over.
 *
 * Returns false, without installing the handler, on a type conflict. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  const upb_handlerattr *attr) {
  upb_handlerattr set_attr = UPB_HANDLERATTR_INIT;
  const void *wanted_closure;
  const void **context_slot;

  UPB_ASSERT(!h->table[sel].func);

  if (attr) {
    set_attr = *attr;
  }

  wanted_closure = set_attr.closure_type;

  /* Locate the closure type established for this handler's context. */
  if (type == UPB_HANDLER_STRING) {
    context_slot = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    context_slot = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    context_slot = &h->top_closure_type;
  }

  if (wanted_closure) {
    if (*context_slot && *context_slot != wanted_closure) {
      return false;
    }
    *context_slot = wanted_closure;
  }

  /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
   * matches any pre-existing expectations about what type is expected. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *requested = set_attr.return_closure_type;
    const void *recorded = h->table[sel].attr.return_closure_type;

    if (recorded) {
      if (!requested) {
        set_attr.return_closure_type = recorded;
      } else if (requested != recorded) {
        return false;
      }
    }
  }

  h->table[sel].func = func;
  h->table[sel].attr = set_attr;
  return true;
}
5525
5526 /* Returns the effective closure type for this handler (which will propagate
5527 * from outer frames if this frame has no START* handler). Not implemented for
5528 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
5529 * the effective closure type is unspecified (either no handler was registered
5530 * to specify it or the handler that was registered did not specify the closure
5531 * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)5532 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
5533 upb_handlertype_t type) {
5534 const void *ret;
5535 upb_selector_t sel;
5536
5537 UPB_ASSERT(type != UPB_HANDLER_STRING);
5538 ret = h->top_closure_type;
5539
5540 if (upb_fielddef_isseq(f) &&
5541 type != UPB_HANDLER_STARTSEQ &&
5542 type != UPB_HANDLER_ENDSEQ &&
5543 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
5544 ret = h->table[sel].attr.return_closure_type;
5545 }
5546
5547 if (type == UPB_HANDLER_STRING &&
5548 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
5549 ret = h->table[sel].attr.return_closure_type;
5550 }
5551
5552 /* The effective type of the submessage; not used yet.
5553 * if (type == SUBMESSAGE &&
5554 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
5555 * ret = h->table[sel].attr.return_closure_type;
5556 * } */
5557
5558 return ret;
5559 }
5560
5561 /* Checks whether the START* handler specified by f & type is missing even
5562 * though it is required to convert the established type of an outer frame
5563 * ("closure_type") into the established type of an inner frame (represented in
5564 * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)5565 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
5566 upb_status *status) {
5567 const void *closure_type;
5568 const upb_handlerattr *attr;
5569 const void *return_closure_type;
5570
5571 upb_selector_t sel = handlers_getsel(h, f, type);
5572 if (h->table[sel].func) return true;
5573 closure_type = effective_closure_type(h, f, type);
5574 attr = &h->table[sel].attr;
5575 return_closure_type = attr->return_closure_type;
5576 if (closure_type && return_closure_type &&
5577 closure_type != return_closure_type) {
5578 return false;
5579 }
5580 return true;
5581 }
5582
upb_handlers_new(const upb_msgdef * md,upb_handlercache * cache,upb_arena * arena)5583 static upb_handlers *upb_handlers_new(const upb_msgdef *md,
5584 upb_handlercache *cache,
5585 upb_arena *arena) {
5586 int extra;
5587 upb_handlers *h;
5588
5589 extra =
5590 (int)(sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1));
5591 h = upb_calloc(arena, sizeof(*h) + extra);
5592 if (!h) return NULL;
5593
5594 h->cache = cache;
5595 h->msg = md;
5596
5597 if (upb_msgdef_submsgfieldcount(md) > 0) {
5598 size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub);
5599 h->sub = upb_calloc(arena, bytes);
5600 if (!h->sub) return NULL;
5601 } else {
5602 h->sub = 0;
5603 }
5604
5605 /* calloc() above initialized all handlers to NULL. */
5606 return h;
5607 }
5608
5609 /* Public interface ***********************************************************/
5610
5611 #define SETTER(name, handlerctype, handlertype) \
5612 bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \
5613 handlerctype func, \
5614 const upb_handlerattr *attr) { \
5615 int32_t sel = trygetsel(h, f, handlertype); \
5616 return doset(h, sel, f, handlertype, (upb_func *)func, attr); \
5617 }
5618
SETTER(int32,upb_int32_handlerfunc *,UPB_HANDLER_INT32)5619 SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
5620 SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
5621 SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
5622 SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
5623 SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
5624 SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
5625 SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
5626 SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
5627 SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
5628 SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
5629 SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
5630 SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
5631 SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
5632 SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
5633
5634 #undef SETTER
5635
5636 bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
5637 const upb_handlerattr *attr) {
5638 return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
5639 (upb_func *)func, attr);
5640 }
5641
/* Installs `func` in the UPB_STARTMSG_SELECTOR slot of `h`.  No field is
 * involved, so NULL is passed for the fielddef and UPB_HANDLER_INT32 is used
 * as the handler type.  Returns doset()'s result. */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              const upb_handlerattr *attr) {
  upb_func *erased = (upb_func *)func;
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, erased,
               attr);
}
5647
/* Installs `func` in the UPB_ENDMSG_SELECTOR slot of `h`.  No field is
 * involved, so NULL is passed for the fielddef and UPB_HANDLER_INT32 is used
 * as the handler type.  Returns doset()'s result. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            const upb_handlerattr *attr) {
  upb_func *erased = (upb_func *)func;
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, erased, attr);
}
5653
/* Associates the handlers `sub` with submessage field `f` of `h`.
 *
 * Returns false, changing nothing, if sub-handlers were already set for the
 * field (they cannot be reset) or if `sub` was built for a different message
 * type than the field's submessage type.  Returns true on success. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  UPB_ASSERT(sub);
  UPB_ASSERT(upb_fielddef_issubmsg(f));

  if (SUBH_F(h, f) ||
      upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  return true;
}
5665
/* Returns the sub-handlers stored for submessage field `f` of `h` (whatever
 * SUBH_F holds for it).  `f` must be a submessage field (asserted). */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;

  UPB_ASSERT(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
5671
upb_handlers_gethandler(const upb_handlers * h,upb_selector_t s,const void ** handler_data)5672 upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s,
5673 const void **handler_data) {
5674 upb_func *ret = (upb_func *)h->table[s].func;
5675 if (ret && handler_data) {
5676 *handler_data = h->table[s].attr.handler_data;
5677 }
5678 return ret;
5679 }
5680
/* Copies the attributes of the handler registered for `sel` into *attr and
 * returns true.  Returns false, leaving *attr untouched, when no handler is
 * registered in that slot. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel, NULL)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
5688
/* Returns the sub-handlers for the field whose STARTSUBMSG selector is `sel`.
 * STARTSUBMSG selectors follow the static selectors in the handler table, so
 * subtracting UPB_STATIC_SELECTOR_COUNT yields the index used with SUBH. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  const upb_handlers *sub = SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
  return sub;
}
5694
upb_handlers_msgdef(const upb_handlers * h)5695 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
5696
/* Registers cleanup function `func` for pointer `p` with the handler cache
 * that owns `h`.  Returns upb_handlercache_addcleanup()'s result. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  upb_handlercache *owner = h->cache;
  return upb_handlercache_addcleanup(owner, p, func);
}
5700
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)5701 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
5702 switch (upb_fielddef_type(f)) {
5703 case UPB_TYPE_INT32:
5704 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
5705 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
5706 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
5707 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
5708 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
5709 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
5710 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
5711 default: UPB_ASSERT(false); return -1; /* Invalid input. */
5712 }
5713 }
5714
/* Computes the selector (index into the handler table) for handler `type` on
 * field `f` and stores it in *s.
 *
 * Returns false, leaving *s untouched, when the handler type does not apply
 * to the field (e.g. a value handler whose type does not match the field, or
 * a sequence handler on a non-sequence field).  Returns true on success.
 *
 * Layout relative to the field's selector base, as encoded below:
 *   base - 2 : STARTSEQ          base - 1 : ENDSEQ
 *   base     : value handler, STRING (string fields), or ENDSUBMSG
 *   base + 1 : STARTSTR          base + 2 : ENDSTR
 *   base + 3 : STRING for lazy (non-string) fields
 * STARTSUBMSG is the exception: it is addressed by field index. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  const uint32_t base = upb_fielddef_selectorbase(f);

  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      /* The primitive-type lookup asserts on non-primitive fields, so the
       * isprimitive check must come first. */
      if (!upb_fielddef_isprimitive(f)) return false;
      if (upb_handlers_getprimitivehandlertype(f) != type) return false;
      *s = base;
      break;

    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = base;
        break;
      }
      if (!upb_fielddef_lazy(f)) return false;
      *s = base + 3;
      break;

    case UPB_HANDLER_STARTSTR:
    case UPB_HANDLER_ENDSTR:
      if (!(upb_fielddef_isstring(f) || upb_fielddef_lazy(f))) return false;
      *s = base + (type == UPB_HANDLER_STARTSTR ? 1 : 2);
      break;

    case UPB_HANDLER_STARTSEQ:
    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = base - (type == UPB_HANDLER_STARTSEQ ? 2 : 1);
      break;

    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* Selectors for STARTSUBMSG are at the beginning of the table so that
       * the selector can also be used as an index into the "sub" array of
       * subhandlers.  The indexes into these two tables are the same, except
       * that in the handler table the static selectors come first. */
      *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT;
      break;

    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = base;
      break;
  }

  UPB_ASSERT((size_t)*s <
             upb_msgdef_selectorcount(upb_fielddef_containingtype(f)));
  return true;
}
5778
5779 /* upb_handlercache ***********************************************************/
5780
5781 struct upb_handlercache {
5782 upb_arena *arena;
5783 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
5784 upb_handlers_callback *callback;
5785 const void *closure;
5786 };
5787
upb_handlercache_get(upb_handlercache * c,const upb_msgdef * md)5788 const upb_handlers *upb_handlercache_get(upb_handlercache *c,
5789 const upb_msgdef *md) {
5790 upb_msg_field_iter i;
5791 upb_value v;
5792 upb_handlers *h;
5793
5794 if (upb_inttable_lookupptr(&c->tab, md, &v)) {
5795 return upb_value_getptr(v);
5796 }
5797
5798 h = upb_handlers_new(md, c, c->arena);
5799 v = upb_value_ptr(h);
5800
5801 if (!h) return NULL;
5802 if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL;
5803
5804 c->callback(c->closure, h);
5805
5806 /* For each submessage field, get or create a handlers object and set it as
5807 * the subhandlers. */
5808 for(upb_msg_field_begin(&i, md);
5809 !upb_msg_field_done(&i);
5810 upb_msg_field_next(&i)) {
5811 upb_fielddef *f = upb_msg_iter_field(&i);
5812
5813 if (upb_fielddef_issubmsg(f)) {
5814 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
5815 const upb_handlers *sub_mh = upb_handlercache_get(c, subdef);
5816
5817 if (!sub_mh) return NULL;
5818
5819 upb_handlers_setsubhandlers(h, f, sub_mh);
5820 }
5821 }
5822
5823 return h;
5824 }
5825
5826
upb_handlercache_new(upb_handlers_callback * callback,const void * closure)5827 upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback,
5828 const void *closure) {
5829 upb_handlercache *cache = upb_gmalloc(sizeof(*cache));
5830
5831 if (!cache) return NULL;
5832
5833 cache->arena = upb_arena_new();
5834
5835 cache->callback = callback;
5836 cache->closure = closure;
5837
5838 if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom;
5839
5840 return cache;
5841
5842 oom:
5843 upb_gfree(cache);
5844 return NULL;
5845 }
5846
/* Destroys a cache created by upb_handlercache_new(): tears down the lookup
 * table, frees the arena, and finally releases the cache struct itself. */
void upb_handlercache_free(upb_handlercache *cache) {
  upb_arena *arena = cache->arena;

  upb_inttable_uninit(&cache->tab);
  upb_arena_free(arena);
  upb_gfree(cache);
}
5852
/* Registers cleanup function `func` for pointer `p` on the cache's arena.
 * Returns upb_arena_addcleanup()'s result. */
bool upb_handlercache_addcleanup(upb_handlercache *c, void *p,
                                 upb_handlerfree *func) {
  upb_arena *arena = c->arena;
  return upb_arena_addcleanup(arena, p, func);
}
5857
5858 /* upb_byteshandler ***********************************************************/
5859
/* Stores `func` and its bound data `d` in the UPB_STARTSTR_SELECTOR slot of
 * bytes handler `h`.  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d;
  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func *)func;
  return true;
}
5866
/* Stores `func` and its bound data `d` in the UPB_STRING_SELECTOR slot of
 * bytes handler `h`.  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  h->table[UPB_STRING_SELECTOR].attr.handler_data = d;
  h->table[UPB_STRING_SELECTOR].func = (upb_func *)func;
  return true;
}
5873
/* Stores `func` and its bound data `d` in the UPB_ENDSTR_SELECTOR slot of
 * bytes handler `h`.  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d;
  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func *)func;
  return true;
}
5880
5881 /** Handlers for upb_msg ******************************************************/
5882
/* Handler data bound to the generic scalar writers below. */
typedef struct {
  size_t offset;   /* Byte offset of the field within the message. */
  int32_t hasbit;  /* Bit index of the has-bit; only set when > 0. */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 *
 * Generates upb_msg_set<type>(c, hd, val): treats closure `c` as the raw
 * message bytes, sets the has-bit described by `hd` when its index is
 * positive, stores `val` at the configured offset, and returns true. */
#define MSG_WRITER(type, ctype)                                            \
  bool upb_msg_set##type(void *c, const void *hd, ctype val) {             \
    const upb_msg_handlerdata *data = (const upb_msg_handlerdata *)hd;     \
    uint8_t *bytes = (uint8_t *)c;                                         \
    const int32_t bit = data->hasbit;                                      \
    if (bit > 0) {                                                         \
      bytes[bit / 8] |= 1 << (bit % 8);                                    \
    }                                                                      \
    *(ctype *)(bytes + data->offset) = val;                                \
    return true;                                                           \
  }

MSG_WRITER(double, double)
MSG_WRITER(float,  float)
MSG_WRITER(int32,  int32_t)
MSG_WRITER(int64,  int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool,   bool)
5906
5907 bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
5908 size_t offset, int32_t hasbit) {
5909 upb_handlerattr attr = UPB_HANDLERATTR_INIT;
5910 bool ok;
5911
5912 upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
5913 if (!d) return false;
5914 d->offset = offset;
5915 d->hasbit = hasbit;
5916
5917 attr.handler_data = d;
5918 attr.alwaysok = true;
5919 upb_handlers_addcleanup(h, d, upb_gfree);
5920
5921 #define TYPE(u, l) \
5922 case UPB_TYPE_##u: \
5923 ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
5924
5925 ok = false;
5926
5927 switch (upb_fielddef_type(f)) {
5928 TYPE(INT64, int64);
5929 TYPE(INT32, int32);
5930 TYPE(ENUM, int32);
5931 TYPE(UINT64, uint64);
5932 TYPE(UINT32, uint32);
5933 TYPE(DOUBLE, double);
5934 TYPE(FLOAT, float);
5935 TYPE(BOOL, bool);
5936 default: UPB_ASSERT(false); break;
5937 }
5938 #undef TYPE
5939
5940 return ok;
5941 }
5942
/* Inspects the handler registered for selector `s`.  If it is one of the
 * generic upb_msg_set<type>() writers, reports the corresponding field type
 * together with the offset and has-bit stored in its handler data, and
 * returns true.  Returns false (outputs untouched) for any other handler,
 * including an empty slot. */
bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
                                  upb_selector_t s,
                                  upb_fieldtype_t *type,
                                  size_t *offset,
                                  int32_t *hasbit) {
  const void *bound;
  const upb_msg_handlerdata *data;
  upb_func *fn = upb_handlers_gethandler(h, s, &bound);
  upb_fieldtype_t matched;

  if ((upb_int64_handlerfunc*)fn == upb_msg_setint64) {
    matched = UPB_TYPE_INT64;
  } else if ((upb_int32_handlerfunc*)fn == upb_msg_setint32) {
    matched = UPB_TYPE_INT32;
  } else if ((upb_uint64_handlerfunc*)fn == upb_msg_setuint64) {
    matched = UPB_TYPE_UINT64;
  } else if ((upb_uint32_handlerfunc*)fn == upb_msg_setuint32) {
    matched = UPB_TYPE_UINT32;
  } else if ((upb_double_handlerfunc*)fn == upb_msg_setdouble) {
    matched = UPB_TYPE_DOUBLE;
  } else if ((upb_float_handlerfunc*)fn == upb_msg_setfloat) {
    matched = UPB_TYPE_FLOAT;
  } else if ((upb_bool_handlerfunc*)fn == upb_msg_setbool) {
    matched = UPB_TYPE_BOOL;
  } else {
    return false;
  }

  /* A match implies a non-NULL handler, so `bound` has been filled in. */
  data = bound;
  *type = matched;
  *offset = data->offset;
  *hasbit = data->hasbit;
  return true;
}
5975
5976
/* Pushes the whole buffer [buf, buf+len) into `sink` as one start / putbuf /
 * end sequence.  The putbuf step is skipped for an empty buffer.  Returns
 * false as soon as start fails or putbuf reports fewer than `len` bytes;
 * otherwise returns the result of the end call. */
bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) {
  upb_bufhandle handle = UPB_BUFHANDLE_INIT;
  void *subc;
  bool ok;

  handle.buf = buf;

  ok = upb_bytessink_start(sink, len, &subc);
  if (!ok) return ok;

  if (len != 0) {
    ok = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
    if (!ok) return ok;
  }

  ok = upb_bytessink_end(sink);
  return ok;
}
5991
5992
5993 #ifdef UPB_MSVC_VSNPRINTF
5994 /* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and
5995 * vsnprintf. To support them, missing functions are manually implemented
5996 * using the existing secure functions. */
msvc_vsnprintf(char * s,size_t n,const char * format,va_list arg)5997 int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) {
5998 if (!s) {
5999 return _vscprintf(format, arg);
6000 }
6001 int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg);
6002 if (ret < 0) {
6003 ret = _vscprintf(format, arg);
6004 }
6005 return ret;
6006 }
6007
/* snprintf() replacement: packs the variadic arguments into a va_list and
 * forwards to msvc_vsnprintf(), returning its result.
 *
 * Locals are declared at the top of the block so the function also compiles
 * with C compilers that reject declarations after statements. */
int msvc_snprintf(char* s, size_t n, const char* format, ...) {
  va_list arg;
  int ret;

  va_start(arg, format);
  ret = msvc_vsnprintf(s, n, format, arg);
  va_end(arg);
  return ret;
}
6015 #endif
6016 /*
6017 ** protobuf decoder bytecode compiler
6018 **
6019 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6020 ** according to that specific schema and destination handlers.
6021 **
6022 ** Bytecode definition is in decoder.int.h.
6023 */
6024
6025 #include <stdarg.h>
6026
6027 #ifdef UPB_DUMP_BYTECODE
6028 #include <stdio.h>
6029 #endif
6030
6031
/* Size of the compiler's local label tables. */
#define MAXLABEL 5
/* Marks a label slot with no pending references.  Parenthesized so the
 * negative value is safe inside any surrounding expression. */
#define EMPTYLABEL (-1)
6034
6035 /* upb_pbdecodermethod ********************************************************/
6036
/* Releases a decoder method: first its dispatch table, then the method
 * struct itself. */
static void freemethod(upb_pbdecodermethod *method) {
  upb_inttable *dispatch = &method->dispatch;

  upb_inttable_uninit(dispatch);
  upb_gfree(method);
}
6041
newmethod(const upb_handlers * dest_handlers,mgroup * group)6042 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6043 mgroup *group) {
6044 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
6045 upb_byteshandler_init(&ret->input_handler_);
6046
6047 ret->group = group;
6048 ret->dest_handlers_ = dest_handlers;
6049 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6050
6051 return ret;
6052 }
6053
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)6054 const upb_handlers *upb_pbdecodermethod_desthandlers(
6055 const upb_pbdecodermethod *m) {
6056 return m->dest_handlers_;
6057 }
6058
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)6059 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6060 const upb_pbdecodermethod *m) {
6061 return &m->input_handler_;
6062 }
6063
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)6064 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6065 return m->is_native_;
6066 }
6067
6068
6069 /* mgroup *********************************************************************/
6070
/* Destroys a method group: frees every method stored in its table, then the
 * table, the bytecode buffer, and the group struct itself. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
    freemethod(m);
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
6083
newgroup(void)6084 mgroup *newgroup(void) {
6085 mgroup *g = upb_gmalloc(sizeof(*g));
6086 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6087 g->bytecode = NULL;
6088 g->bytecode_end = NULL;
6089 return g;
6090 }
6091
6092
6093 /* bytecode compiler **********************************************************/
6094
6095 /* Data used only at compilation time. */
6096 typedef struct {
6097 mgroup *group;
6098
6099 uint32_t *pc;
6100 int fwd_labels[MAXLABEL];
6101 int back_labels[MAXLABEL];
6102
6103 /* For fields marked "lazy", parse them lazily or eagerly? */
6104 bool lazy;
6105 } compiler;
6106
newcompiler(mgroup * group,bool lazy)6107 static compiler *newcompiler(mgroup *group, bool lazy) {
6108 compiler *ret = upb_gmalloc(sizeof(*ret));
6109 int i;
6110
6111 ret->group = group;
6112 ret->lazy = lazy;
6113 for (i = 0; i < MAXLABEL; i++) {
6114 ret->fwd_labels[i] = EMPTYLABEL;
6115 ret->back_labels[i] = EMPTYLABEL;
6116 }
6117 return ret;
6118 }
6119
/* Releases compiler state allocated by newcompiler().  The group it points
 * to is not touched. */
static void freecompiler(compiler *c) {
  upb_gfree(c);
}
6123
/* Number of 32-bit bytecode words needed to hold one pointer. */
const size_t ptr_words = sizeof(void *) / sizeof(uint32_t);
6125
6126 /* How many words an instruction is. */
instruction_len(uint32_t instr)6127 static int instruction_len(uint32_t instr) {
6128 switch (getop(instr)) {
6129 case OP_SETDISPATCH: return 1 + ptr_words;
6130 case OP_TAGN: return 3;
6131 case OP_SETBIGGROUPNUM: return 2;
6132 default: return 1;
6133 }
6134 }
6135
op_has_longofs(int32_t instruction)6136 bool op_has_longofs(int32_t instruction) {
6137 switch (getop(instruction)) {
6138 case OP_CALL:
6139 case OP_BRANCH:
6140 case OP_CHECKDELIM:
6141 return true;
6142 /* The "tag" instructions only have 8 bytes available for the jump target,
6143 * but that is ok because these opcodes only require short jumps. */
6144 case OP_TAG1:
6145 case OP_TAG2:
6146 case OP_TAGN:
6147 return false;
6148 default:
6149 UPB_ASSERT(false);
6150 return false;
6151 }
6152 }
6153
/* Extracts the signed jump offset from an instruction: the upper 24 bits for
 * long-offset opcodes, otherwise the single byte just above the opcode. */
static int32_t getofs(uint32_t instruction) {
  return op_has_longofs(instruction) ? (int32_t)instruction >> 8
                                     : (int8_t)(instruction >> 8);
}
6161
/* Writes jump offset `ofs` into *instruction.  Long-offset opcodes are
 * rebuilt as opcode | offset; short-offset opcodes have only their offset
 * byte replaced so the remaining bits are preserved.  Asserts that the offset
 * reads back unchanged. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  uint32_t patched;

  if (op_has_longofs(*instruction)) {
    patched = getop(*instruction) | (uint32_t)ofs << 8;
  } else {
    patched = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  *instruction = patched;

  UPB_ASSERT(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6170
/* Returns the current write position as a word offset from the start of the
 * group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return (uint32_t)(c->pc - base);
}
6174
6175 /* Defines a local label at the current PC location. All previous forward
6176 * references are updated to point to this location. The location is noted
6177 * for any future backward references. */
label(compiler * c,unsigned int label)6178 static void label(compiler *c, unsigned int label) {
6179 int val;
6180 uint32_t *codep;
6181
6182 UPB_ASSERT(label < MAXLABEL);
6183 val = c->fwd_labels[label];
6184 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6185 while (codep) {
6186 int ofs = getofs(*codep);
6187 setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep)));
6188 codep = ofs ? codep + ofs : NULL;
6189 }
6190 c->fwd_labels[label] = EMPTYLABEL;
6191 c->back_labels[label] = pcofs(c);
6192 }
6193
6194 /* Creates a reference to a numbered label; either a forward reference
6195 * (positive arg) or backward reference (negative arg). For forward references
6196 * the value returned now is actually a "next" pointer into a linked list of all
6197 * instructions that use this label and will be patched later when the label is
6198 * defined with label().
6199 *
6200 * The returned value is the offset that should be written into the instruction.
6201 */
labelref(compiler * c,int label)6202 static int32_t labelref(compiler *c, int label) {
6203 UPB_ASSERT(label < MAXLABEL);
6204 if (label == LABEL_DISPATCH) {
6205 /* No resolving required. */
6206 return 0;
6207 } else if (label < 0) {
6208 /* Backward local label. Relative to the next instruction. */
6209 uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode);
6210 return c->back_labels[-label] - from;
6211 } else {
6212 /* Forward local label: prepend to (possibly-empty) linked list. */
6213 int *lptr = &c->fwd_labels[label];
6214 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6215 *lptr = pcofs(c);
6216 return ret;
6217 }
6218 }
6219
/* Appends one 32-bit word to the bytecode.  When the buffer is full it is
 * grown first (doubling, with a minimum of 64 words) and the PC is rebased
 * onto the reallocated buffer. */
static void put32(compiler *c, uint32_t v) {
  mgroup *group = c->group;

  if (c->pc == group->bytecode_end) {
    int pos = pcofs(c);
    size_t old_words = group->bytecode_end - group->bytecode;
    size_t new_words = UPB_MAX(old_words * 2, 64);

    /* TODO(haberman): handle OOM. */
    group->bytecode = upb_grealloc(group->bytecode,
                                   old_words * sizeof(uint32_t),
                                   new_words * sizeof(uint32_t));
    group->bytecode_end = group->bytecode + new_words;
    c->pc = group->bytecode + pos;
  }

  *c->pc = v;
  c->pc++;
}
6234
putop(compiler * c,int op,...)6235 static void putop(compiler *c, int op, ...) {
6236 va_list ap;
6237 va_start(ap, op);
6238
6239 switch (op) {
6240 case OP_SETDISPATCH: {
6241 uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
6242 put32(c, OP_SETDISPATCH);
6243 put32(c, (uint32_t)ptr);
6244 if (sizeof(uintptr_t) > sizeof(uint32_t))
6245 put32(c, (uint64_t)ptr >> 32);
6246 break;
6247 }
6248 case OP_STARTMSG:
6249 case OP_ENDMSG:
6250 case OP_PUSHLENDELIM:
6251 case OP_POP:
6252 case OP_SETDELIM:
6253 case OP_HALT:
6254 case OP_RET:
6255 case OP_DISPATCH:
6256 put32(c, op);
6257 break;
6258 case OP_PARSE_DOUBLE:
6259 case OP_PARSE_FLOAT:
6260 case OP_PARSE_INT64:
6261 case OP_PARSE_UINT64:
6262 case OP_PARSE_INT32:
6263 case OP_PARSE_FIXED64:
6264 case OP_PARSE_FIXED32:
6265 case OP_PARSE_BOOL:
6266 case OP_PARSE_UINT32:
6267 case OP_PARSE_SFIXED32:
6268 case OP_PARSE_SFIXED64:
6269 case OP_PARSE_SINT32:
6270 case OP_PARSE_SINT64:
6271 case OP_STARTSEQ:
6272 case OP_ENDSEQ:
6273 case OP_STARTSUBMSG:
6274 case OP_ENDSUBMSG:
6275 case OP_STARTSTR:
6276 case OP_STRING:
6277 case OP_ENDSTR:
6278 case OP_PUSHTAGDELIM:
6279 put32(c, op | va_arg(ap, upb_selector_t) << 8);
6280 break;
6281 case OP_SETBIGGROUPNUM:
6282 put32(c, op);
6283 put32(c, va_arg(ap, int));
6284 break;
6285 case OP_CALL: {
6286 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
6287 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
6288 break;
6289 }
6290 case OP_CHECKDELIM:
6291 case OP_BRANCH: {
6292 uint32_t instruction = op;
6293 int label = va_arg(ap, int);
6294 setofs(&instruction, labelref(c, label));
6295 put32(c, instruction);
6296 break;
6297 }
6298 case OP_TAG1:
6299 case OP_TAG2: {
6300 int label = va_arg(ap, int);
6301 uint64_t tag = va_arg(ap, uint64_t);
6302 uint32_t instruction = (uint32_t)(op | (tag << 16));
6303 UPB_ASSERT(tag <= 0xffff);
6304 setofs(&instruction, labelref(c, label));
6305 put32(c, instruction);
6306 break;
6307 }
6308 case OP_TAGN: {
6309 int label = va_arg(ap, int);
6310 uint64_t tag = va_arg(ap, uint64_t);
6311 uint32_t instruction = op | (upb_value_size(tag) << 16);
6312 setofs(&instruction, labelref(c, label));
6313 put32(c, instruction);
6314 put32(c, (uint32_t)tag);
6315 put32(c, tag >> 32);
6316 break;
6317 }
6318 }
6319
6320 va_end(ap);
6321 }
6322
6323 #if defined(UPB_DUMP_BYTECODE)
6324
upb_pbdecoder_getopname(unsigned int op)6325 const char *upb_pbdecoder_getopname(unsigned int op) {
6326 #define QUOTE(x) #x
6327 #define EXPAND_AND_QUOTE(x) QUOTE(x)
6328 #define OPNAME(x) OP_##x
6329 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
6330 #define T(x) OP(PARSE_##x)
6331 /* Keep in sync with list in decoder.int.h. */
6332 switch ((opcode)op) {
6333 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
6334 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
6335 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
6336 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
6337 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
6338 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
6339 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
6340 }
6341 return "<unknown op>";
6342 #undef OP
6343 #undef T
6344 }
6345
6346 #endif
6347
6348 #ifdef UPB_DUMP_BYTECODE
6349
dumpbc(uint32_t * p,uint32_t * end,FILE * f)6350 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6351
6352 uint32_t *begin = p;
6353
6354 while (p < end) {
6355 fprintf(f, "%p %8tx", p, p - begin);
6356 uint32_t instr = *p++;
6357 uint8_t op = getop(instr);
6358 fprintf(f, " %s", upb_pbdecoder_getopname(op));
6359 switch ((opcode)op) {
6360 case OP_SETDISPATCH: {
6361 const upb_inttable *dispatch;
6362 memcpy(&dispatch, p, sizeof(void*));
6363 p += ptr_words;
6364 const upb_pbdecodermethod *method =
6365 (void *)((char *)dispatch -
6366 offsetof(upb_pbdecodermethod, dispatch));
6367 fprintf(f, " %s", upb_msgdef_fullname(
6368 upb_handlers_msgdef(method->dest_handlers_)));
6369 break;
6370 }
6371 case OP_DISPATCH:
6372 case OP_STARTMSG:
6373 case OP_ENDMSG:
6374 case OP_PUSHLENDELIM:
6375 case OP_POP:
6376 case OP_SETDELIM:
6377 case OP_HALT:
6378 case OP_RET:
6379 break;
6380 case OP_PARSE_DOUBLE:
6381 case OP_PARSE_FLOAT:
6382 case OP_PARSE_INT64:
6383 case OP_PARSE_UINT64:
6384 case OP_PARSE_INT32:
6385 case OP_PARSE_FIXED64:
6386 case OP_PARSE_FIXED32:
6387 case OP_PARSE_BOOL:
6388 case OP_PARSE_UINT32:
6389 case OP_PARSE_SFIXED32:
6390 case OP_PARSE_SFIXED64:
6391 case OP_PARSE_SINT32:
6392 case OP_PARSE_SINT64:
6393 case OP_STARTSEQ:
6394 case OP_ENDSEQ:
6395 case OP_STARTSUBMSG:
6396 case OP_ENDSUBMSG:
6397 case OP_STARTSTR:
6398 case OP_STRING:
6399 case OP_ENDSTR:
6400 case OP_PUSHTAGDELIM:
6401 fprintf(f, " %d", instr >> 8);
6402 break;
6403 case OP_SETBIGGROUPNUM:
6404 fprintf(f, " %d", *p++);
6405 break;
6406 case OP_CHECKDELIM:
6407 case OP_CALL:
6408 case OP_BRANCH:
6409 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6410 break;
6411 case OP_TAG1:
6412 case OP_TAG2: {
6413 fprintf(f, " tag:0x%x", instr >> 16);
6414 if (getofs(instr)) {
6415 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6416 }
6417 break;
6418 }
6419 case OP_TAGN: {
6420 uint64_t tag = *p++;
6421 tag |= (uint64_t)*p++ << 32;
6422 fprintf(f, " tag:0x%llx", (long long)tag);
6423 fprintf(f, " n:%d", instr >> 16);
6424 if (getofs(instr)) {
6425 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6426 }
6427 break;
6428 }
6429 }
6430 fputs("\n", f);
6431 }
6432 }
6433
6434 #endif
6435
/* Builds the tag for field `f` with the given wire type (field number
 * shifted left by 3, OR'ed with the wire type) and returns it as encoded by
 * upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
6443
/* Emits the tag-check instruction for field `f` / `wire_type` with jump
 * label `dest`.  The opcode is chosen from the size reported for the encoded
 * tag: TAG1 for 1, TAG2 for 2, TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  int op;

  switch (upb_value_size(tag)) {
    case 1:
      op = OP_TAG1;
      break;
    case 2:
      op = OP_TAG2;
      break;
    default:
      op = OP_TAGN;
      break;
  }

  putop(c, op, dest, tag);
}
6459
getsel(const upb_fielddef * f,upb_handlertype_t type)6460 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6461 upb_selector_t selector;
6462 bool ok = upb_handlers_getselector(f, type, &selector);
6463 UPB_ASSERT(ok);
6464 return selector;
6465 }
6466
6467 /* Takes an existing, primary dispatch table entry and repacks it with a
6468 * different alternate wire type. Called when we are inserting a secondary
6469 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)6470 static uint64_t repack(uint64_t dispatch, int new_wt2) {
6471 uint64_t ofs;
6472 uint8_t wt1;
6473 uint8_t old_wt2;
6474 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6475 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
6476 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6477 }
6478
6479 /* Marks the current bytecode position as the dispatch target for this message,
6480 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)6481 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6482 const upb_fielddef *f, int wire_type) {
6483 /* Offset is relative to msg base. */
6484 uint64_t ofs = pcofs(c) - method->code_base.ofs;
6485 uint32_t fn = upb_fielddef_number(f);
6486 upb_inttable *d = &method->dispatch;
6487 upb_value v;
6488 if (upb_inttable_remove(d, fn, &v)) {
6489 /* TODO: prioritize based on packed setting in .proto file. */
6490 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6491 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6492 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6493 } else {
6494 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
6495 upb_inttable_insert(d, fn, upb_value_uint64(val));
6496 }
6497 }
6498
/* Emits the frame-push instruction for a submessage or group field.
 *
 * Fields whose descriptor type is MESSAGE get OP_PUSHLENDELIM.  All others
 * get OP_PUSHTAGDELIM carrying the field number; a field number of 1 << 24 or
 * more is instead emitted as OP_PUSHTAGDELIM 0 followed by
 * OP_SETBIGGROUPNUM. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < (1 << 24)) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
6512
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)6513 static upb_pbdecodermethod *find_submethod(const compiler *c,
6514 const upb_pbdecodermethod *method,
6515 const upb_fielddef *f) {
6516 const upb_handlers *sub =
6517 upb_handlers_getsubhandlers(method->dest_handlers_, f);
6518 upb_value v;
6519 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
6520 ? upb_value_getptr(v)
6521 : NULL;
6522 }
6523
/* Emits "op" with selector "sel", but only when handlers "h" have a handler
 * registered for that selector. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;  /* No handler registered: emit nothing. */
  }
  putop(c, op, sel);
}
6530
6531 /* Puts an opcode to call a callback, but only if a callback actually exists for
6532 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)6533 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
6534 const upb_fielddef *f, upb_handlertype_t type) {
6535 putsel(c, op, getsel(f, type), h);
6536 }
6537
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)6538 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
6539 if (!upb_fielddef_lazy(f))
6540 return false;
6541
6542 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
6543 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
6544 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
6545 }
6546
6547
6548 /* bytecode compiler code generation ******************************************/
6549
/* Symbolic names for our local labels.
 *
 * NOTE(review): call sites in this file pass a label negated (for example
 * -LABEL_LOOPSTART, -LABEL_FIELD) to OP_BRANCH when the label was defined
 * earlier in the bytecode; presumably the sign selects a backward reference
 * -- confirm against label()/putop(). */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
6555
6556 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)6557 static void generate_msgfield(compiler *c, const upb_fielddef *f,
6558 upb_pbdecodermethod *method) {
6559 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6560 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
6561 int wire_type;
6562
6563 if (!sub_m) {
6564 /* Don't emit any code for this field at all; it will be parsed as an
6565 * unknown field.
6566 *
6567 * TODO(haberman): we should change this to parse it as a string field
6568 * instead. It will probably be faster, but more importantly, once we
6569 * start vending unknown fields, a field shouldn't be treated as unknown
6570 * just because it doesn't have subhandlers registered. */
6571 return;
6572 }
6573
6574 label(c, LABEL_FIELD);
6575
6576 wire_type =
6577 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
6578 ? UPB_WIRE_TYPE_DELIMITED
6579 : UPB_WIRE_TYPE_START_GROUP;
6580
6581 if (upb_fielddef_isseq(f)) {
6582 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6583 putchecktag(c, f, wire_type, LABEL_DISPATCH);
6584 dispatchtarget(c, method, f, wire_type);
6585 putop(c, OP_PUSHTAGDELIM, 0);
6586 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6587 label(c, LABEL_LOOPSTART);
6588 putpush(c, f);
6589 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
6590 putop(c, OP_CALL, sub_m);
6591 putop(c, OP_POP);
6592 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
6593 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
6594 putop(c, OP_SETDELIM);
6595 }
6596 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6597 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
6598 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6599 label(c, LABEL_LOOPBREAK);
6600 putop(c, OP_POP);
6601 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6602 } else {
6603 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6604 putchecktag(c, f, wire_type, LABEL_DISPATCH);
6605 dispatchtarget(c, method, f, wire_type);
6606 putpush(c, f);
6607 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
6608 putop(c, OP_CALL, sub_m);
6609 putop(c, OP_POP);
6610 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
6611 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
6612 putop(c, OP_SETDELIM);
6613 }
6614 }
6615 }
6616
6617 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)6618 static void generate_delimfield(compiler *c, const upb_fielddef *f,
6619 upb_pbdecodermethod *method) {
6620 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6621
6622 label(c, LABEL_FIELD);
6623 if (upb_fielddef_isseq(f)) {
6624 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6625 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6626 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6627 putop(c, OP_PUSHTAGDELIM, 0);
6628 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6629 label(c, LABEL_LOOPSTART);
6630 putop(c, OP_PUSHLENDELIM);
6631 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6632 /* Need to emit even if no handler to skip past the string. */
6633 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
6634 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
6635 putop(c, OP_POP);
6636 putop(c, OP_SETDELIM);
6637 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6638 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
6639 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6640 label(c, LABEL_LOOPBREAK);
6641 putop(c, OP_POP);
6642 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6643 } else {
6644 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6645 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6646 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6647 putop(c, OP_PUSHLENDELIM);
6648 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6649 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
6650 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
6651 putop(c, OP_POP);
6652 putop(c, OP_SETDELIM);
6653 }
6654 }
6655
/* Generates bytecode to parse a single primitive field.
 *
 * For a repeated field two loops are emitted back to back: a packed loop
 * (registered as the dispatch target for UPB_WIRE_TYPE_DELIMITED) and a
 * non-packed loop (registered for the field's native wire type).  Both loops
 * exit to the same LABEL_LOOPBREAK, after which a single POP/ENDSEQ/SETDELIM
 * sequence is emitted. */
static void generate_primitivefield(compiler *c, const upb_fielddef *f,
                                    upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
  opcode parse_type;
  upb_selector_t sel;
  int wire_type;

  label(c, LABEL_FIELD);

  /* From a decoding perspective, ENUM is the same as INT32. */
  if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
    descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;

  /* The (possibly remapped) descriptor type is used directly as the parse
   * opcode. */
  parse_type = (opcode)descriptor_type;

  /* TODO(haberman): generate packed or non-packed first depending on "packed"
   * setting in the fielddef.  This will favor (in speed) whichever was
   * specified. */

  UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  /* The native wire type is looked up from the field's own descriptor type,
   * not from the remapped local copy above. */
  wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
  if (upb_fielddef_isseq(f)) {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
    label(c, LABEL_LOOPSTART);
    putop(c, parse_type, sel);
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    /* The non-packed loop starts here and is a second dispatch target for
     * the same field.  LABEL_LOOPSTART is defined again below;
     * NOTE(review): presumably each -LABEL_LOOPSTART branch binds to the
     * most recent definition -- confirm against label(). */
    dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
    label(c, LABEL_LOOPSTART);
    putop(c, parse_type, sel);
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);  /* Packed and non-packed join. */
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
    putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
  } else {
    /* Singular field: end-of-message check, tag check, then one value. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
    dispatchtarget(c, method, f, wire_type);
    putop(c, parse_type, sel);
  }
}
6709
/* Emits bytecode for parsing "method"'s message at the compiler's current
 * position, while rebuilding the method's dispatch table from scratch and
 * adding all dispatch targets for this message to it.
 *
 * The method's code_base.ofs is set to the bytecode offset at which its code
 * starts. */
static void compile_method(compiler *c, upb_pbdecodermethod *method) {
  const upb_handlers *h;
  const upb_msgdef *md;
  uint32_t* start_pc;
  upb_msg_field_iter i;
  upb_value val;

  UPB_ASSERT(method);

  /* Clear all entries in the dispatch table. */
  upb_inttable_uninit(&method->dispatch);
  upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);

  h = upb_pbdecodermethod_desthandlers(method);
  md = upb_handlers_msgdef(h);

  method->code_base.ofs = pcofs(c);
  putop(c, OP_SETDISPATCH, &method->dispatch);
  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
  label(c, LABEL_FIELD);
  /* Remember where the field code begins so we can tell below whether any
   * field emitted anything. */
  start_pc = c->pc;
  for(upb_msg_field_begin(&i, md);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    upb_fieldtype_t type = upb_fielddef_type(f);

    /* Message fields use the submessage generator unless the compiler is in
     * lazy mode and the field has lazy (string) handlers; in that case they
     * fall through to the delimited generator together with strings and
     * bytes.  Everything else is a primitive. */
    if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
      generate_msgfield(c, f, method);
    } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
               type == UPB_TYPE_MESSAGE) {
      generate_delimfield(c, f, method);
    } else {
      generate_primitivefield(c, f, method);
    }
  }

  /* If there were no fields, or if no handlers were defined, we need to
   * generate a non-empty loop body so that we can at least dispatch for unknown
   * fields and check for the end of the message. */
  if (c->pc == start_pc) {
    /* Check for end-of-message. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    /* Unconditionally dispatch. */
    putop(c, OP_DISPATCH, 0);
  }

  /* For now we just loop back to the last field of the message (or if none,
   * the DISPATCH opcode for the message). */
  putop(c, OP_BRANCH, -LABEL_FIELD);

  /* Insert both a label and a dispatch table entry for this end-of-msg. */
  label(c, LABEL_ENDMSG);
  val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
  upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);

  putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
  putop(c, OP_RET);

  upb_inttable_compact(&method->dispatch);
}
6773
6774 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
6775 * Returns the method for these handlers.
6776 *
6777 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)6778 static void find_methods(compiler *c, const upb_handlers *h) {
6779 upb_value v;
6780 upb_msg_field_iter i;
6781 const upb_msgdef *md;
6782 upb_pbdecodermethod *method;
6783
6784 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
6785 return;
6786
6787 method = newmethod(h, c->group);
6788 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
6789
6790 /* Find submethods. */
6791 md = upb_handlers_msgdef(h);
6792 for(upb_msg_field_begin(&i, md);
6793 !upb_msg_field_done(&i);
6794 upb_msg_field_next(&i)) {
6795 const upb_fielddef *f = upb_msg_iter_field(&i);
6796 const upb_handlers *sub_h;
6797 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
6798 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
6799 /* We only generate a decoder method for submessages with handlers.
6800 * Others will be parsed as unknown fields. */
6801 find_methods(c, sub_h);
6802 }
6803 }
6804 }
6805
6806 /* (Re-)compile bytecode for all messages in "msgs."
6807 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)6808 static void compile_methods(compiler *c) {
6809 upb_inttable_iter i;
6810
6811 /* Start over at the beginning of the bytecode. */
6812 c->pc = c->group->bytecode;
6813
6814 upb_inttable_begin(&i, &c->group->methods);
6815 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6816 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
6817 compile_method(c, method);
6818 }
6819 }
6820
/* For every method in group "g": converts the method's code base from a
 * bytecode offset into a pointer into g->bytecode, and installs the bytecode
 * decoder's start/string/end callbacks on the method's input handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);

    upb_inttable_next(&it);
  }
}
6835
6836
6837 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
6838 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool lazy)6839 const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
6840 mgroup *g;
6841 compiler *c;
6842
6843 g = newgroup();
6844 c = newcompiler(g, lazy);
6845 find_methods(c, dest);
6846
6847 /* We compile in two passes:
6848 * 1. all messages are assigned relative offsets from the beginning of the
6849 * bytecode (saved in method->code_base).
6850 * 2. forwards OP_CALL instructions can be correctly linked since message
6851 * offsets have been previously assigned.
6852 *
6853 * Could avoid the second pass by linking OP_CALL instructions somehow. */
6854 compile_methods(c);
6855 compile_methods(c);
6856 g->bytecode_end = c->pc;
6857 freecompiler(c);
6858
6859 #ifdef UPB_DUMP_BYTECODE
6860 {
6861 FILE *f = fopen("/tmp/upb-bytecode", "w");
6862 UPB_ASSERT(f);
6863 dumpbc(g->bytecode, g->bytecode_end, stderr);
6864 dumpbc(g->bytecode, g->bytecode_end, f);
6865 fclose(f);
6866
6867 f = fopen("/tmp/upb-bytecode.bin", "wb");
6868 UPB_ASSERT(f);
6869 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
6870 fclose(f);
6871 }
6872 #endif
6873
6874 set_bytecode_handlers(g);
6875 return g;
6876 }
6877
6878
6879 /* upb_pbcodecache ************************************************************/
6880
upb_pbcodecache_new(upb_handlercache * dest)6881 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
6882 upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
6883
6884 if (!c) return NULL;
6885
6886 c->dest = dest;
6887 c->lazy = false;
6888
6889 c->arena = upb_arena_new();
6890 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
6891
6892 return c;
6893 }
6894
/* Frees every group stored in the cache, then the groups table, the arena,
 * and the cache object itself. */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    freegroup((void*)upb_value_getconstptr(entry));
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
6908
/* Sets the cache's lazy flag.  Asserts that no groups have been created in
 * the cache yet. */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  UPB_ASSERT(0 == upb_inttable_count(&c->groups));

  c->lazy = lazy;
}
6913
upb_pbcodecache_get(upb_pbcodecache * c,const upb_msgdef * md)6914 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
6915 const upb_msgdef *md) {
6916 upb_value v;
6917 bool ok;
6918 const upb_handlers *h;
6919 const mgroup *g;
6920
6921 h = upb_handlercache_get(c->dest, md);
6922 if (upb_inttable_lookupptr(&c->groups, md, &v)) {
6923 g = upb_value_getconstptr(v);
6924 } else {
6925 g = mgroup_new(h, c->lazy);
6926 ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
6927 UPB_ASSUME(ok);
6928 }
6929
6930 ok = upb_inttable_lookupptr(&g->methods, h, &v);
6931 UPB_ASSUME(ok);
6932 return upb_value_getptr(v);
6933 }
6934 /*
6935 ** upb::Decoder (Bytecode Decoder VM)
6936 **
6937 ** Bytecode must previously have been generated using the bytecode compiler in
6938 ** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
6939 ** parse the input.
6940 **
6941 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
6942 ** instruction and resume from there. A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
6945 ** sometimes requires keeping a few trailing bytes from the last buffer around
6946 ** in the "residual" buffer.
6947 */
6948
6949 #include <inttypes.h>
6950 #include <stddef.h>
6951
6952 #ifdef UPB_DUMP_BYTECODE
6953 #include <stdio.h>
6954 #endif
6955
6956
/* Evaluates "x"; when it is false, returns upb_pbdecoder_suspend(d) from the
 * enclosing function.  Requires a variable named "d" in scope.
 * NOTE(review): this expands to a bare "if" statement that already ends in a
 * semicolon, so it is not safe as the unbraced body of an if/else. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);

/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";

/* upb_pbdecoder **************************************************************/

/* A standalone OP_HALT opcode value.  NOTE(review): its use is not visible in
 * this part of the file. */
static opcode halt = OP_HALT;

/* A dummy character we can point to when the user passes us a NULL buffer.
 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
 * behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
6975
6976 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)6977 static bool consumes_input(opcode op) {
6978 switch (op) {
6979 case OP_SETDISPATCH:
6980 case OP_STARTMSG:
6981 case OP_ENDMSG:
6982 case OP_STARTSEQ:
6983 case OP_ENDSEQ:
6984 case OP_STARTSUBMSG:
6985 case OP_ENDSUBMSG:
6986 case OP_STARTSTR:
6987 case OP_ENDSTR:
6988 case OP_PUSHTAGDELIM:
6989 case OP_POP:
6990 case OP_SETDELIM:
6991 case OP_SETBIGGROUPNUM:
6992 case OP_CHECKDELIM:
6993 case OP_CALL:
6994 case OP_RET:
6995 case OP_BRANCH:
6996 return false;
6997 default:
6998 return true;
6999 }
7000 }
7001
/* Size in bytes of a decoder stack holding "entries" frames.  "d" is
 * unused. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  const size_t frame_bytes = sizeof(upb_pbdecoder_frame);
  UPB_UNUSED(d);
  return entries * frame_bytes;
}
7006
/* Size in bytes of a call stack holding "entries" entries, each of which is
 * a uint32_t pointer.  "d" is unused. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  const size_t entry_bytes = sizeof(uint32_t*);
  UPB_UNUSED(d);
  return entries * entry_bytes;
}
7012
7013
7014 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7015
7016 /* It's unfortunate that we have to micro-manage the compiler with
7017 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7018 * specific to one hardware configuration. But empirically on a Core i7,
7019 * performance increases 30-50% with these annotations. Every instance where
7020 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7021 * benchmarks. */
7022
/* Records the error message "msg" in the decoder's status object. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status_seterrmsg(d->status, msg);
}
7026
/* Externally visible counterpart of seterr(): records the error message
 * "msg" in the decoder's status object. */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  upb_status_seterrmsg(d->status, msg);
}
7030
7031
7032 /* Buffering ******************************************************************/
7033
7034 /* We operate on one buffer at a time, which is either the user's buffer passed
7035 * to our "decode" callback or some residual bytes from the previous buffer. */
7036
7037 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
7038 * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)7039 static size_t curbufleft(const upb_pbdecoder *d) {
7040 UPB_ASSERT(d->data_end >= d->ptr);
7041 return d->data_end - d->ptr;
7042 }
7043
7044 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)7045 static size_t bufleft(const upb_pbdecoder *d) {
7046 return d->end - d->ptr;
7047 }
7048
7049 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)7050 uint64_t offset(const upb_pbdecoder *d) {
7051 return d->bufstart_ofs + (d->ptr - d->buf);
7052 }
7053
7054 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)7055 size_t delim_remaining(const upb_pbdecoder *d) {
7056 return d->top->end_ofs - offset(d);
7057 }
7058
7059 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)7060 static void advance(upb_pbdecoder *d, size_t len) {
7061 UPB_ASSERT(curbufleft(d) >= len);
7062 d->ptr += len;
7063 }
7064
/* Returns true when "p" lies in the range [buf, end].  Note that "end"
 * itself counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return p <= end;
}
7068
in_residual_buf(const upb_pbdecoder * d,const char * p)7069 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7070 return in_buf(p, d->residual, d->residual_end);
7071 }
7072
7073 /* Calculates the delim_end value, which is affected by both the current buffer
7074 * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)7075 static void set_delim_end(upb_pbdecoder *d) {
7076 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7077 if (delim_ofs <= (size_t)(d->end - d->buf)) {
7078 d->delim_end = d->buf + delim_ofs;
7079 d->data_end = d->delim_end;
7080 } else {
7081 d->data_end = d->end;
7082 d->delim_end = NULL;
7083 }
7084 }
7085
/* Makes [buf, end) the decoder's current buffer, positions d->ptr at its
 * start, and recomputes the delimited end for the new buffer. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
7092
/* Moves on from a fully consumed current buffer to the buffer of "len" bytes
 * at "buf", advancing bufstart_ofs by the length of the buffer being left.
 * Asserts that the current buffer has no bytes left. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const char *new_end = buf + len;

  UPB_ASSERT(curbufleft(d) == 0);
  d->bufstart_ofs += (d->end - d->buf);
  switchtobuf(d, buf, new_end);
}
7098
/* Records the current read position (d->ptr) as the decoder's checkpoint. */
static void checkpoint(upb_pbdecoder *d) {
  /* This assertion exists for efficiency rather than correctness: it guards
   * against calling checkpoint() more often than necessary. */
  UPB_ASSERT(d->ptr != d->checkpoint);
  d->checkpoint = d->ptr;
}
7106
/* Skips "bytes" bytes in the stream, which may be more than available.  If we
 * skip more bytes than are available, we return a long read count to the caller
 * indicating how many bytes can be skipped over before passing actual data
 * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
 * won't actually be read.
 *
 * Returns DECODE_OK when the whole skip was satisfied from the current
 * buffer; otherwise returns a byte count (the result of suspending on error,
 * or size_param plus the number of bytes still to be skipped).
 */
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
  UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
  UPB_ASSERT(d->skip == 0);
  if (bytes > delim_remaining(d)) {
    /* The skip would run past the end of the enclosing delimited region. */
    seterr(d, "Skipped value extended beyond enclosing submessage.");
    return (int32_t)upb_pbdecoder_suspend(d);
  } else if (bufleft(d) >= bytes) {
    /* Skipped data is all in current buffer, and more is still available. */
    advance(d, bytes);
    d->skip = 0;
    return DECODE_OK;
  } else {
    /* Skipped data extends beyond currently available buffers. */
    d->pc = d->last;
    /* Remember how many bytes remain to be skipped in later buffers. */
    d->skip = bytes - curbufleft(d);
    /* Account for the whole current buffer, then switch to an empty
     * residual buffer. */
    d->bufstart_ofs += (d->end - d->buf);
    d->residual_end = d->residual;
    switchtobuf(d, d->residual, d->residual_end);
    return (int32_t)(d->size_param + d->skip);
  }
}
7134
7135
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 * "buf"/"size" describe the new user buffer and "handle" is stored in the
 * decoder.  Any pending skip is applied first, and if the decoder is inside
 * an unknown group, unknown values continue to be skipped.
 *
 * Returns DECODE_OK when decoding can proceed.  The CHECK_RETURN() calls and
 * the NULL-buffer error path return other values from this function. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* d->skip and d->residual_end could probably elegantly be represented
   * as a single variable, to more easily represent this invariant. */
  UPB_ASSERT(!(d->skip && d->residual_end > d->residual));

  /* We need to remember the original size_param, so that the value we return
   * is relative to it, even if we do some skipping first. */
  d->size_param = size;
  d->handle = handle;

  /* Have to handle this case specially (ie. not with skip()) because the user
   * is allowed to pass a NULL buffer here, which won't allow us to safely
   * calculate a d->end or use our normal functions like curbufleft(). */
  if (d->skip && d->skip >= size) {
    /* The whole buffer is skipped: account for it and continue with an empty
     * buffer pointing at dummy_char. */
    d->skip -= size;
    d->bufstart_ofs += size;
    buf = &dummy_char;
    size = 0;

    /* We can't just return now, because we might need to execute some ops
     * like CHECKDELIM, which could call some callbacks and pop the stack. */
  }

  /* We need to pretend that this was the actual buffer param, since some of the
   * calculations assume that d->ptr/d->buf is relative to this. */
  d->buf_param = buf;

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return (int32_t)upb_pbdecoder_suspend(d);
  }

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    UPB_ASSERT(d->ptr == d->residual);
  } else {
    /* No residual bytes: read directly from the user's buffer. */
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  /* Handle skips that don't cover the whole buffer (as above). */
  if (d->skip) {
    size_t skip_bytes = d->skip;
    /* skip() asserts that d->skip is zero on entry. */
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    checkpoint(d);
  }

  /* If we're inside an unknown group, continue to parse unknown values. */
  if (d->top->groupnum < 0) {
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    checkpoint(d);
  }

  return DECODE_OK;
}
7199
7200 /* Suspends the decoder at the last checkpoint, without saving any residual
7201 * bytes. If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)7202 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7203 d->pc = d->last;
7204 if (d->checkpoint == d->residual) {
7205 /* Checkpoint was in residual buf; no user bytes were consumed. */
7206 d->ptr = d->residual;
7207 return 0;
7208 } else {
7209 size_t ret = d->size_param - (d->end - d->checkpoint);
7210 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
7211 UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
7212
7213 d->bufstart_ofs += (d->checkpoint - d->buf);
7214 d->residual_end = d->residual;
7215 switchtobuf(d, d->residual, d->residual_end);
7216 return ret;
7217 }
7218 }
7219
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
 * bytes in our residual buffer.  This is necessary if we need more user
 * bytes to form a complete value, which might not be contiguous in the
 * user's buffers.  Always consumes all user bytes.
 *
 * Returns d->size_param.  On return the residual buffer is the current
 * buffer. */
static size_t suspend_save(upb_pbdecoder *d) {
  /* We hit end-of-buffer before we could parse a full value.
   * Save any unconsumed bytes (if any) to the residual buffer. */
  d->pc = d->last;

  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
    UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
           sizeof(d->residual));
    if (!in_residual_buf(d, d->ptr)) {
      /* d->ptr has already moved on to the user's buffer, so move
       * bufstart_ofs back by the length of the residual bytes. */
      d->bufstart_ofs -= (d->residual_end - d->residual);
    }
    memcpy(d->residual_end, d->buf_param, d->size_param);
    d->residual_end += d->size_param;
  } else {
    /* Checkpoint was in user buf; old residual bytes not needed. */
    size_t save;
    UPB_ASSERT(!in_residual_buf(d, d->checkpoint));

    /* Rewind to the checkpoint and copy everything from there to the end of
     * the readable data into the residual buffer. */
    d->ptr = d->checkpoint;
    save = curbufleft(d);
    UPB_ASSERT(save <= sizeof(d->residual));
    memcpy(d->residual, d->ptr, save);
    d->residual_end = d->residual + save;
    d->bufstart_ofs = offset(d);
  }

  switchtobuf(d, d->residual, d->residual_end);
  return d->size_param;
}
7254
7255 /* Copies the next "bytes" bytes into "buf" and advances the stream.
7256 * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)7257 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7258 size_t bytes) {
7259 UPB_ASSERT(bytes <= curbufleft(d));
7260 memcpy(buf, d->ptr, bytes);
7261 advance(d, bytes);
7262 }
7263
7264 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
7265 * available in the current buffer or not. Returns a status code as described
7266 * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7267 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7268 size_t bytes) {
7269 const size_t avail = curbufleft(d);
7270 consumebytes(d, buf, avail);
7271 bytes -= avail;
7272 UPB_ASSERT(bytes > 0);
7273 if (in_residual_buf(d, d->ptr)) {
7274 advancetobuf(d, d->buf_param, d->size_param);
7275 }
7276 if (curbufleft(d) >= bytes) {
7277 consumebytes(d, (char *)buf + avail, bytes);
7278 return DECODE_OK;
7279 } else if (d->data_end == d->delim_end) {
7280 seterr(d, "Submessage ended in the middle of a value or group");
7281 return (int32_t)upb_pbdecoder_suspend(d);
7282 } else {
7283 return (int32_t)suspend_save(d);
7284 }
7285 }
7286
7287 /* Gets the next "bytes" bytes, regardless of whether they are available in the
7288 * current buffer or not. Returns a status code as described in decoder.int.h.
7289 */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)7290 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7291 size_t bytes) {
7292 if (curbufleft(d) >= bytes) {
7293 /* Buffer has enough data to satisfy. */
7294 consumebytes(d, buf, bytes);
7295 return DECODE_OK;
7296 } else {
7297 return getbytes_slow(d, buf, bytes);
7298 }
7299 }
7300
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7301 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7302 size_t bytes) {
7303 size_t ret = curbufleft(d);
7304 memcpy(buf, d->ptr, ret);
7305 if (in_residual_buf(d, d->ptr)) {
7306 size_t copy = UPB_MIN(bytes - ret, d->size_param);
7307 memcpy((char *)buf + ret, d->buf_param, copy);
7308 ret += copy;
7309 }
7310 return ret;
7311 }
7312
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)7313 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7314 size_t bytes) {
7315 if (curbufleft(d) >= bytes) {
7316 memcpy(buf, d->ptr, bytes);
7317 return bytes;
7318 } else {
7319 return peekbytes_slow(d, buf, bytes);
7320 }
7321 }
7322
7323
7324 /* Decoding of wire types *****************************************************/
7325
7326 /* Slow path for decoding a varint from the current buffer position.
7327 * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)7328 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7329 uint64_t *u64) {
7330 uint8_t byte = 0x80;
7331 int bitpos;
7332 *u64 = 0;
7333 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
7334 CHECK_RETURN(getbytes(d, &byte, 1));
7335 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7336 }
7337 if(bitpos == 70 && (byte & 0x80)) {
7338 seterr(d, kUnterminatedVarint);
7339 return (int32_t)upb_pbdecoder_suspend(d);
7340 }
7341 return DECODE_OK;
7342 }
7343
7344 /* Decodes a varint from the current buffer position.
7345 * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)7346 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7347 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7348 *u64 = *d->ptr;
7349 advance(d, 1);
7350 return DECODE_OK;
7351 } else if (curbufleft(d) >= 10) {
7352 /* Fast case. */
7353 upb_decoderet r = upb_vdecode_fast(d->ptr);
7354 if (r.p == NULL) {
7355 seterr(d, kUnterminatedVarint);
7356 return (int32_t)upb_pbdecoder_suspend(d);
7357 }
7358 advance(d, r.p - d->ptr);
7359 *u64 = r.val;
7360 return DECODE_OK;
7361 } else {
7362 /* Slow case -- varint spans buffer seam. */
7363 return upb_pbdecoder_decode_varint_slow(d, u64);
7364 }
7365 }
7366
7367 /* Decodes a 32-bit varint from the current buffer position.
7368 * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)7369 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7370 uint64_t u64;
7371 int32_t ret = decode_varint(d, &u64);
7372 if (ret >= 0) return ret;
7373 if (u64 > UINT32_MAX) {
7374 seterr(d, "Unterminated 32-bit varint");
7375 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7376 * so we know this path will always be treated as error by our caller.
7377 * Right now the size_t -> int32_t can overflow and produce negative values.
7378 */
7379 *u32 = 0;
7380 return (int32_t)upb_pbdecoder_suspend(d);
7381 }
7382 *u32 = (uint32_t)u64;
7383 return DECODE_OK;
7384 }
7385
7386 /* Decodes a fixed32 from the current buffer position.
7387 * Returns a status code as described in decoder.int.h.
7388 * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)7389 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7390 return getbytes(d, u32, 4);
7391 }
7392
7393 /* Decodes a fixed64 from the current buffer position.
7394 * Returns a status code as described in decoder.int.h.
7395 * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)7396 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7397 return getbytes(d, u64, 8);
7398 }
7399
7400 /* Non-static versions of the above functions.
7401 * These are called by the JIT for fallback paths. */
upb_pbdecoder_decode_f32(upb_pbdecoder * d,uint32_t * u32)7402 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
7403 return decode_fixed32(d, u32);
7404 }
7405
/* Externally-linked wrapper around decode_fixed64(). */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  const int32_t status = decode_fixed64(d, u64);
  return status;
}
7409
/* Reinterprets the 8 bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}
/* Reinterprets the 4 bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
7412
7413 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)7414 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7415 upb_pbdecoder_frame *fr = d->top;
7416
7417 if (end > fr->end_ofs) {
7418 seterr(d, kPbDecoderSubmessageTooLong);
7419 return false;
7420 } else if (fr == d->limit) {
7421 seterr(d, kPbDecoderStackOverflow);
7422 return false;
7423 }
7424
7425 fr++;
7426 fr->end_ofs = end;
7427 fr->dispatch = NULL;
7428 fr->groupnum = 0;
7429 d->top = fr;
7430 return true;
7431 }
7432
/* Pushes a frame for a tag-delimited region and records "arg" as its group
 * number.  Returns false if the push failed. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
   * field number) prior to hitting any enclosing submessage end, pushing our
   * existing delim end prevents us from continuing to parse values from a
   * corrupt proto that doesn't give us an END tag in time. */
  const bool pushed = decoder_push(d, d->top->end_ofs);
  if (pushed) {
    d->top->groupnum = arg;
  }
  return pushed;
}
7443
7444 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)7445 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7446
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)7447 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7448 uint64_t expected) {
7449 uint64_t data = 0;
7450 size_t bytes = upb_value_size(expected);
7451 size_t read = peekbytes(d, &data, bytes);
7452 if (read == bytes && data == expected) {
7453 /* Advance past matched bytes. */
7454 int32_t ok = getbytes(d, &data, read);
7455 UPB_ASSERT(ok < 0);
7456 return DECODE_OK;
7457 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7458 return (int32_t)suspend_save(d);
7459 } else {
7460 return DECODE_MISMATCH;
7461 }
7462 }
7463
/* Skips over unknown field data.
 *
 * If "fieldnum" is non-negative the caller has already decoded the tag and
 * passes its field number and wire type; otherwise the tag is decoded here.
 *
 * A START_GROUP pushes a frame whose groupnum is the negated field number, so
 * a negative groupnum on the top frame marks "inside an unknown group".  While
 * that is the case this function keeps looping over fields; once the top frame
 * has a non-negative groupnum the skipped bytes are reported through
 * upb_sink_putunknown() and DECODE_OK is returned.
 *
 * Returns DECODE_ENDGROUP when an END_GROUP tag matches the top frame's
 * (non-negated) groupnum, or whatever status a failing/suspending helper
 * produced. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* Caller already has a tag: enter the loop body past the tag decode. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    /* Low three bits are the wire type, the rest is the field number. */
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return (int32_t)upb_pbdecoder_suspend(d);
    }

    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* Value is decoded only to get past it; the result is discarded. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        /* Length prefix followed by that many bytes of payload. */
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Negated field number marks the new frame as an unknown group. */
        if (!pushtagdelim(d, -fieldnum)) {
          return (int32_t)upb_pbdecoder_suspend(d);
        }
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* Closes the unknown group we pushed above. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* Closes the group of the current frame; let the caller handle it. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return (int32_t)upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return (int32_t)upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      /* Not inside an unknown group: report the bytes between the last
       * checkpoint and the current position as unknown data, then stop. */
      /* TODO: More code needed for handling unknown groups. */
      upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
7529
/* Points d->pc at the bytecode registered under DISPATCH_ENDMSG in the top
 * frame's dispatch table.  The entry is asserted to exist. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_value entry;
  bool have_endmsg =
      upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &entry);
  UPB_ASSERT(have_endmsg);
  d->pc = d->top->base + upb_value_getuint64(entry);
}
7536
7537 /* Parses a tag and jumps to the corresponding bytecode instruction for this
7538 * field.
7539 *
7540 * If the tag is unknown (or the wire type doesn't match), parses the field as
7541 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
7542 * instruction for the end of message. */
dispatch(upb_pbdecoder * d)7543 static int32_t dispatch(upb_pbdecoder *d) {
7544 upb_inttable *dispatch = d->top->dispatch;
7545 uint32_t tag;
7546 uint8_t wire_type;
7547 uint32_t fieldnum;
7548 upb_value val;
7549 int32_t retval;
7550
7551 /* Decode tag. */
7552 CHECK_RETURN(decode_v32(d, &tag));
7553 wire_type = tag & 0x7;
7554 fieldnum = tag >> 3;
7555
7556 /* Lookup tag. Because of packed/non-packed compatibility, we have to
7557 * check the wire type against two possibilities. */
7558 if (fieldnum != DISPATCH_ENDMSG &&
7559 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
7560 uint64_t v = upb_value_getuint64(val);
7561 if (wire_type == (v & 0xff)) {
7562 d->pc = d->top->base + (v >> 16);
7563 return DECODE_OK;
7564 } else if (wire_type == ((v >> 8) & 0xff)) {
7565 bool found =
7566 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
7567 UPB_ASSERT(found);
7568 d->pc = d->top->base + upb_value_getuint64(val);
7569 return DECODE_OK;
7570 }
7571 }
7572
7573 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
7574 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
7575 * we need to back up to, so that when we're done skipping unknown data we
7576 * can re-check the delimited end. */
7577 d->last--; /* Necessary if we get suspended */
7578 d->pc = d->last;
7579 UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
7580
7581 /* Unknown field or ENDGROUP. */
7582 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
7583
7584 CHECK_RETURN(retval);
7585
7586 if (retval == DECODE_ENDGROUP) {
7587 goto_endmsg(d);
7588 return DECODE_OK;
7589 }
7590
7591 return DECODE_OK;
7592 }
7593
7594 /* Callers know that the stack is more than one deep because the opcodes that
7595 * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)7596 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
7597 UPB_ASSERT(d->top != d->stack);
7598 return d->top - 1;
7599 }
7600
7601
7602 /* The main decoding loop *****************************************************/
7603
/* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
 * switch() statement.
 *
 * Executes bytecode starting at d->pc until some instruction returns:
 *   - OP_HALT returns d->size_param.
 *   - Any handler may return early through CHECK_RETURN / CHECK_SUSPEND
 *     (macros defined elsewhere in this file) or an explicit "return".
 *
 * "group" is only referenced by the UPB_DUMP_BYTECODE debugging output.
 * "handle" is forwarded to upb_sink_putstring(). */
size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
                      const upb_bufhandle* handle) {

/* VMCASE emits one "case" of the switch below.  After the handler body runs to
 * completion, a checkpoint is taken if consumes_input(op) says so.  A "break"
 * inside the body therefore skips the checkpoint. */
#define VMCASE(op, code) \
  case op: { code; if (consumes_input(op)) checkpoint(d); break; }
/* PRIMITIVE_OP emits the case for OP_PARSE_<type>: decode a value with
 * decode_<wt>() into a "ctype" temporary, convert it with "convfunc" and pass
 * it to upb_sink_put<name>() with the instruction argument as selector. */
#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
  VMCASE(OP_PARSE_ ## type, { \
    ctype val; \
    CHECK_RETURN(decode_ ## wt(d, &val)); \
    upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \
  })

  while(1) {
    int32_t instruction;
    opcode op;
    uint32_t arg;
    int32_t longofs;

    /* Remember where this instruction started, then fetch it and split it
     * into the opcode and the argument held in the bits above the low 8. */
    d->last = d->pc;
    instruction = *d->pc++;
    op = getop(instruction);
    arg = instruction >> 8;
    longofs = arg;
    UPB_ASSERT(d->ptr != d->residual_end);
    UPB_UNUSED(group);
#ifdef UPB_DUMP_BYTECODE
    fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
                    "%x %s (%d)\n",
            (int)offset(d),
            (int)(d->ptr - d->buf),
            (int)(d->data_end - d->ptr),
            (int)(d->end - d->ptr),
            (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
            (int)(d->pc - 1 - group->bytecode),
            upb_pbdecoder_getopname(op),
            arg);
#endif
    switch (op) {
      /* Technically, we are losing data if we see a 32-bit varint that is not
       * properly sign-extended.  We could detect this and error about the data
       * loss, but proto2 does not do this, so we pass. */
      PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
      PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
      PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
      PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
      PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
      PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
      PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
      PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
      PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
      PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
      PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
      PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
      PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)

      /* The dispatch-table pointer is stored inline in the bytecode right
       * after this instruction; copy it out and step over it. */
      VMCASE(OP_SETDISPATCH,
        d->top->base = d->pc - 1;
        memcpy(&d->top->dispatch, d->pc, sizeof(void*));
        d->pc += sizeof(void*) / sizeof(uint32_t);
      )
      VMCASE(OP_STARTMSG,
        CHECK_SUSPEND(upb_sink_startmsg(d->top->sink));
      )
      VMCASE(OP_ENDMSG,
        CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status));
      )
      /* START* handlers are invoked on the enclosing frame's sink and fill in
       * the sink of the current (already pushed) frame. */
      VMCASE(OP_STARTSEQ,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink));
      )
      VMCASE(OP_ENDSEQ,
        CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg));
      )
      VMCASE(OP_STARTSUBMSG,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink));
      )
      /* Reads the sink from the frame just above the current top, i.e. the
       * frame that held the submessage. */
      VMCASE(OP_ENDSUBMSG,
        upb_sink subsink = (d->top + 1)->sink;
        CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, subsink, arg));
      )
      VMCASE(OP_STARTSTR,
        uint32_t len = (uint32_t)delim_remaining(d);
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink));
        if (len == 0) {
          d->pc++;  /* Skip OP_STRING. */
        }
      )
      /* Hands the rest of the current buffer to the string handler.  "n" is
       * the handler's return value: larger than "len" means it asked to skip
       * ahead, smaller means it did not take everything offered. */
      VMCASE(OP_STRING,
        uint32_t len = (uint32_t)curbufleft(d);
        size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle);
        if (n > len) {
          if (n > delim_remaining(d)) {
            seterr(d, "Tried to skip past end of string.");
            return upb_pbdecoder_suspend(d);
          } else {
            int32_t ret = skip(d, n);
            /* This shouldn't return DECODE_OK, because n > len. */
            UPB_ASSERT(ret >= 0);
            return ret;
          }
        }
        advance(d, n);
        if (n < len || d->delim_end == NULL) {
          /* We aren't finished with this string yet. */
          d->pc--;  /* Repeat OP_STRING. */
          if (n > 0) checkpoint(d);
          return upb_pbdecoder_suspend(d);
        }
      )
      VMCASE(OP_ENDSTR,
        CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg));
      )
      VMCASE(OP_PUSHTAGDELIM,
        CHECK_SUSPEND(pushtagdelim(d, arg));
      )
      /* The group number is stored in the bytecode word following this
       * instruction. */
      VMCASE(OP_SETBIGGROUPNUM,
        d->top->groupnum = *d->pc++;
      )
      VMCASE(OP_POP,
        UPB_ASSERT(d->top > d->stack);
        decoder_pop(d);
      )
      /* Reads a length prefix and pushes a frame ending that many bytes past
       * the current stream offset. */
      VMCASE(OP_PUSHLENDELIM,
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_SUSPEND(decoder_push(d, offset(d) + len));
        set_delim_end(d);
      )
      VMCASE(OP_SETDELIM,
        set_delim_end(d);
      )
      /* Branches by "longofs" when the read position has reached the end of
       * the current delimited region. */
      VMCASE(OP_CHECKDELIM,
        /* We are guaranteed of this assert because we never allow ourselves to
         * consume bytes beyond data_end, which covers delim_end when non-NULL.
         */
        UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
        if (d->ptr == d->delim_end)
          d->pc += longofs;
      )
      /* Saves the return address on the call stack, then jumps. */
      VMCASE(OP_CALL,
        d->callstack[d->call_len++] = d->pc;
        d->pc += longofs;
      )
      VMCASE(OP_RET,
        UPB_ASSERT(d->call_len > 0);
        d->pc = d->callstack[--d->call_len];
      )
      VMCASE(OP_BRANCH,
        d->pc += longofs;
      )
      /* Expects a specific one-byte tag (bits 8-15 of "arg").  On mismatch the
       * low 8 bits of "arg", read as a signed offset, say where to go:
       * LABEL_DISPATCH means "use dispatch()", anything else is a relative
       * branch.  The "badtag" label is also the mismatch target for OP_TAG2
       * and OP_TAGN below. */
      VMCASE(OP_TAG1,
        uint8_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xff;
        if (*d->ptr == expected) {
          advance(d, 1);
        } else {
          int8_t shortofs;
         badtag:
          shortofs = arg;
          if (shortofs == LABEL_DISPATCH) {
            CHECK_RETURN(dispatch(d));
          } else {
            d->pc += shortofs;
            break; /* Avoid checkpoint(). */
          }
        }
      )
      /* Expects a specific two-byte tag (bits 8-23 of "arg").  With fewer than
       * two bytes buffered the comparison is delegated to
       * upb_pbdecoder_checktag_slow(). */
      VMCASE(OP_TAG2,
        uint16_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xffff;
        if (curbufleft(d) >= 2) {
          uint16_t actual;
          memcpy(&actual, d->ptr, 2);
          if (expected == actual) {
            advance(d, 2);
          } else {
            goto badtag;
          }
        } else {
          int32_t result = upb_pbdecoder_checktag_slow(d, expected);
          if (result == DECODE_MISMATCH) goto badtag;
          if (result >= 0) return result;
        }
      )
      /* The expected tag occupies the 8 bytes (two bytecode words) following
       * this instruction. */
      VMCASE(OP_TAGN, {
        uint64_t expected;
        int32_t result;
        memcpy(&expected, d->pc, 8);
        d->pc += 2;
        result = upb_pbdecoder_checktag_slow(d, expected);
        if (result == DECODE_MISMATCH) goto badtag;
        if (result >= 0) return result;
      })
      VMCASE(OP_DISPATCH, {
        CHECK_RETURN(dispatch(d));
      })
      VMCASE(OP_HALT, {
        return d->size_param;
      })
    }
  }
}
7812
7813
7814 /* BytesHandler handlers ******************************************************/
7815
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)7816 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
7817 upb_pbdecoder *d = closure;
7818 UPB_UNUSED(size_hint);
7819 d->top->end_ofs = UINT64_MAX;
7820 d->bufstart_ofs = 0;
7821 d->call_len = 1;
7822 d->callstack[0] = &halt;
7823 d->pc = pc;
7824 d->skip = 0;
7825 return d;
7826 }
7827
upb_pbdecoder_end(void * closure,const void * handler_data)7828 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7829 upb_pbdecoder *d = closure;
7830 const upb_pbdecodermethod *method = handler_data;
7831 uint64_t end;
7832 char dummy;
7833
7834 if (d->residual_end > d->residual) {
7835 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
7836 return false;
7837 }
7838
7839 if (d->skip) {
7840 seterr(d, "Unexpected EOF inside skipped data");
7841 return false;
7842 }
7843
7844 if (d->top->end_ofs != UINT64_MAX) {
7845 seterr(d, "Unexpected EOF inside delimited string");
7846 return false;
7847 }
7848
7849 /* The user's end() call indicates that the message ends here. */
7850 end = offset(d);
7851 d->top->end_ofs = end;
7852
7853 {
7854 const uint32_t *p = d->pc;
7855 d->stack->end_ofs = end;
7856 /* Check the previous bytecode, but guard against beginning. */
7857 if (p != method->code_base.ptr) p--;
7858 if (getop(*p) == OP_CHECKDELIM) {
7859 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
7860 UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
7861 getop(*d->pc) == OP_TAG2 ||
7862 getop(*d->pc) == OP_TAGN ||
7863 getop(*d->pc) == OP_DISPATCH);
7864 d->pc = p;
7865 }
7866 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
7867 }
7868
7869 if (d->call_len != 0) {
7870 seterr(d, "Unexpected EOF inside submessage or group");
7871 return false;
7872 }
7873
7874 return true;
7875 }
7876
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)7877 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
7878 size_t size, const upb_bufhandle *handle) {
7879 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
7880
7881 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
7882 CHECK_RETURN(result);
7883
7884 return run_decoder_vm(decoder, group, handle);
7885 }
7886
7887
7888 /* Public API *****************************************************************/
7889
/* Returns the decoder to its initial parse state: the residual buffer is
 * emptied and becomes the current buffer, and the frame stack is rewound to
 * its first frame with group number 0. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* Empty residual buffer, with all read pointers at its start. */
  d->residual_end = d->residual;
  d->buf = d->residual;
  d->ptr = d->residual;
  d->end = d->residual;

  /* Single-frame stack. */
  d->top = d->stack;
  d->top->groupnum = 0;
}
7898
upb_pbdecoder_create(upb_arena * a,const upb_pbdecodermethod * m,upb_sink sink,upb_status * status)7899 upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m,
7900 upb_sink sink, upb_status *status) {
7901 const size_t default_max_nesting = 64;
7902 #ifndef NDEBUG
7903 size_t size_before = upb_arena_bytesallocated(a);
7904 #endif
7905
7906 upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder));
7907 if (!d) return NULL;
7908
7909 d->method_ = m;
7910 d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting));
7911 d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting));
7912 if (!d->stack || !d->callstack) {
7913 return NULL;
7914 }
7915
7916 d->arena = a;
7917 d->limit = d->stack + default_max_nesting - 1;
7918 d->stack_size = default_max_nesting;
7919 d->status = status;
7920
7921 upb_pbdecoder_reset(d);
7922 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
7923
7924 if (d->method_->dest_handlers_) {
7925 if (sink.handlers != d->method_->dest_handlers_)
7926 return NULL;
7927 }
7928 d->top->sink = sink;
7929
7930 /* If this fails, increase the value in decoder.h. */
7931 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
7932 UPB_PB_DECODER_SIZE);
7933 return d;
7934 }
7935
/* Returns the decoder's current stream offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  const uint64_t consumed = offset(d);
  return consumed;
}
7939
upb_pbdecoder_method(const upb_pbdecoder * d)7940 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
7941 return d->method_;
7942 }
7943
upb_pbdecoder_input(upb_pbdecoder * d)7944 upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) {
7945 return d->input_;
7946 }
7947
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)7948 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
7949 return d->stack_size;
7950 }
7951
/* Sets the maximum number of frames the decoder stack may hold.
 *
 * Returns false, leaving the limit unchanged, when:
 *   - "max" does not leave room for the frames already in use.  The top frame
 *     has index (d->top - d->stack), so "max" must be strictly greater than
 *     that index; in particular 0 is always rejected.
 *   - growing the frame stack or the call stack fails.
 *
 * Growing may move the frame stack, so d->top and d->limit are re-derived from
 * the new base immediately after a successful reallocation.  Without this they
 * would keep pointing into the old allocation and the "fr == d->limit"
 * overflow check in decoder_push() could never fire. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  size_t depth;

  UPB_ASSERT(d->top >= d->stack);
  depth = (size_t)(d->top - d->stack);

  if (max <= depth) {
    /* Can't set a limit that excludes the frame we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    size_t limit_idx = (size_t)(d->limit - d->stack);
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;
    /* Rebase the interior pointers now, so that the decoder stays consistent
     * (with its old limit) even if the call stack cannot be grown below. */
    d->top = d->stack + depth;
    d->limit = d->stack + limit_idx;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
7984 /*
7985 ** upb::Encoder
7986 **
7987 ** Since we are implementing pure handlers (ie. without any out-of-band access
7988 ** to pre-computed lengths), we have to buffer all submessages before we can
7989 ** emit even their first byte.
7990 **
7991 ** Not knowing the size of submessages also means we can't write a perfect
7992 ** zero-copy implementation, even with buffering. Lengths are stored as
7993 ** varints, which means that we don't know how many bytes to reserve for the
7994 ** length until we know what the length is.
7995 **
7996 ** This leaves us with three main choices:
7997 **
7998 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
7999 ** once into the output buffer.
8000 **
8001 ** 2. attempt to buffer data directly into the output buffer, estimating how
8002 ** many bytes each length will take. When our guesses are wrong, use
8003 ** memmove() to grow or shrink the allotted space.
8004 **
8005 ** 3. buffer directly into the output buffer, allocating a max length
8006 ** ahead-of-time for each submessage length. If we overallocated, we waste
8007 ** space, but no memcpy() or memmove() is required. This approach requires
8008 ** defining a maximum size for submessages and rejecting submessages that
8009 ** exceed that size.
8010 **
8011 ** (2) and (3) have the potential to have better performance, but they are more
8012 ** complicated and subtle to implement:
8013 **
8014 ** (3) requires making an arbitrary choice of the maximum message size; it
8015 ** wastes space when submessages are shorter than this and fails
8016 ** completely when they are longer. This makes it more finicky and
8017 ** requires configuration based on the input. It also makes it impossible
8018 ** to perfectly match the output of reference encoders that always use the
8019 ** optimal amount of space for each length.
8020 **
** (2) requires guessing the size upfront, and if multiple lengths are
8022 ** guessed wrong the minimum required number of memmove() operations may
8023 ** be complicated to compute correctly. Implemented properly, it may have
8024 ** a useful amortized or average cost, but more investigation is required
8025 ** to determine this and what the optimal algorithm is to achieve it.
8026 **
8027 ** (1) makes you always pay for exactly one copy, but its implementation is
8028 ** the simplest and its performance is predictable.
8029 **
8030 ** So for now, we implement (1) only. If we wish to optimize later, we should
8031 ** be able to do it without affecting users.
8032 **
8033 ** The strategy is to buffer the segments of data that do *not* depend on
8034 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8035 ** and lengths. When the top-level submessage ends, we can go beginning to end,
8036 ** alternating the writing of lengths with memcpy() of the rest of the data.
8037 ** At the top level though, no buffering is required.
8038 */
8039
8040
8041
8042 /* The output buffer is divided into segments; a segment is a string of data
8043 * that is "ready to go" -- it does not need any varint lengths inserted into
8044 * the middle. The seams between segments are where varints will be inserted
8045 * once they are known.
8046 *
8047 * We also use the concept of a "run", which is a range of encoded bytes that
8048 * occur at a single submessage level. Every segment contains one or more runs.
8049 *
8050 * A segment can span messages. Consider:
8051 *
8052 * .--Submessage lengths---------.
8053 * | | |
8054 * | V V
8055 * V | |--------------- | |-----------------
8056 * Submessages: | |-----------------------------------------------
8057 * Top-level msg: ------------------------------------------------------------
8058 *
8059 * Segments: ----- ------------------- -----------------
8060 * Runs: *---- *--------------*--- *----------------
8061 * (* marks the start)
8062 *
 * Note that the top-level message is not in any segment because it does not
8064 * have any length preceding it.
8065 *
8066 * A segment is only interrupted when another length needs to be inserted. So
8067 * observe how the second segment spans both the inner submessage and part of
8068 * the next enclosing message. */
/* Bookkeeping for one segment of buffered output.  Both counters are reset to
 * zero when the segment is opened and grow as runs are accumulated. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
8073
/* State of a protobuf encoder. */
struct upb_pb_encoder {
  /* Arena used to grow the output and segment buffers. */
  upb_arena *arena;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf"
   * initially points to "initbuf", but is dynamically allocated if we need to
   * grow beyond the initial size. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating.  "segptr" is the segment that
   * currently receives run lengths. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry in the stack points to the
   * segment where this submessage's length is being accumulated (stored as an
   * index into "segbuf").  "top" is NULL while we are not inside any
   * delimited region. */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
8104
8105 /* low-level buffering ********************************************************/
8106
8107 /* Low-level functions for interacting with the output buffer. */
8108
8109 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)8110 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8111 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8112 UPB_ASSERT(n == len);
8113 }
8114
top(upb_pb_encoder * e)8115 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8116 return &e->segbuf[*e->top];
8117 }
8118
8119 /* Call to ensure that at least "bytes" bytes are available for writing at
8120 * e->ptr. Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)8121 static bool reserve(upb_pb_encoder *e, size_t bytes) {
8122 if ((size_t)(e->limit - e->ptr) < bytes) {
8123 /* Grow buffer. */
8124 char *new_buf;
8125 size_t needed = bytes + (e->ptr - e->buf);
8126 size_t old_size = e->limit - e->buf;
8127
8128 size_t new_size = old_size;
8129
8130 while (new_size < needed) {
8131 new_size *= 2;
8132 }
8133
8134 new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size);
8135
8136 if (new_buf == NULL) {
8137 return false;
8138 }
8139
8140 e->ptr = new_buf + (e->ptr - e->buf);
8141 e->runbegin = new_buf + (e->runbegin - e->buf);
8142 e->limit = new_buf + new_size;
8143 e->buf = new_buf;
8144 }
8145
8146 return true;
8147 }
8148
8149 /* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
8150 * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)8151 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8152 UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
8153 e->ptr += bytes;
8154 }
8155
8156 /* Call when all of the bytes for a handler have been written. Flushes the
8157 * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)8158 static bool commit(upb_pb_encoder *e) {
8159 if (!e->top) {
8160 /* We aren't inside a delimited region. Flush our accumulated bytes to
8161 * the output.
8162 *
8163 * TODO(haberman): in the future we may want to delay flushing for
8164 * efficiency reasons. */
8165 putbuf(e, e->buf, e->ptr - e->buf);
8166 e->ptr = e->buf;
8167 }
8168
8169 return true;
8170 }
8171
8172 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)8173 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8174 if (!reserve(e, len)) {
8175 return false;
8176 }
8177
8178 memcpy(e->ptr, data, len);
8179 encoder_advance(e, len);
8180 return true;
8181 }
8182
8183 /* Finish the current run by adding the run totals to the segment and message
8184 * length. */
accumulate(upb_pb_encoder * e)8185 static void accumulate(upb_pb_encoder *e) {
8186 size_t run_len;
8187 UPB_ASSERT(e->ptr >= e->runbegin);
8188 run_len = e->ptr - e->runbegin;
8189 e->segptr->seglen += run_len;
8190 top(e)->msglen += run_len;
8191 e->runbegin = e->ptr;
8192 }
8193
8194 /* Call to indicate the start of delimited region for which the full length is
8195 * not yet known. All data will be buffered until the length is known.
8196 * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)8197 static bool start_delim(upb_pb_encoder *e) {
8198 if (e->top) {
8199 /* We are already buffering, advance to the next segment and push it on the
8200 * stack. */
8201 accumulate(e);
8202
8203 if (++e->top == e->stacklimit) {
8204 /* TODO(haberman): grow stack? */
8205 return false;
8206 }
8207
8208 if (++e->segptr == e->seglimit) {
8209 /* Grow segment buffer. */
8210 size_t old_size =
8211 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8212 size_t new_size = old_size * 2;
8213 upb_pb_encoder_segment *new_buf =
8214 upb_arena_realloc(e->arena, e->segbuf, old_size, new_size);
8215
8216 if (new_buf == NULL) {
8217 return false;
8218 }
8219
8220 e->segptr = new_buf + (e->segptr - e->segbuf);
8221 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8222 e->segbuf = new_buf;
8223 }
8224 } else {
8225 /* We were previously at the top level, start buffering. */
8226 e->segptr = e->segbuf;
8227 e->top = e->stack;
8228 e->runbegin = e->ptr;
8229 }
8230
8231 *e->top = (int)(e->segptr - e->segbuf);
8232 e->segptr->seglen = 0;
8233 e->segptr->msglen = 0;
8234
8235 return true;
8236 }
8237
8238 /* Call to indicate the end of a delimited region. We now know the length of
8239 * the delimited region. If we are not nested inside any other delimited
8240 * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)8241 static bool end_delim(upb_pb_encoder *e) {
8242 size_t msglen;
8243 accumulate(e);
8244 msglen = top(e)->msglen;
8245
8246 if (e->top == e->stack) {
8247 /* All lengths are now available, emit all buffered data. */
8248 char buf[UPB_PB_VARINT_MAX_LEN];
8249 upb_pb_encoder_segment *s;
8250 const char *ptr = e->buf;
8251 for (s = e->segbuf; s <= e->segptr; s++) {
8252 size_t lenbytes = upb_vencode64(s->msglen, buf);
8253 putbuf(e, buf, lenbytes);
8254 putbuf(e, ptr, s->seglen);
8255 ptr += s->seglen;
8256 }
8257
8258 e->ptr = e->buf;
8259 e->top = NULL;
8260 } else {
8261 /* Need to keep buffering; propagate length info into enclosing
8262 * submessages. */
8263 --e->top;
8264 top(e)->msglen += msglen + upb_varint_size(msglen);
8265 }
8266
8267 return true;
8268 }
8269
8270
8271 /* tag_t **********************************************************************/
8272
/* A precomputed (pre-encoded) tag and length. Instances are built by new_tag()
 * and written verbatim by encode_tag(). */

typedef struct {
  uint8_t bytes;  /* Number of encoded bytes stored in "tag". */
  char tag[7];    /* Varint-encoded tag; only the first "bytes" bytes are used. */
} tag_t;
8279
8280 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)8281 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8282 upb_handlerattr *attr) {
8283 uint32_t n = upb_fielddef_number(f);
8284
8285 tag_t *tag = upb_gmalloc(sizeof(tag_t));
8286 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8287
8288 attr->handler_data = tag;
8289 upb_handlers_addcleanup(h, tag, upb_gfree);
8290 }
8291
/* Writes a pre-encoded tag to the buffer. Returns encode_bytes()'s result. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  const char *encoded = tag->tag;
  const size_t count = tag->bytes;
  return encode_bytes(e, encoded, count);
}
8295
8296
8297 /* encoding of wire types *****************************************************/
8298
/* Writes the 8 bytes of "val" in host byte order. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
8303
/* Writes the 4 bytes of "val" in host byte order. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
8308
/* Writes "val" as a varint. Reserves the maximum varint length up front, then
 * advances by the number of bytes upb_vencode64() reports. Returns false if the
 * reservation fails. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  if (reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    const size_t written = upb_vencode64(val, e->ptr);
    encoder_advance(e, written);
    return true;
  }

  return false;
}
8317
/* Returns the object representation of "d" as a 64-bit integer (a bit copy,
 * not a numeric conversion). */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8323
/* Returns the object representation of "d" as a 32-bit integer (a bit copy,
 * not a numeric conversion). */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8329
8330
8331 /* encoding of proto types ****************************************************/
8332
startmsg(void * c,const void * hd)8333 static bool startmsg(void *c, const void *hd) {
8334 upb_pb_encoder *e = c;
8335 UPB_UNUSED(hd);
8336 if (e->depth++ == 0) {
8337 upb_bytessink_start(e->output_, 0, &e->subc);
8338 }
8339 return true;
8340 }
8341
/* End-of-message handler. Decrements the nesting depth and, when leaving the
 * outermost message, ends the output bytes sink. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *e = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);

  e->depth--;
  if (e->depth == 0) {
    upb_bytessink_end(e->output_);
  }
  return true;
}
8351
encode_startdelimfield(void * c,const void * hd)8352 static void *encode_startdelimfield(void *c, const void *hd) {
8353 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8354 return ok ? c : UPB_BREAK;
8355 }
8356
/* Unknown-field handler: copies the raw bytes through unchanged and commits. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);
  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
8362
encode_enddelimfield(void * c,const void * hd)8363 static bool encode_enddelimfield(void *c, const void *hd) {
8364 UPB_UNUSED(hd);
8365 return end_delim(c);
8366 }
8367
encode_startgroup(void * c,const void * hd)8368 static void *encode_startgroup(void *c, const void *hd) {
8369 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8370 }
8371
/* End handler for a group: writes the tag ("hd") and commits. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
8375
/* Start-of-string handler: identical to starting any delimited field; the size
 * hint is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *closure;
  UPB_UNUSED(size_hint);
  closure = encode_startdelimfield(c, hd);
  return closure;
}
8380
/* String-data handler: appends the chunk to the buffer. Returns "len" when the
 * bytes were written and 0 when encode_bytes() fails. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(hd);
  UPB_UNUSED(h);
  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
8387
/* T(type, ctype, convert, encode) defines two value handlers for one proto
 * type:
 *
 *   encode_scalar_<type>: writes the tag passed as "hd", then the value, then
 *                         commits.
 *   encode_packed_<type>: writes only the value (no tag, no commit).
 *
 * "ctype" is the C type the handler receives. "convert" is applied to the value
 * as (convert)(val), so it may be either a type name (a cast) or a function
 * name (a call). "encode" is the wire-level writer.
 *
 * NOTE(review): enum is converted through uint32_t while int32 goes through
 * int64_t, so negative enum values are zero-extended rather than sign-extended
 * before varint encoding -- confirm this difference is intended. */
#define T(type, ctype, convert, encode)                                  \
  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
  }                                                                      \
  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
    UPB_UNUSED(hd);                                                      \
    return encode(e, (convert)(val));                                    \
  }

T(double, double, dbl2uint64, encode_fixed64)
T(float, float, flt2uint32, encode_fixed32)
T(int64, int64_t, uint64_t, encode_varint)
T(int32, int32_t, int64_t, encode_varint)
T(fixed64, uint64_t, uint64_t, encode_fixed64)
T(fixed32, uint32_t, uint32_t, encode_fixed32)
T(bool, bool, bool, encode_varint)
T(uint32, uint32_t, uint32_t, encode_varint)
T(uint64, uint64_t, uint64_t, encode_varint)
T(enum, int32_t, uint32_t, encode_varint)
T(sfixed32, int32_t, uint32_t, encode_fixed32)
T(sfixed64, int64_t, uint64_t, encode_fixed64)
T(sint32, int32_t, upb_zzenc_32, encode_varint)
T(sint64, int64_t, upb_zzenc_64, encode_varint)

#undef T
8413
8414
8415 /* code to build the handlers *************************************************/
8416
8417 #include <stdio.h>
/* Handler-cache callback (see upb_pb_encoder_newcache()): installs the encoder's
 * handlers on "h" for every field of h's message definition. "closure" is
 * unused. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
  const upb_msgdef *m;
  upb_msg_field_iter i;

  UPB_UNUSED(closure);

  /* Message-level handlers. */
  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);
  upb_handlers_setunknown(h, encode_unknown, NULL);

  m = upb_handlers_msgdef(h);
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    /* Only repeated primitive fields marked packed use the packed encoding. */
    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
                  upb_fielddef_packed(f);
    upb_handlerattr attr = UPB_HANDLERATTR_INIT;
    /* A packed field is written as one delimited blob; otherwise the wire type
     * comes from the per-descriptor-type table. */
    upb_wiretype_t wt =
        packed ? UPB_WIRE_TYPE_DELIMITED
               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];

    /* Pre-encode the tag for this field. */
    new_tag(h, f, wt, &attr);

    if (packed) {
      /* The sequence as a whole is the delimited region. */
      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
    }

/* Expands to one switch case that registers the packed or scalar value handler
 * for a descriptor type. */
#define T(upper, lower, upbtype)                                     \
  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
    if (packed) {                                                    \
      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
    } else {                                                         \
      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
    }                                                                \
    break;

    switch (upb_fielddef_descriptortype(f)) {
      T(DOUBLE, double, double);
      T(FLOAT, float, float);
      T(INT64, int64, int64);
      T(INT32, int32, int32);
      T(FIXED64, fixed64, uint64);
      T(FIXED32, fixed32, uint32);
      T(BOOL, bool, bool);
      T(UINT32, uint32, uint32);
      T(UINT64, uint64, uint64);
      T(ENUM, enum, int32);
      T(SFIXED32, sfixed32, int32);
      T(SFIXED64, sfixed64, int64);
      T(SINT32, sint32, int32);
      T(SINT64, sint64, int64);
      case UPB_DESCRIPTOR_TYPE_STRING:
      case UPB_DESCRIPTOR_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
        upb_handlers_setstring(h, f, encode_strbuf, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:
        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_GROUP: {
        /* Endgroup takes a different tag (wire_type = END_GROUP). */
        upb_handlerattr attr2 = UPB_HANDLERATTR_INIT;
        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);

        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);

        break;
      }
    }

#undef T
  }
}
8497
/* Returns the encoder to its top-level state: no delimited region is active
 * (top and segptr cleared) and the message nesting depth is zero. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
8503
8504
8505 /* public API *****************************************************************/
8506
upb_pb_encoder_newcache(void)8507 upb_handlercache *upb_pb_encoder_newcache(void) {
8508 return upb_handlercache_new(newhandlers_callback, NULL);
8509 }
8510
upb_pb_encoder_create(upb_arena * arena,const upb_handlers * h,upb_bytessink output)8511 upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h,
8512 upb_bytessink output) {
8513 const size_t initial_bufsize = 256;
8514 const size_t initial_segbufsize = 16;
8515 /* TODO(haberman): make this configurable. */
8516 const size_t stack_size = 64;
8517 #ifndef NDEBUG
8518 const size_t size_before = upb_arena_bytesallocated(arena);
8519 #endif
8520
8521 upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder));
8522 if (!e) return NULL;
8523
8524 e->buf = upb_arena_malloc(arena, initial_bufsize);
8525 e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf));
8526 e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack));
8527
8528 if (!e->buf || !e->segbuf || !e->stack) {
8529 return NULL;
8530 }
8531
8532 e->limit = e->buf + initial_bufsize;
8533 e->seglimit = e->segbuf + initial_segbufsize;
8534 e->stacklimit = e->stack + stack_size;
8535
8536 upb_pb_encoder_reset(e);
8537 upb_sink_reset(&e->input_, h, e);
8538
8539 e->arena = arena;
8540 e->output_ = output;
8541 e->subc = output.closure;
8542 e->ptr = e->buf;
8543
8544 /* If this fails, increase the value in encoder.h. */
8545 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
8546 UPB_PB_ENCODER_SIZE);
8547 return e;
8548 }
8549
upb_pb_encoder_input(upb_pb_encoder * e)8550 upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; }
8551 /*
8552 * upb::pb::TextPrinter
8553 *
8554 * OPT: This is not optimized at all. It uses printf() which parses the format
8555 * string every time, and it allocates memory for every put.
8556 */
8557
8558
8559 #include <ctype.h>
8560 #include <float.h>
8561 #include <inttypes.h>
8562 #include <stdarg.h>
8563 #include <stdio.h>
8564 #include <string.h>
8565
8566
8567
/* State for one text printer instance. */
struct upb_textprinter {
  upb_sink input_;       /* Input sink, returned by upb_textprinter_input(). */
  upb_bytessink output_; /* Destination for all printed bytes. */
  int indent_depth_;     /* Current submessage nesting level. */
  bool single_line_;     /* If set, fields end in ' ' instead of '\n' and no
                          * indentation is written. */
  void *subc;            /* Sub-closure filled in by upb_bytessink_start() and
                          * passed to every upb_bytessink_putbuf() call. */
};
8575
/* Jumps to the enclosing function's "err" label when "x" evaluates to a
 * negative value. Note the expansion is a bare "if" statement that already ends
 * in ';'. */
#define CHECK(x) if ((x) < 0) goto err;
8577
/* Returns the part of "longname" after its last '.', or "longname" itself when
 * it contains no '.'. The result points into the caller's string. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
8582
/* Writes the indentation for the current nesting depth: two spaces per level.
 * Nothing is written in single-line mode. Always returns 0 (the result of the
 * underlying putbuf calls is not inspected). */
static int indent(upb_textprinter *p) {
  int i;
  if (!p->single_line_) {
    for (i = 0; i < p->indent_depth_; i++) {
      /* The literal must really contain two characters: the length argument is
       * 2, so a one-character literal would emit its terminating NUL. */
      upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
    }
  }
  return 0;
}
8590
/* Writes the field terminator: a space in single-line mode, otherwise a
 * newline. Always returns 0. */
static int endfield(upb_textprinter *p) {
  char sep = '\n';
  if (p->single_line_) {
    sep = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &sep, 1, NULL);
  return 0;
}
8596
/* Writes buf[0..len) to the output with C-style escaping.
 *
 * Newline, CR, tab, both quote characters and backslash get two-character
 * escapes. Any other byte that is not printable is written as a three-digit
 * octal escape. When "preserve_utf8" is true, bytes >= 0x80 are passed through
 * unchanged instead of being escaped.
 *
 * Output is staged in a local buffer and flushed whenever it is nearly full.
 * Always returns 0. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  /* Based on CEscapeInternal() from Google's protobuf release. */
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behavior. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* The longest expansion is a 4-character escape, and sprintf() also writes
     * a terminating NUL after it, so 5 bytes must be available. */
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (ch) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned)ch);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = (char)ch;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
8643
/* printf-style output to the printer's byte sink.
 *
 * Formats once to measure the result, allocates a buffer of that size, formats
 * again into it, and writes the bytes (without the terminator) to the output.
 * Returns false if formatting or allocation fails; otherwise returns the result
 * of upb_bytessink_putbuf() converted to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  if (len < 0) {
    /* Formatting error: there is no valid length to allocate for. */
    va_end(args);
    return false;
  }

  /* + 1 for NULL terminator (the formatter requires it even if we don't). */
  str = upb_gmalloc((size_t)len + 1);
  if (!str) {
    /* Every va_start() must be paired with va_end() before returning. */
    va_end(args);
    return false;
  }

  /* Bounded by the size just measured. */
  written = _upb_vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT(written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  upb_gfree(str);
  return ok;
}
8670
8671
8672 /* handlers *******************************************************************/
8673
textprinter_startmsg(void * c,const void * hd)8674 static bool textprinter_startmsg(void *c, const void *hd) {
8675 upb_textprinter *p = c;
8676 UPB_UNUSED(hd);
8677 if (p->indent_depth_ == 0) {
8678 upb_bytessink_start(p->output_, 0, &p->subc);
8679 }
8680 return true;
8681 }
8682
/* End-of-message handler: ends the output sink when called at nesting depth
 * zero. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *p = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(s);
  if (p->indent_depth_ != 0) {
    return true;
  }
  upb_bytessink_end(p->output_);
  return true;
}
8692
/* TYPE(name, ctype, fmt) defines textprinter_put<name>(): a value handler that
 * prints "<field name>: <value>" using printf conversion "fmt", preceded by the
 * current indentation and followed by the field terminator. The handler data is
 * the field's upb_fielddef. Returns false only if indent() or endfield() yields
 * a negative value; putf()'s result is ignored. */
#define TYPE(name, ctype, fmt)                                                 \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
  }
8705
textprinter_putbool(void * closure,const void * handler_data,bool val)8706 static bool textprinter_putbool(void *closure, const void *handler_data,
8707 bool val) {
8708 upb_textprinter *p = closure;
8709 const upb_fielddef *f = handler_data;
8710 CHECK(indent(p));
8711 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
8712 CHECK(endfield(p));
8713 return true;
8714 err:
8715 return false;
8716 }
8717
/* Two-level stringification: STRINGIFY_MACROVAL(X) expands X first and then
 * turns the result into a string literal (used to splice FLT_DIG/DBL_DIG into
 * the format strings below). */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Numeric value handlers. Floats and doubles are printed with "%g" at FLT_DIG
 * and DBL_DIG significant digits respectively. */
TYPE(int32, int32_t, "%" PRId32)
TYPE(int64, int64_t, "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
8729
8730 /* Output a symbolic value from the enum if found, else just print as int32. */
8731 static bool textprinter_putenum(void *closure, const void *handler_data,
8732 int32_t val) {
8733 upb_textprinter *p = closure;
8734 const upb_fielddef *f = handler_data;
8735 const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f);
8736 const char *label = upb_enumdef_iton(enum_def, val);
8737 if (label) {
8738 indent(p);
8739 putf(p, "%s: %s", upb_fielddef_name(f), label);
8740 endfield(p);
8741 } else {
8742 if (!textprinter_putint32(closure, handler_data, val))
8743 return false;
8744 }
8745 return true;
8746 }
8747
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)8748 static void *textprinter_startstr(void *closure, const void *handler_data,
8749 size_t size_hint) {
8750 upb_textprinter *p = closure;
8751 const upb_fielddef *f = handler_data;
8752 UPB_UNUSED(size_hint);
8753 indent(p);
8754 putf(p, "%s: \"", upb_fielddef_name(f));
8755 return p;
8756 }
8757
textprinter_endstr(void * closure,const void * handler_data)8758 static bool textprinter_endstr(void *closure, const void *handler_data) {
8759 upb_textprinter *p = closure;
8760 UPB_UNUSED(handler_data);
8761 putf(p, "\"");
8762 endfield(p);
8763 return true;
8764 }
8765
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)8766 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
8767 size_t len, const upb_bufhandle *handle) {
8768 upb_textprinter *p = closure;
8769 const upb_fielddef *f = hd;
8770 UPB_UNUSED(handle);
8771 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
8772 return len;
8773 err:
8774 return 0;
8775 }
8776
textprinter_startsubmsg(void * closure,const void * handler_data)8777 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
8778 upb_textprinter *p = closure;
8779 const char *name = handler_data;
8780 CHECK(indent(p));
8781 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
8782 p->indent_depth_++;
8783 return p;
8784 err:
8785 return UPB_BREAK;
8786 }
8787
textprinter_endsubmsg(void * closure,const void * handler_data)8788 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
8789 upb_textprinter *p = closure;
8790 UPB_UNUSED(handler_data);
8791 p->indent_depth_--;
8792 CHECK(indent(p));
8793 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
8794 CHECK(endfield(p));
8795 return true;
8796 err:
8797 return false;
8798 }
8799
/* Handler-cache callback (see upb_textprinter_newcache()): installs the text
 * printer's handlers on "h" for every field of h's message definition. "c" is
 * unused. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *m = upb_handlers_msgdef(h);
  upb_msg_field_iter i;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    upb_handlerattr attr = UPB_HANDLERATTR_INIT;
    /* By default each handler receives the field definition as its handler
     * data (the MESSAGE case below overrides this with a name string). */
    attr.handler_data = f;
    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        /* Groups are printed under the short name of their message type; other
         * submessages under the field name. */
        const char *name =
            upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP
                ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
                : upb_fielddef_name(f);
        attr.handler_data = name;
        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
        break;
    }
  }
}
8858
/* Puts the printer back at nesting depth zero and selects single-line or
 * multi-line output. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
8863
8864
8865 /* Public API *****************************************************************/
8866
upb_textprinter_create(upb_arena * arena,const upb_handlers * h,upb_bytessink output)8867 upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h,
8868 upb_bytessink output) {
8869 upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter));
8870 if (!p) return NULL;
8871
8872 p->output_ = output;
8873 upb_sink_reset(&p->input_, h, p);
8874 textprinter_reset(p, false);
8875
8876 return p;
8877 }
8878
upb_textprinter_newcache(void)8879 upb_handlercache *upb_textprinter_newcache(void) {
8880 return upb_handlercache_new(&onmreg, NULL);
8881 }
8882
upb_textprinter_input(upb_textprinter * p)8883 upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; }
8884
/* Switches between single-line and multi-line output. Unlike
 * textprinter_reset(), the current indentation depth is left untouched. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  upb_textprinter *printer = p;
  printer->single_line_ = single_line;
}
8888
8889
/* Maps a descriptor type to its native (non-packed) wire type.
 * Index is descriptor type; the trailing comment on each row names the
 * descriptor type for that index. */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
8912
/* A basic branch-based decoder, uses 32-bit values to get good performance
 * on 32-bit architectures (but performs well on 64-bits also).
 * This scheme comes from the original Google Protobuf implementation
 * (proto2).
 *
 * Continues decoding a varint: reads up to 8 more bytes starting at r.p and
 * ORs their 7-bit payloads into r.val starting at bit 14 (presumably the caller
 * has already decoded the first two bytes into the low 14 bits -- confirm
 * against the caller). On success returns the full value and the pointer just
 * past the last byte consumed. If the 8th byte read still has its continuation
 * bit set, returns {NULL, 0}. */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
  upb_decoderet err = {NULL, 0};
  const char *p = r.p;
  uint32_t low = (uint32_t)r.val;
  uint32_t high = 0;
  uint32_t b;
  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  /* This byte straddles the two halves: its low 4 payload bits finish "low"
   * and the remaining 3 start "high". */
  b = *(p++); low  |= (b & 0x7fU) << 28;
              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = ((uint64_t)high << 32) | low;
  r.p = p;
  return r;
}
8939
/* Like the previous, but uses 64-bit values.
 *
 * Reads up to 8 more bytes starting at r.p, ORing each 7-bit payload into r.val
 * at bit positions 14, 21, ..., 63. Returns the value and the pointer past the
 * last byte consumed, or {NULL, 0} if the 8th byte read still has its
 * continuation bit set. */
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
  const char *p = r.p;
  uint64_t val = r.val;
  uint64_t b;
  upb_decoderet err = {NULL, 0};
  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = val;
  r.p = p;
  return r;
}
8961
8962 #line 1 "upb/json/parser.rl"
8963 /*
8964 ** upb::json::Parser (upb_json_parser)
8965 **
8966 ** A parser that uses the Ragel State Machine Compiler to generate
8967 ** the finite automata.
8968 **
8969 ** Ragel only natively handles regular languages, but we can manually
8970 ** program it a bit to handle context-free languages like JSON, by using
8971 ** the "fcall" and "fret" constructs.
8972 **
8973 ** This parser can handle the basics, but needs several things to be fleshed
8974 ** out:
8975 **
8976 ** - handling of unicode escape sequences (including high surrogate pairs).
8977 ** - properly check and report errors for unknown fields, stack overflow,
8978 ** improper array nesting (or lack of nesting).
8979 ** - handling of base64 sequences with padding characters.
8980 ** - handling of push-back (non-success returns from sink functions).
8981 ** - handling of keys/escape-sequences/etc that span input buffers.
8982 */
8983
8984 #include <ctype.h>
8985 #include <errno.h>
8986 #include <float.h>
8987 #include <math.h>
8988 #include <stdint.h>
8989 #include <stdio.h>
8990 #include <stdlib.h>
8991 #include <string.h>
8992
8993 #include <time.h>
8994
8995
8996
/* NOTE(review): presumably the maximum nesting depth the JSON parser supports;
 * the use site is outside this excerpt -- confirm. */
#define UPB_JSON_MAX_DEPTH 64

/* Type of value message. These constants are passed as the "value_type"
 * argument of start_value_object(). */
enum {
  VALUE_NULLVALUE   = 0,
  VALUE_NUMBERVALUE = 1,
  VALUE_STRINGVALUE = 2,
  VALUE_BOOLVALUE   = 3,
  VALUE_STRUCTVALUE = 4,
  VALUE_LISTVALUE   = 5
};
9008
9009 /* Forward declare */
9010 static bool is_top_level(upb_json_parser *p);
9011 static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
9012 static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
9013
9014 static bool is_number_wrapper_object(upb_json_parser *p);
9015 static bool does_number_wrapper_start(upb_json_parser *p);
9016 static bool does_number_wrapper_end(upb_json_parser *p);
9017
9018 static bool is_string_wrapper_object(upb_json_parser *p);
9019 static bool does_string_wrapper_start(upb_json_parser *p);
9020 static bool does_string_wrapper_end(upb_json_parser *p);
9021
9022 static bool does_fieldmask_start(upb_json_parser *p);
9023 static bool does_fieldmask_end(upb_json_parser *p);
9024 static void start_fieldmask_object(upb_json_parser *p);
9025 static void end_fieldmask_object(upb_json_parser *p);
9026
9027 static void start_wrapper_object(upb_json_parser *p);
9028 static void end_wrapper_object(upb_json_parser *p);
9029
9030 static void start_value_object(upb_json_parser *p, int value_type);
9031 static void end_value_object(upb_json_parser *p);
9032
9033 static void start_listvalue_object(upb_json_parser *p);
9034 static void end_listvalue_object(upb_json_parser *p);
9035
9036 static void start_structvalue_object(upb_json_parser *p);
9037 static void end_structvalue_object(upb_json_parser *p);
9038
9039 static void start_object(upb_json_parser *p);
9040 static void end_object(upb_json_parser *p);
9041
9042 static void start_any_object(upb_json_parser *p, const char *ptr);
9043 static bool end_any_object(upb_json_parser *p, const char *ptr);
9044
9045 static bool start_subobject(upb_json_parser *p);
9046 static void end_subobject(upb_json_parser *p);
9047
9048 static void start_member(upb_json_parser *p);
9049 static void end_member(upb_json_parser *p);
9050 static bool end_membername(upb_json_parser *p);
9051
9052 static void start_any_member(upb_json_parser *p, const char *ptr);
9053 static void end_any_member(upb_json_parser *p, const char *ptr);
9054 static bool end_any_membername(upb_json_parser *p);
9055
9056 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
9057 const upb_bufhandle *handle);
9058 static bool end(void *closure, const void *hd);
9059
/* NOTE(review): by its name, a sentinel character standing in for end-of-input;
 * how it is used is not visible in this excerpt -- confirm. */
static const char eof_ch = 'e';
9061
/* stringsink: a bytes sink that accumulates everything written to it in a
 * growable heap buffer. */
typedef struct {
  upb_byteshandler handler; /* Handlers installed by upb_stringsink_init(). */
  upb_bytessink sink;       /* Sink bound to "handler", with this struct as its
                             * closure. */
  char *ptr;                /* Heap buffer (malloc/realloc/free). */
  size_t len, size;         /* Bytes used / bytes allocated in "ptr". */
} upb_stringsink;
9069
9070
stringsink_start(void * _sink,const void * hd,size_t size_hint)9071 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
9072 upb_stringsink *sink = _sink;
9073 sink->len = 0;
9074 UPB_UNUSED(hd);
9075 UPB_UNUSED(size_hint);
9076 return sink;
9077 }
9078
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)9079 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
9080 size_t len, const upb_bufhandle *handle) {
9081 upb_stringsink *sink = _sink;
9082 size_t new_size = sink->size;
9083
9084 UPB_UNUSED(hd);
9085 UPB_UNUSED(handle);
9086
9087 while (sink->len + len > new_size) {
9088 new_size *= 2;
9089 }
9090
9091 if (new_size != sink->size) {
9092 sink->ptr = realloc(sink->ptr, new_size);
9093 sink->size = new_size;
9094 }
9095
9096 memcpy(sink->ptr + sink->len, ptr, len);
9097 sink->len += len;
9098
9099 return len;
9100 }
9101
/* Prepares |sink| for use: installs the start-string and string handlers,
 * binds the bytes sink to them with |sink| as closure, and allocates an empty
 * buffer with an initial capacity of 32 bytes.
 * NOTE(review): the malloc() result is not checked here. */
void upb_stringsink_init(upb_stringsink *sink) {
  upb_byteshandler *h = &sink->handler;

  upb_byteshandler_init(h);
  upb_byteshandler_setstartstr(h, stringsink_start, NULL);
  upb_byteshandler_setstring(h, stringsink_string, NULL);
  upb_bytessink_reset(&sink->sink, h, sink);

  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);
}
9113
/* Frees the buffer owned by |sink|. The sink object itself is not freed. */
void upb_stringsink_uninit(upb_stringsink *sink) {
  free(sink->ptr);
}
9115
9116 typedef struct {
9117 /* For encoding Any value field in binary format. */
9118 upb_handlercache *encoder_handlercache;
9119 upb_stringsink stringsink;
9120
9121 /* For decoding Any value field in json format. */
9122 upb_json_codecache *parser_codecache;
9123 upb_sink sink;
9124 upb_json_parser *parser;
9125
9126 /* Mark the range of uninterpreted values in json input before type url. */
9127 const char *before_type_url_start;
9128 const char *before_type_url_end;
9129
9130 /* Mark the range of uninterpreted values in json input after type url. */
9131 const char *after_type_url_start;
9132 } upb_jsonparser_any_frame;
9133
9134 typedef struct {
9135 upb_sink sink;
9136
9137 /* The current message in which we're parsing, and the field whose value we're
9138 * expecting next. */
9139 const upb_msgdef *m;
9140 const upb_fielddef *f;
9141
9142 /* The table mapping json name to fielddef for this message. */
9143 const upb_strtable *name_table;
9144
9145 /* We are in a repeated-field context. We need this flag to decide whether to
9146 * handle the array as a normal repeated field or a
9147 * google.protobuf.ListValue/google.protobuf.Value. */
9148 bool is_repeated;
9149
9150 /* We are in a repeated-field context, ready to emit mapentries as
9151 * submessages. This flag alters the start-of-object (open-brace) behavior to
9152 * begin a sequence of mapentry messages rather than a single submessage. */
9153 bool is_map;
9154
9155 /* We are in a map-entry message context. This flag is set when parsing the
9156 * value field of a single map entry and indicates to all value-field parsers
9157 * (subobjects, strings, numbers, and bools) that the map-entry submessage
9158 * should end as soon as the value is parsed. */
9159 bool is_mapentry;
9160
9161 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
9162 * message's map field that we're currently parsing. This differs from |f|
9163 * because |f| is the field in the *current* message (i.e., the map-entry
9164 * message itself), not the parent's field that leads to this map. */
9165 const upb_fielddef *mapfield;
9166
9167 /* We are in an Any message context. This flag is set when parsing the Any
9168 * message and indicates to all field parsers (subobjects, strings, numbers,
9169 * and bools) that the parsed field should be serialized as binary data or
9170 * cached (type url not found yet). */
9171 bool is_any;
9172
9173 /* The type of packed message in Any. */
9174 upb_jsonparser_any_frame *any_frame;
9175
9176 /* True if the field to be parsed is unknown. */
9177 bool is_unknown_field;
9178 } upb_jsonparser_frame;
9179
/* Resets |frame| to its empty state: all pointers NULL and all flags false.
 * The |sink| member is left untouched. */
static void init_frame(upb_jsonparser_frame* frame) {
  /* Pointers. */
  frame->m = NULL;
  frame->f = NULL;
  frame->name_table = NULL;
  frame->mapfield = NULL;
  frame->any_frame = NULL;

  /* Flags. */
  frame->is_repeated = false;
  frame->is_map = false;
  frame->is_mapentry = false;
  frame->is_any = false;
  frame->is_unknown_field = false;
}
9192
9193 struct upb_json_parser {
9194 upb_arena *arena;
9195 const upb_json_parsermethod *method;
9196 upb_bytessink input_;
9197
9198 /* Stack to track the JSON scopes we are in. */
9199 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
9200 upb_jsonparser_frame *top;
9201 upb_jsonparser_frame *limit;
9202
9203 upb_status *status;
9204
9205 /* Ragel's internal parsing stack for the parsing state machine. */
9206 int current_state;
9207 int parser_stack[UPB_JSON_MAX_DEPTH];
9208 int parser_top;
9209
9210 /* The handle for the current buffer. */
9211 const upb_bufhandle *handle;
9212
9213 /* Accumulate buffer. See details in parser.rl. */
9214 const char *accumulated;
9215 size_t accumulated_len;
9216 char *accumulate_buf;
9217 size_t accumulate_buf_size;
9218
9219 /* Multi-part text data. See details in parser.rl. */
9220 int multipart_state;
9221 upb_selector_t string_selector;
9222
9223 /* Input capture. See details in parser.rl. */
9224 const char *capture;
9225
9226 /* Intermediate result of parsing a unicode escape sequence. */
9227 uint32_t digit;
9228
9229 /* For resolve type url in Any. */
9230 const upb_symtab *symtab;
9231
9232 /* Whether to proceed if unknown field is met. */
9233 bool ignore_json_unknown;
9234
9235 /* Cache for parsing timestamp due to base and zone are handled in different
9236 * handlers. */
9237 struct tm tm;
9238 };
9239
start_jsonparser_frame(upb_json_parser * p)9240 static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
9241 upb_jsonparser_frame *inner;
9242 inner = p->top + 1;
9243 init_frame(inner);
9244 return inner;
9245 }
9246
9247 struct upb_json_codecache {
9248 upb_arena *arena;
9249 upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */
9250 };
9251
9252 struct upb_json_parsermethod {
9253 const upb_json_codecache *cache;
9254 upb_byteshandler input_handler_;
9255
9256 /* Maps json_name -> fielddef */
9257 upb_strtable name_table;
9258 };
9259
/* Returns false from the enclosing function when |x| evaluates to false.
 * The do/while(0) wrapper makes the expansion a single statement, so an
 * `else` written after the macro cannot bind to the macro's internal `if`. */
#define PARSER_CHECK_RETURN(x) \
  do {                         \
    if (!(x)) return false;    \
  } while (0)
9261
json_parser_any_frame_new(upb_json_parser * p)9262 static upb_jsonparser_any_frame *json_parser_any_frame_new(
9263 upb_json_parser *p) {
9264 upb_jsonparser_any_frame *frame;
9265
9266 frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame));
9267
9268 frame->encoder_handlercache = upb_pb_encoder_newcache();
9269 frame->parser_codecache = upb_json_codecache_new();
9270 frame->parser = NULL;
9271 frame->before_type_url_start = NULL;
9272 frame->before_type_url_end = NULL;
9273 frame->after_type_url_start = NULL;
9274
9275 upb_stringsink_init(&frame->stringsink);
9276
9277 return frame;
9278 }
9279
/* Sets up |frame| for a payload of type |payload_type|: creates a binary
 * encoder that writes into the frame's string sink, points the frame's sink at
 * that encoder, and creates a nested JSON parser (sharing |p|'s arena, symtab,
 * status and unknown-field setting) that feeds the sink. */
static void json_parser_any_frame_set_payload_type(
    upb_json_parser *p,
    upb_jsonparser_any_frame *frame,
    const upb_msgdef *payload_type) {
  /* Encoder side. */
  const upb_handlers *enc_handlers =
      upb_handlercache_get(frame->encoder_handlercache, payload_type);
  upb_pb_encoder *enc =
      upb_pb_encoder_create(p->arena, enc_handlers, frame->stringsink.sink);

  /* Parser side. */
  const upb_json_parsermethod *method =
      upb_json_codecache_get(frame->parser_codecache, payload_type);

  upb_sink_reset(&frame->sink, enc_handlers, enc);
  frame->parser = upb_json_parser_create(p->arena, method, p->symtab,
                                         frame->sink, p->status,
                                         p->ignore_json_unknown);
}
9299
/* Releases the resources owned by |frame|: the encoder handler cache, the
 * JSON code cache and the string sink's buffer. |frame| itself is not
 * freed. */
static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
  upb_handlercache *enc_cache = frame->encoder_handlercache;
  upb_json_codecache *code_cache = frame->parser_codecache;

  upb_handlercache_free(enc_cache);
  upb_json_codecache_free(code_cache);
  upb_stringsink_uninit(&frame->stringsink);
}
9305
/* Returns true once the frame has a nested parser, which is created by
 * json_parser_any_frame_set_payload_type(). */
static bool json_parser_any_frame_has_type_url(
    upb_jsonparser_any_frame *frame) {
  return frame->parser ? true : false;
}
9310
/* Returns true if the recorded "before type url" range is non-empty, i.e. its
 * start and end markers differ. */
static bool json_parser_any_frame_has_value_before_type_url(
    upb_jsonparser_any_frame *frame) {
  const char *first = frame->before_type_url_start;
  const char *last = frame->before_type_url_end;
  return first != last;
}
9315
/* Returns true if the start of an "after type url" range has been recorded. */
static bool json_parser_any_frame_has_value_after_type_url(
    upb_jsonparser_any_frame *frame) {
  return frame->after_type_url_start ? true : false;
}
9320
/* Returns true if a value range was recorded on either side of the type
 * url. */
static bool json_parser_any_frame_has_value(
    upb_jsonparser_any_frame *frame) {
  if (json_parser_any_frame_has_value_before_type_url(frame)) {
    return true;
  }
  return json_parser_any_frame_has_value_after_type_url(frame);
}
9326
/* Records |ptr| as the end of the "before type url" range. Does nothing once
 * the frame has a nested parser. */
static void json_parser_any_frame_set_before_type_url_end(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (frame->parser != NULL) {
    return;
  }
  frame->before_type_url_end = ptr;
}
9334
/* Records |ptr| as the start of the "after type url" range. Only takes effect
 * when the type url is already known and no start has been recorded yet, so
 * the first recorded value is kept. */
static void json_parser_any_frame_set_after_type_url_start_once(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (!json_parser_any_frame_has_type_url(frame)) {
    return;
  }
  if (frame->after_type_url_start != NULL) {
    return;
  }
  frame->after_type_url_start = ptr;
}
9343
/* Sentinel object: capture_suspend() stores its address in p->capture to mark
 * a capture that has been suspended. Only the address is used. */
static char suspend_capture;
9346
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)9347 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9348 upb_handlertype_t type) {
9349 upb_selector_t sel;
9350 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9351 UPB_ASSUME(ok);
9352 return sel;
9353 }
9354
parser_getsel(upb_json_parser * p)9355 static upb_selector_t parser_getsel(upb_json_parser *p) {
9356 return getsel_for_handlertype(
9357 p, upb_handlers_getprimitivehandlertype(p->top->f));
9358 }
9359
/* Returns true if one more frame can be pushed. If the frame above the top
 * would be |p->limit|, sets a "Nesting too deep" error on the parser status
 * and returns false. */
static bool check_stack(upb_json_parser *p) {
  if ((p->top + 1) != p->limit) {
    return true;
  }

  upb_status_seterrmsg(p->status, "Nesting too deep");
  return false;
}
9368
/* Points |frame->name_table| at the name table of the parser method that the
 * parser's code cache holds for |frame->m|. The cache lookup is assumed to
 * succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  const upb_json_codecache *codecache = p->method->cache;
  const upb_json_parsermethod *found_method;
  upb_value entry;
  bool found;

  found = upb_inttable_lookupptr(&codecache->methods, frame->m, &entry);
  UPB_ASSUME(found);

  found_method = upb_value_getconstptr(entry);
  frame->name_table = &found_method->name_table;
}
9381
/* GCC and Clang offer overflow-checking built-ins; we could switch to them if
 * they turned out to bring a performance benefit. */

/* Stores a + b in |*c| and returns true, or returns false without touching
 * |*c| if the sum does not fit in a size_t. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;

  if (b > headroom) {
    return false;
  }

  *c = a + b;
  return true;
}
9390
/* Returns a * b, clamped to SIZE_MAX if the product does not fit in a
 * size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  size_t product;

  if (b == 0) {
    return 0;
  }

  /* Unsigned arithmetic wraps, so the multiplication itself is well defined;
   * dividing back reveals whether it wrapped. */
  product = a * b;
  return (product / b == a) ? product : SIZE_MAX;
}
9399
9400
9401 /* Base64 decoding ************************************************************/
9402
9403 /* TODO(haberman): make this streaming. */
9404
/* Maps each possible byte value to its 6-bit base64 value, or -1 if the byte
 * is not a base64 digit. Each row covers eight consecutive byte values,
 * starting at the offset shown on the left. */
static const signed char b64table[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x28 */ -1, -1, -1, 62 /* + */, -1, -1, -1, 63 /* / */,
  /* 0x30 */ 52 /* 0 */, 53 /* 1 */, 54 /* 2 */, 55 /* 3 */,
             56 /* 4 */, 57 /* 5 */, 58 /* 6 */, 59 /* 7 */,
  /* 0x38 */ 60 /* 8 */, 61 /* 9 */, -1, -1, -1, -1, -1, -1,
  /* 0x40 */ -1, 0 /* A */, 1 /* B */, 2 /* C */,
             3 /* D */, 4 /* E */, 5 /* F */, 6 /* G */,
  /* 0x48 */ 7 /* H */, 8 /* I */, 9 /* J */, 10 /* K */,
             11 /* L */, 12 /* M */, 13 /* N */, 14 /* O */,
  /* 0x50 */ 15 /* P */, 16 /* Q */, 17 /* R */, 18 /* S */,
             19 /* T */, 20 /* U */, 21 /* V */, 22 /* W */,
  /* 0x58 */ 23 /* X */, 24 /* Y */, 25 /* Z */, -1, -1, -1, -1, -1,
  /* 0x60 */ -1, 26 /* a */, 27 /* b */, 28 /* c */,
             29 /* d */, 30 /* e */, 31 /* f */, 32 /* g */,
  /* 0x68 */ 33 /* h */, 34 /* i */, 35 /* j */, 36 /* k */,
             37 /* l */, 38 /* m */, 39 /* n */, 40 /* o */,
  /* 0x70 */ 41 /* p */, 42 /* q */, 43 /* r */, 44 /* s */,
             45 /* t */, 46 /* u */, 47 /* v */, 48 /* w */,
  /* 0x78 */ 49 /* x */, 50 /* y */, 51 /* z */, -1, -1, -1, -1, -1,
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x88 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x98 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xA8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xB8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xC8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xD8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xE8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xF8 */ -1, -1, -1, -1, -1, -1, -1, -1
};
9439
9440 /* Returns the table value sign-extended to 32 bits. Knowing that the upper
9441 * bits will be 1 for unrecognized characters makes it easier to check for
9442 * this error condition later (see below). */
b64lookup(unsigned char ch)9443 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9444
/* Returns true if |ch| is neither a valid base64 character nor the padding
 * character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
9448
/* Decodes the |len| bytes of base64 text at |ptr| and pushes the decoded
 * bytes to the sink of the top frame using selector |sel|.
 *
 * The input is consumed in groups of four characters. A group that contains
 * '=' padding is only accepted as the final group. Returns true on success.
 * On failure an error is recorded on the parser status and false is returned:
 *   - the length is not a multiple of 4,
 *   - a character is neither a base64 digit nor '=',
 *   - padding is misplaced, or followed by more input. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    /* Convert to unsigned before shifting: a lookup result of -1 must not be
     * left-shifted as a signed value. As an unsigned value it still leaves
     * bit 31 set, which is what the test below relies on. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; it is set if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3])) {
    upb_status_seterrf(p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  /* From here on the group is known to contain padding. Padding may only end
   * the input; accepting it earlier would silently drop everything after
   * it. */
  if (limit - ptr != 4) {
    goto badpadding;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    UPB_ASSERT(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  return false;
}
9529
9530
9531 /* Accumulate buffer **********************************************************/
9532
9533 /* Functionality for accumulating a buffer.
9534 *
9535 * Some parts of the parser need an entire value as a contiguous string. For
9536 * example, to look up a member name in a hash table, or to turn a string into
9537 * a number, the relevant library routines need the input string to be in
9538 * contiguous memory, even if the value spanned two or more buffers in the
9539 * input. These routines handle that.
9540 *
9541 * In the common case we can just point to the input buffer to get this
9542 * contiguous string and avoid any actual copy. So we optimistically begin
9543 * this way. But there are a few cases where we must instead copy into a
9544 * separate buffer:
9545 *
9546 * 1. The string was not contiguous in the input (it spanned buffers).
9547 *
9548 * 2. The string included escape sequences that need to be interpreted to get
9549 * the true value in a contiguous buffer. */
9550
/* Asserts that nothing is currently accumulated: no data pointer and a length
 * of zero. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_ASSERT(p->accumulated == NULL);
  UPB_ASSERT(p->accumulated_len == 0);
}
9555
/* Discards the accumulated data by clearing the pointer and the length. The
 * accumulate buffer itself stays allocated. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
9560
9561 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)9562 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9563 void *mem;
9564 size_t old_size = p->accumulate_buf_size;
9565 size_t new_size = UPB_MAX(old_size, 128);
9566 while (new_size < need) {
9567 new_size = saturating_multiply(new_size, 2);
9568 }
9569
9570 mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size);
9571 if (!mem) {
9572 upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
9573 return false;
9574 }
9575
9576 p->accumulate_buf = mem;
9577 p->accumulate_buf_size = new_size;
9578 return true;
9579 }
9580
9581 /* Logically appends the given data to the append buffer.
9582 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9583 * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)9584 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9585 bool can_alias) {
9586 size_t need;
9587
9588 if (!p->accumulated && can_alias) {
9589 p->accumulated = buf;
9590 p->accumulated_len = len;
9591 return true;
9592 }
9593
9594 if (!checked_add(p->accumulated_len, len, &need)) {
9595 upb_status_seterrmsg(p->status, "Integer overflow.");
9596 return false;
9597 }
9598
9599 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9600 return false;
9601 }
9602
9603 if (p->accumulated != p->accumulate_buf) {
9604 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9605 p->accumulated = p->accumulate_buf;
9606 }
9607
9608 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9609 p->accumulated_len += len;
9610 return true;
9611 }
9612
9613 /* Returns a pointer to the data accumulated since the last accumulate_clear()
9614 * call, and writes the length to *len. This with point either to the input
9615 * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)9616 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9617 UPB_ASSERT(p->accumulated);
9618 *len = p->accumulated_len;
9619 return p->accumulated;
9620 }
9621
9622
/* Multi-part text data *******************************************************/
9624
9625 /* When we have text data in the input, it can often come in multiple segments.
9626 * For example, there may be some raw string data followed by an escape
9627 * sequence. The two segments are processed with different logic. Also buffer
9628 * seams in the input can cause multiple segments.
9629 *
9630 * As we see segments, there are two main cases for how we want to process them:
9631 *
9632 * 1. we want to push the captured input directly to string handlers.
9633 *
9634 * 2. we need to accumulate all the parts into a contiguous buffer for further
9635 * processing (field name lookup, string->number conversion, etc). */
9636
/* Possible values of p->multipart_state. */
enum {
  /* No multipart data is being processed. */
  MULTIPART_INACTIVE = 0,

  /* Multipart data is being gathered into one contiguous buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* Each part of the multipart data is pushed straight to the current string
   * handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
9650
9651 /* Start a multi-part text value where we accumulate the data for processing at
9652 * the end. */
multipart_startaccum(upb_json_parser * p)9653 static void multipart_startaccum(upb_json_parser *p) {
9654 assert_accumulate_empty(p);
9655 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
9656 p->multipart_state = MULTIPART_ACCUMULATE;
9657 }
9658
9659 /* Start a multi-part text value where we immediately push text data to a string
9660 * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)9661 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
9662 assert_accumulate_empty(p);
9663 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
9664 p->multipart_state = MULTIPART_PUSHEAGERLY;
9665 p->string_selector = sel;
9666 }
9667
/* Handles one part (|buf|, |len|) of the current multi-part text value
 * according to p->multipart_state:
 *   - inactive:     records an internal error and returns false;
 *   - accumulate:   appends the part to the accumulated data;
 *   - push eagerly: pushes the part to the top frame's sink, passing the
 *                   current buffer handle only when |can_alias| is true.
 * Returns true unless an error occurred. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  const int state = p->multipart_state;

  if (state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    return false;
  }

  if (state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
9691
9692 /* Note: this invalidates the accumulate buffer! Call only after reading its
9693 * contents. */
multipart_end(upb_json_parser * p)9694 static void multipart_end(upb_json_parser *p) {
9695 /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */
9696 p->multipart_state = MULTIPART_INACTIVE;
9697 accumulate_clear(p);
9698 }
9699
9700
9701 /* Input capture **************************************************************/
9702
9703 /* Functionality for capturing a region of the input as text. Gracefully
9704 * handles the case where a buffer seam occurs in the middle of the captured
9705 * region. */
9706
/* Starts capturing input at |ptr|. A multipart value must be active and no
 * capture may already be in progress. */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
  UPB_ASSERT(p->capture == NULL);

  p->capture = ptr;
}
9712
/* Ends the capture at |ptr| and hands the captured region to
 * multipart_text() as aliasable data. Returns false, leaving the capture in
 * place, if that fails. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture);

  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
9722
9723 /* This is called at the end of each input buffer (ie. when we have hit a
9724 * buffer seam). If we are in the middle of capturing the input, this
9725 * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)9726 static void capture_suspend(upb_json_parser *p, const char **ptr) {
9727 if (!p->capture) return;
9728
9729 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
9730 /* We use this as a signal that we were in the middle of capturing, and
9731 * that capturing should resume at the beginning of the next buffer.
9732 *
9733 * We can't use *ptr here, because we have no guarantee that this pointer
9734 * will be valid when we resume (if the underlying memory is freed, then
9735 * using the pointer at all, even to compare to NULL, is likely undefined
9736 * behavior). */
9737 p->capture = &suspend_capture;
9738 } else {
9739 /* Need to back up the pointer to the beginning of the capture, since
9740 * we were not able to actually preserve it. */
9741 *ptr = p->capture;
9742 }
9743 }
9744
/* Resumes a suspended capture at |ptr|. Does nothing when no capture is
 * pending. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }

  UPB_ASSERT(p->capture == &suspend_capture);
  p->capture = ptr;
}
9751
9752
9753 /* Callbacks from the parser **************************************************/
9754
9755 /* These are the functions called directly from the parser itself.
9756 * We define these in the same order as their declarations in the parser. */
9757
/* Maps the character that follows a backslash in an escape sequence to the
 * character it stands for. Any other input trips an assertion; 'x' is
 * returned if assertions are disabled. */
static char escape_char(char in) {
  char out = 'x';

  switch (in) {
    case '"':
    case '/':
    case '\\':
      /* These escapes stand for themselves. */
      out = in;
      break;
    case 'b': out = '\b'; break;
    case 'f': out = '\f'; break;
    case 'n': out = '\n'; break;
    case 'r': out = '\r'; break;
    case 't': out = '\t'; break;
    default:
      UPB_ASSERT(0);
      break;
  }

  return out;
}
9773
/* Translates the escape character at |ptr| and adds the resulting single
 * character to the current multi-part text as non-aliasable data. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(*ptr);
  return multipart_text(p, &unescaped, 1, false);
}
9778
/* Resets the value being built up from hex digits (see hexdigit() and
 * end_hex()). */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
9782
/* Shifts the value built so far left by one hex digit and adds the value of
 * the hex digit at |ptr|. Accepts '0'-'9', 'a'-'f' and 'A'-'F'; anything else
 * trips an assertion. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char c = *ptr;
  int nibble;

  if (c >= '0' && c <= '9') {
    nibble = c - '0';
  } else if (c >= 'a' && c <= 'f') {
    nibble = (c - 'a') + 10;
  } else {
    UPB_ASSERT(c >= 'A' && c <= 'F');
    nibble = (c - 'A') + 10;
  }

  p->digit = (p->digit << 4) + nibble;
}
9797
/* Encodes the value collected from the hex digits as UTF-8 (one to three
 * bytes, enough for \u0000 through \uFFFF) and adds it to the current
 * multi-part text as non-aliasable data. */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char out[3];
  int n;

  if (cp <= 0x7F) {
    /* One byte: 0xxxxxxx. */
    out[0] = cp;
    n = 1;
  } else if (cp <= 0x07FF) {
    /* Two bytes: 110xxxxx 10xxxxxx. */
    out[0] = ((cp >> 6) & 0x1F) | 0xC0;
    out[1] = (cp & 0x3F) | 0x80;
    n = 2;
  } else {
    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
    out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    out[1] = ((cp >> 6) & 0x3F) | 0x80;
    out[2] = (cp & 0x3F) | 0x80;
    n = 3;
  }
  /* TODO(haberman): Handle high surrogates: if the code point is a high
   * surrogate we have to wait for the next escape to get the full code
   * point. */

  return multipart_text(p, out, n, false);
}
9825
/* Parser callback: begins capturing a run of text at |ptr|. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
9829
/* Parser callback: ends the text capture at |ptr|. Returns whatever
 * capture_end() returns. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  bool ok = capture_end(p, ptr);
  return ok;
}
9833
/* Parser callback for the start of a number at |ptr|.
 *
 * Opens a wrapper object or a Value object first when the context calls for
 * one; when not at the top level this also opens a subobject. Then starts
 * accumulating and capturing the number text. Returns false if a top-level
 * number is not acceptable for the message, or if the subobject cannot be
 * started. */
static bool start_number(upb_json_parser *p, const char *ptr) {
  if (is_top_level(p)) {
    if (is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else {
      if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
        return false;
      }
      start_value_object(p, VALUE_NUMBERVALUE);
    }
  } else if (does_number_wrapper_start(p)) {
    if (!start_subobject(p)) return false;
    start_wrapper_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_NUMBERVALUE);
  }

  multipart_startaccum(p);
  capture_begin(p, ptr);
  return true;
}
9859
9860 static bool parse_number(upb_json_parser *p, bool is_quoted);
9861
/* Ends the capture of a number at |ptr| and parses the accumulated text as an
 * unquoted number. When the top frame has no current field, the text is just
 * discarded and true is returned. */
static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  if (p->top->f != NULL) {
    return parse_number(p, false);
  }

  multipart_end(p);
  return true;
}
9874
/* Parser callback for the end of a number at |ptr|. Parses the number and
 * then closes the wrapper object or Value object that this number belongs to,
 * if any, together with its subobject when not at the top level. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!end_number_nontop(p, ptr)) {
    return false;
  }

  if (does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain number: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
9898
9899 /* |buf| is NULL-terminated. |buf| itself will never include quotes;
9900 * |is_quoted| tells us whether this text originally appeared inside quotes. */
parse_number_from_buffer(upb_json_parser * p,const char * buf,bool is_quoted)9901 static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
9902 bool is_quoted) {
9903 size_t len = strlen(buf);
9904 const char *bufend = buf + len;
9905 char *end;
9906 upb_fieldtype_t type = upb_fielddef_type(p->top->f);
9907 double val;
9908 double dummy;
9909 double inf = UPB_INFINITY;
9910
9911 errno = 0;
9912
9913 if (len == 0 || buf[0] == ' ') {
9914 return false;
9915 }
9916
9917 /* For integer types, first try parsing with integer-specific routines.
9918 * If these succeed, they will be more accurate for int64/uint64 than
9919 * strtod().
9920 */
9921 switch (type) {
9922 case UPB_TYPE_ENUM:
9923 case UPB_TYPE_INT32: {
9924 long val = strtol(buf, &end, 0);
9925 if (errno == ERANGE || end != bufend) {
9926 break;
9927 } else if (val > INT32_MAX || val < INT32_MIN) {
9928 return false;
9929 } else {
9930 upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val);
9931 return true;
9932 }
9933 }
9934 case UPB_TYPE_UINT32: {
9935 unsigned long val = strtoul(buf, &end, 0);
9936 if (end != bufend) {
9937 break;
9938 } else if (val > UINT32_MAX || errno == ERANGE) {
9939 return false;
9940 } else {
9941 upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val);
9942 return true;
9943 }
9944 }
9945 /* XXX: We can't handle [u]int64 properly on 32-bit machines because
9946 * strto[u]ll isn't in C89. */
9947 case UPB_TYPE_INT64: {
9948 long val = strtol(buf, &end, 0);
9949 if (errno == ERANGE || end != bufend) {
9950 break;
9951 } else {
9952 upb_sink_putint64(p->top->sink, parser_getsel(p), val);
9953 return true;
9954 }
9955 }
9956 case UPB_TYPE_UINT64: {
9957 unsigned long val = strtoul(p->accumulated, &end, 0);
9958 if (end != bufend) {
9959 break;
9960 } else if (errno == ERANGE) {
9961 return false;
9962 } else {
9963 upb_sink_putuint64(p->top->sink, parser_getsel(p), val);
9964 return true;
9965 }
9966 }
9967 default:
9968 break;
9969 }
9970
9971 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
9972 /* Quoted numbers for integer types are not allowed to be in double form. */
9973 return false;
9974 }
9975
9976 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
9977 /* C89 does not have an INFINITY macro. */
9978 val = inf;
9979 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
9980 val = -inf;
9981 } else {
9982 val = strtod(buf, &end);
9983 if (errno == ERANGE || end != bufend) {
9984 return false;
9985 }
9986 }
9987
9988 switch (type) {
9989 #define CASE(capitaltype, smalltype, ctype, min, max) \
9990 case UPB_TYPE_ ## capitaltype: { \
9991 if (modf(val, &dummy) != 0 || val > max || val < min) { \
9992 return false; \
9993 } else { \
9994 upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \
9995 (ctype)val); \
9996 return true; \
9997 } \
9998 break; \
9999 }
10000 case UPB_TYPE_ENUM:
10001 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
10002 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
10003 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
10004 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
10005 #undef CASE
10006
10007 case UPB_TYPE_DOUBLE:
10008 upb_sink_putdouble(p->top->sink, parser_getsel(p), val);
10009 return true;
10010 case UPB_TYPE_FLOAT:
10011 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
10012 return false;
10013 } else {
10014 upb_sink_putfloat(p->top->sink, parser_getsel(p), val);
10015 return true;
10016 }
10017 default:
10018 return false;
10019 }
10020 }
10021
/* Converts the text held in the accumulate buffer to a number and pushes it to
 * the sink via parse_number_from_buffer().
 *
 * is_quoted: forwarded to parse_number_from_buffer(); true when the number was
 *            written inside a JSON string.
 *
 * Returns true on success.  Returns false if the terminating NUL could not be
 * appended, or if the text could not be converted (in which case an
 * "error parsing number" message is stored in p->status).  The multipart state
 * is ended whenever the conversion was attempted. */
static bool parse_number(upb_json_parser *p, bool is_quoted) {
  size_t len;
  const char *buf;
  bool converted;

  /* strtol() and friends take no length argument, so a NUL terminator is
   * appended first; passing can_alias=false forces a copy of the text. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);
  converted = parse_number_from_buffer(p, buf, is_quoted);

  if (!converted) {
    /* Report before multipart_end() so buf is still the accumulated text. */
    upb_status_seterrf(p->status, "error parsing number: %s", buf);
  }

  multipart_end(p);
  return converted;
}
10043
/* Pushes a boolean value to the sink of the current frame.
 *
 * Returns true without emitting anything when the frame has no current field.
 * Returns false, with an error in p->status, when the current field is not of
 * type UPB_TYPE_BOOL.  Otherwise emits the value and returns true. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool ok;

  if (p->top->f != NULL) {
    if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
      upb_status_seterrf(p->status,
                         "Boolean value specified for non-bool field: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val);
    UPB_ASSERT(ok);
  }

  return true;
}
10063
/* Handles a JSON `true`/`false` literal.
 *
 * When the target is a BoolValue wrapper or a Value well-known type (either as
 * the top-level message or as the current field), the matching wrapper/value
 * object is opened first and closed again after the bool has been emitted.
 * A bool at top level for any other message type is rejected.
 *
 * Returns false if a sub-object cannot be started or the value cannot be
 * emitted; true otherwise (including when the field is an unknown field). */
static bool end_bool(upb_json_parser *p, bool val) {
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_BOOLVALUE);
    } else {
      return false;
    }
  } else {
    const bool as_wrapper = is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE);
    const bool as_value =
        !as_wrapper && is_wellknown_field(p, UPB_WELLKNOWN_VALUE);

    if (as_wrapper || as_value) {
      /* Both kinds live in a sub-message of the current field. */
      if (!start_subobject(p)) {
        return false;
      }
      if (as_wrapper) {
        start_wrapper_object(p);
      } else {
        start_value_object(p, VALUE_BOOLVALUE);
      }
    }
  }

  if (p->top->is_unknown_field) {
    return true;
  }

  if (!parser_putbool(p, val)) {
    return false;
  }

  /* Close whichever well-known object is now the current message, if any. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
10111
/* Handles a JSON `null` literal.
 *
 * `null` is only materialised when the target is the Value well-known type
 * (as the top-level message or as the current field); in that case a value
 * object is opened with VALUE_NULLVALUE and its field is filled by parsing the
 * text "0".  For every other target `null` is accepted and ignored.
 *
 * Returns false if the sub-object cannot be started or if storing the null
 * value fails; true otherwise. */
static bool end_null(upb_json_parser *p) {
  const char *zero_ptr = "0";

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_NULLVALUE);
    } else {
      return true;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_NULLVALUE);
  } else {
    return true;
  }

  /* Fill null_value field. */
  multipart_startaccum(p);
  capture_begin(p, zero_ptr);
  capture_end(p, zero_ptr + 1);
  /* Propagate a conversion failure instead of silently dropping it. */
  if (!parse_number(p, false)) {
    return false;
  }

  end_value_object(p);
  if (!is_top_level(p)) {
    end_subobject(p);
  }

  return true;
}
10143
/* Starts a string value for a frame flagged is_any (see start_stringval()):
 * the text is collected via multipart_startaccum() so that
 * end_any_stringval() can read it back.  Always returns true. */
static bool start_any_stringval(upb_json_parser *p) {
  multipart_startaccum(p);
  return true;
}
10148
/* Called at the opening quote of a JSON string value.
 *
 * First opens whatever well-known-type object the string stands for (string or
 * number wrapper, FieldMask, Timestamp, Duration, Value), either directly when
 * at top level or inside a freshly started sub-object otherwise.  Then sets up
 * how the string's text will be consumed, depending on the current field.
 *
 * Returns false when a string is not acceptable here (top-level message that
 * is none of the handled well-known types, bool or sub-message field, stack
 * overflow, or a sub-object that cannot be started). */
static bool start_stringval(upb_json_parser *p) {
  if (is_top_level(p)) {
    if (is_string_wrapper_object(p) ||
        is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      /* FieldMask strings are consumed path by path; nothing more to set up. */
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else if (does_string_wrapper_start(p) ||
             does_number_wrapper_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_wrapper_object(p);
  } else if (does_fieldmask_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_fieldmask_object(p);
    return true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_STRINGVALUE);
  }

  /* No current field: just accumulate the text. */
  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) {
    return start_any_stringval(p);
  }

  if (upb_fielddef_isstring(p->top->f)) {
    upb_jsonparser_frame *inner;
    upb_selector_t sel;

    if (!check_stack(p)) return false;

    /* Start a new parser frame: parser frames correspond one-to-one with
     * handler frames, and string events occur in a sub-frame. */
    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
    upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
    inner->m = p->top->m;
    inner->f = p->top->f;
    p->top = inner;

    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
      /* For STRING fields we push data directly to the handlers as it is
       * parsed.  We don't do this yet for BYTES fields, because our base64
       * decoder is not streaming.
       *
       * TODO(haberman): make base64 decoding streaming also. */
      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
      return true;
    } else {
      multipart_startaccum(p);
      return true;
    }
  } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL &&
             upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) {
    /* No need to push a frame -- numeric values in quotes remain in the
     * current parser frame.  These values must accumulate so we can convert
     * them all at once at the end. */
    multipart_startaccum(p);
    return true;
  } else {
    upb_status_seterrf(p->status,
                       "String specified for bool or submessage field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }
}
10240
/* Finishes a string value inside a frame flagged is_any.
 *
 * The accumulated text is emitted as a complete string (STARTSTR / STRING /
 * ENDSTR) for the current field, and is then interpreted as a type URL: it
 * must start with "type.googleapis.com/" followed by at least one character
 * naming a message known to p->symtab.  On success the payload type is
 * recorded on the frame's any_frame.
 *
 * Returns false (with an error in p->status for the URL cases) on stack
 * overflow, malformed URL, or unknown message type. */
static bool end_any_stringval(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);

  /* Set type_url */
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, buf, len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  /* NOTE(review): buf is still read below, after multipart_end(); this relies
   * on multipart_end() not invalidating the accumulated text -- confirm. */
  multipart_end(p);

  /* Resolve type url.  The length is tested first so that strncmp() never
   * examines bytes beyond the `len` bytes of accumulated text, which is not
   * guaranteed to be NUL-terminated. */
  if (len > 20 && strncmp(buf, "type.googleapis.com/", 20) == 0) {
    const upb_msgdef *payload_type = NULL;
    buf += 20;
    len -= 20;

    payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
    if (payload_type == NULL) {
      upb_status_seterrf(
          p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
      return false;
    }

    json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);

    return true;
  } else {
    upb_status_seterrf(
        p->status, "Invalid type url: %.*s\n", (int)len, buf);
    return false;
  }
}
10282
/* Finishes a string value for the current field (the part of end_stringval()
 * that does not deal with closing well-known wrapper objects).
 *
 * - Timestamp / Duration messages and frames without a current field simply
 *   end the multipart text.
 * - Frames flagged is_any are delegated to end_any_stringval().
 * - BYTES: the accumulated text is base64-decoded, then handled like STRING.
 * - STRING: ENDSTR is emitted and the string sub-frame is popped.
 * - ENUM: the accumulated text is resolved as a symbolic enum name.
 * - Numeric types: the accumulated text is parsed as a quoted number.
 *
 * Returns false on any conversion failure. */
static bool end_stringval_nontop(upb_json_parser *p) {
  bool ok = true;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
    multipart_end(p);
    return true;
  }

  if (p->top->f == NULL) {
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_stringval(p);
  }

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(p->top->sink, sel, int_val);
      } else {
        /* The precision argument of "%.*s" must be an int, not a size_t. */
        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
      }

      break;
    }

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      ok = parse_number(p, true);
      break;

    default:
      UPB_ASSERT(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
10356
/* Called at the closing quote of a JSON string value.
 *
 * Finishes the value itself and then closes the well-known-type object (and
 * the enclosing sub-object when not at top level) that start_stringval()
 * opened for it, if any.  Returns false if the value could not be finished. */
static bool end_stringval(upb_json_parser *p) {
  if (does_fieldmask_end(p)) {
    /* FieldMask's stringvals have been ended when handling them.  Only need
     * to close FieldMask here. */
    end_fieldmask_object(p);
  } else {
    if (!end_stringval_nontop(p)) {
      return false;
    }

    if (does_string_wrapper_end(p) || does_number_wrapper_end(p)) {
      end_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      end_object(p);
    } else {
      /* Plain field: nothing was opened, nothing to close. */
      return true;
    }
  }

  /* An object was closed above; also leave its sub-object if it had one. */
  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
10401
/* Begins capturing the numeric text of a duration at ptr; the capture is
 * closed and interpreted by end_duration_base(). */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10405
/* Ends the capture started by start_duration_base() and emits the captured
 * text (optional '-', integer seconds, optional ".fraction") as the "seconds"
 * and "nanos" members of the current message.
 *
 * The integer and fractional parts are converted separately because the
 * largest duration, 315576000000 seconds, cannot carry a nanosecond fraction
 * in a double without losing precision.
 *
 * Returns false, with an error in p->status for the parse cases, when the
 * capture cannot be ended, when either part is too long for its scratch
 * buffer, when it does not parse, or when seconds lie outside
 * [-315576000000, 315576000000].
 *
 * NOTE(review): seconds are parsed with strtol(), so on platforms where long
 * is 32 bits large durations are reported as parse errors. */
static bool end_duration_base(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char seconds_buf[14]; /* "-315576000000" plus NUL. */
  char nanos_buf[12];   /* "0." plus 9 digits plus NUL. */
  char *end;
  int64_t seconds = 0;
  int32_t nanos = 0;
  double val = 0.0;
  const char *seconds_membername = "seconds";
  const char *nanos_membername = "nanos";
  size_t fraction_start;
  size_t fraction_len;
  bool negative;

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  memset(seconds_buf, 0, sizeof(seconds_buf));
  memset(nanos_buf, 0, sizeof(nanos_buf));

  /* Find out base end. The maximum duration is 315576000000, which cannot be
   * represented by double without losing precision. Thus, we need to handle
   * fraction and base separately. */
  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
       fraction_start++) {
  }
  fraction_len = len - fraction_start;

  /* Reject text that would overflow the fixed-size scratch buffers (each
   * must keep room for its terminating NUL). */
  if (fraction_start >= sizeof(seconds_buf)) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                  "seconds value has too many digits");
    return false;
  }
  if (fraction_len > 10) {
    upb_status_seterrf(p->status,
                       "error parsing duration: at most 9-digit fraction.");
    return false;
  }

  /* Remember the sign from the text: "-0.5" has seconds == 0, so the sign
   * of nanos cannot be derived from the parsed seconds value. */
  negative = (len > 0 && buf[0] == '-');

  /* Parse base */
  memcpy(seconds_buf, buf, fraction_start);
  errno = 0; /* Do not mistake a stale ERANGE for a failure of this call. */
  seconds = strtol(seconds_buf, &end, 10);
  if (errno == ERANGE || end != seconds_buf + fraction_start) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       seconds_buf);
    return false;
  }

  if (seconds > 315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                  "maximum acceptable value is "
                                  "315576000000");
    return false;
  }

  if (seconds < -315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                  "minimum acceptable value is "
                                  "-315576000000");
    return false;
  }

  /* Parse fraction */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf + fraction_start, fraction_len);
  errno = 0;
  val = strtod(nanos_buf, &end);
  if (errno == ERANGE || end != nanos_buf + fraction_len + 1) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       nanos_buf);
    return false;
  }

  /* The fraction has at most 9 digits, so the exact result is an integer;
   * round to nearest so binary representation error cannot drop a unit. */
  nanos = (int32_t)(val * 1000000000 + 0.5);
  if (negative) nanos = -nanos;

  /* Clean up buffer */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous arena */
  multipart_startaccum(p);

  return true;
}
10494
/* Converts the accumulated text (one numeric component of a timestamp) to an
 * int with atoi(), then ends the current multipart text and starts a fresh
 * accumulation for the next component.
 *
 * Returns the converted value.  If the terminating NUL cannot be appended the
 * text is not read at all and 0 is returned; the signature leaves no way to
 * report that failure to the caller. */
static int parse_timestamp_number(upb_json_parser *p) {
  size_t len;
  const char *buf;
  int val = 0;

  /* atoi() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NUL-terminated
   * buffer.  Only read the buffer when that actually succeeded. */
  if (multipart_text(p, "\0", 1, false)) {
    buf = accumulate_getptr(p, &len);
    val = atoi(buf);
  }

  multipart_end(p);
  multipart_startaccum(p);

  return val;
}
10511
/* Begins capturing the year component of a timestamp at ptr (closed by
 * end_year()). */
static void start_year(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10515
/* Ends the year capture at ptr and stores the value in p->tm.tm_year, which
 * counts years since 1900.  Returns false if the capture cannot be ended. */
static bool end_year(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_year = parse_timestamp_number(p) - 1900;
  }
  return captured;
}
10523
/* Begins capturing the month component of a timestamp at ptr (closed by
 * end_month()). */
static void start_month(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10527
/* Ends the month capture at ptr and stores the value in p->tm.tm_mon, which
 * is zero-based (the parsed number minus one).  Returns false if the capture
 * cannot be ended. */
static bool end_month(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_mon = parse_timestamp_number(p) - 1;
  }
  return captured;
}
10535
/* Begins capturing the day-of-month component of a timestamp at ptr (closed
 * by end_day()). */
static void start_day(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10539
/* Ends the day capture at ptr and stores the value in p->tm.tm_mday.
 * Returns false if the capture cannot be ended. */
static bool end_day(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_mday = parse_timestamp_number(p);
  }
  return captured;
}
10547
/* Begins capturing the hour component of a timestamp at ptr (closed by
 * end_hour()). */
static void start_hour(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10551
/* Ends the hour capture at ptr and stores the value in p->tm.tm_hour.
 * Returns false if the capture cannot be ended. */
static bool end_hour(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_hour = parse_timestamp_number(p);
  }
  return captured;
}
10559
/* Begins capturing the minute component of a timestamp at ptr (closed by
 * end_minute()). */
static void start_minute(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10563
/* Ends the minute capture at ptr and stores the value in p->tm.tm_min.
 * Returns false if the capture cannot be ended. */
static bool end_minute(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_min = parse_timestamp_number(p);
  }
  return captured;
}
10571
/* Begins capturing the second component of a timestamp at ptr (closed by
 * end_second()). */
static void start_second(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10575
/* Ends the second capture at ptr and stores the value in p->tm.tm_sec.
 * Returns false if the capture cannot be ended. */
static bool end_second(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_sec = parse_timestamp_number(p);
  }
  return captured;
}
10583
/* Resets the broken-down time in p->tm to all zero bytes before the
 * individual timestamp components are parsed into it. */
static void start_timestamp_base(upb_json_parser *p) {
  memset(&p->tm, 0, sizeof(p->tm));
}
10587
/* Begins capturing the fractional-seconds part of a timestamp at ptr (closed
 * by end_timestamp_fraction()). */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10591
/* Ends the fraction capture at ptr and emits it as the "nanos" member of the
 * current message.
 *
 * The captured text is prefixed with '0' and parsed with strtod(); at most 10
 * captured characters are accepted (reported to the user as "at most 9-digit
 * fraction").
 *
 * Returns false, with an error in p->status for the parse cases, if the
 * capture cannot be ended, the fraction is too long, or it does not parse. */
static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char nanos_buf[12];
  char *end;
  double val = 0.0;
  int32_t nanos;
  const char *nanos_membername = "nanos";

  memset(nanos_buf, 0, sizeof(nanos_buf));

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  /* Also guarantees that the copy below fits in nanos_buf with its NUL. */
  if (len > 10) {
    upb_status_seterrf(p->status,
        "error parsing timestamp: at most 9-digit fraction.");
    return false;
  }

  /* Parse nanos */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf, len);
  errno = 0; /* Do not mistake a stale ERANGE for a failure of this call. */
  val = strtod(nanos_buf, &end);

  if (errno == ERANGE || end != nanos_buf + len + 1) {
    upb_status_seterrf(p->status, "error parsing timestamp nanos: %s",
                       nanos_buf);
    return false;
  }

  /* The fraction has at most 9 digits, so the exact result is an integer;
   * round to nearest so binary representation error cannot drop a unit. */
  nanos = (int32_t)(val * 1000000000 + 0.5);

  /* Clean up previous environment */
  multipart_end(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
10644
/* Begins capturing the zone suffix of a timestamp at ptr (closed by
 * end_timestamp_zone()). */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10648
/* Returns ceil(n / d) for d > 0.
 *
 * Unlike the (n + d - 1) / d idiom this is also a true ceiling for negative n
 * and cannot overflow in the addition. */
static int div_round_up2(int n, int d) {
  int quotient = n / d;
  return (n % d > 0) ? quotient + 1 : quotient;
}

/* Returns the number of days between 1970-01-01 and the given proleptic
 * Gregorian calendar date: epoch_days(1970, 1, 1) == 1970-01-01 == 0.
 *
 * month is nominally 1..12.  Values outside that range are folded into the
 * year (month 13 is January of the following year, month 0 is December of the
 * previous one) so that the month table is never indexed out of bounds.  day
 * is not range-checked; it is simply added as an offset. */
static int epoch_days(int year, int month, int day) {
  static const uint16_t month_yday[12] = {0,   31,  59,  90,  120, 151,
                                          181, 212, 243, 273, 304, 334};
  int month0 = month - 1;
  int febs_since_0;
  int leap_days_since_0;
  int days_since_0;

  /* Normalise month0 into [0, 11], carrying whole years. */
  year += month0 / 12;
  month0 %= 12;
  if (month0 < 0) {
    month0 += 12;
    year -= 1;
  }

  /* From March onwards this year's February is already behind us. */
  febs_since_0 = month0 >= 2 ? year + 1 : year;
  leap_days_since_0 = div_round_up2(febs_since_0, 4) -
                      div_round_up2(febs_since_0, 100) +
                      div_round_up2(febs_since_0, 400);
  days_since_0 =
      365 * year + month_yday[month0] + (day - 1) + leap_days_since_0;

  /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD).
   * Since the "BC" system does not have a year zero, 1 BC == year zero. */
  return days_since_0 - 719528;
}
10668
/* Converts a broken-down UTC time to seconds since the Unix epoch.
 *
 * Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read.  The
 * hour/minute/second fields are combined arithmetically, so out-of-range
 * values (e.g. an hour adjusted by a zone offset) carry over naturally. */
static int64_t upb_timegm(const struct tm *tp) {
  const int64_t days =
      epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday);
  const int64_t hours = days * 24 + tp->tm_hour;
  const int64_t minutes = hours * 60 + tp->tm_min;

  return minutes * 60 + tp->tm_sec;
}
10676
/* Ends the zone capture at ptr, applies the zone offset to p->tm, and emits
 * the resulting time as the "seconds" member of the current message.
 *
 * The captured text is either "Z" or a sign followed by a two-digit hour
 * offset.  A "+hh" offset is subtracted from the local hour and a "-hh"
 * offset is added, yielding UTC.
 *
 * Returns false, with an error in p->status for the parse cases, if the
 * capture cannot be ended, the offset is malformed, or the result is earlier
 * than 0001-01-01T00:00:00Z. */
static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  int hours;
  int64_t seconds;
  const char *seconds_membername = "seconds";

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  if (len == 0) {
    upb_status_seterrf(p->status, "error parsing timestamp offset");
    return false;
  }

  if (buf[0] != 'Z') {
    /* Decode the two hour digits by hand: the captured text is not
     * guaranteed to be NUL-terminated, so it must not be handed to sscanf(),
     * and every index is checked against len first. */
    if (len < 3 || buf[1] < '0' || buf[1] > '9' || buf[2] < '0' ||
        buf[2] > '9') {
      upb_status_seterrf(p->status, "error parsing timestamp offset");
      return false;
    }
    hours = (buf[1] - '0') * 10 + (buf[2] - '0');

    if (buf[0] == '+') {
      hours = -hours;
    }

    p->tm.tm_hour += hours;
  }

  /* Normalize tm */
  seconds = upb_timegm(&p->tm);

  /* Check timestamp boundary */
  if (seconds < -62135596800) {
    upb_status_seterrf(p->status, "error parsing timestamp: "
                                  "minimum acceptable value is "
                                  "0001-01-01T00:00:00Z");
    return false;
  }

  /* Clean up previous environment */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
10730
/* Begins capturing the text of one FieldMask path at ptr (closed by
 * end_fieldmask_path_text()). */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10734
/* Ends the FieldMask path capture at ptr.  Returns the result of
 * capture_end(). */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
10738
/* Starts one path entry of a FieldMask: pushes a string sub-frame for the
 * current field (emitting STARTSTR) and begins accumulating the path text.
 * Returns false if the parser stack has no room for another frame. */
static bool start_fieldmask_path(upb_json_parser *p) {
  upb_jsonparser_frame *str_frame;
  upb_selector_t startstr_sel;

  if (!check_stack(p)) {
    return false;
  }

  /* Parser frames correspond one-to-one with handler frames, and string
   * events occur in a sub-frame, so open one here. */
  str_frame = start_jsonparser_frame(p);
  startstr_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, startstr_sel, 0, &str_frame->sink);

  /* The sub-frame refers to the same message and field as its parent. */
  str_frame->m = p->top->m;
  str_frame->f = p->top->f;
  p->top = str_frame;

  multipart_startaccum(p);
  return true;
}
10757
/* Pushes the len bytes at ptr to the current sink one character at a time,
 * rewriting every ASCII upper-case letter except the very first character as
 * an underscore followed by its lower-case form (e.g. "fooBar" is emitted as
 * "foo_bar").  Always returns true. */
static bool lower_camel_push(
    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
  size_t i;

  for (i = 0; i < len; i++) {
    const char c = ptr[i];
    const bool is_upper = (c >= 'A' && c <= 'Z');

    if (is_upper && i > 0) {
      char lower = tolower(c);
      upb_sink_putstring(p->top->sink, sel, "_", 1, NULL);
      upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL);
    } else {
      upb_sink_putstring(p->top->sink, sel, ptr + i, 1, NULL);
    }
  }

  return true;
}
10774
/* Finishes one path entry of a FieldMask: pushes the accumulated path text
 * through lower_camel_push(), emits ENDSTR, pops the string sub-frame opened
 * by start_fieldmask_path(), and ends the multipart text.  Returns false if
 * the text could not be pushed. */
static bool end_fieldmask_path(upb_json_parser *p) {
  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);

  if (!lower_camel_push(p, sel, p->accumulated, p->accumulated_len)) {
    return false;
  }

  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(p->top->sink, sel);
  p->top--;

  multipart_end(p);
  return true;
}
10791
/* Starts an object member: asserts that the top frame has no current field
 * yet and begins accumulating text (the member name, which end_membername()
 * later reads back from the accumulate buffer). */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);
  multipart_startaccum(p);
}
10796
10797 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10798 * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)10799 static bool parse_mapentry_key(upb_json_parser *p) {
10800
10801 size_t len;
10802 const char *buf = accumulate_getptr(p, &len);
10803
10804 /* Emit the key field. We do a bit of ad-hoc parsing here because the
10805 * parser state machine has already decided that this is a string field
10806 * name, and we are reinterpreting it as some arbitrary key type. In
10807 * particular, integer and bool keys are quoted, so we need to parse the
10808 * quoted string contents here. */
10809
10810 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10811 if (p->top->f == NULL) {
10812 upb_status_seterrmsg(p->status, "mapentry message has no key");
10813 return false;
10814 }
10815 switch (upb_fielddef_type(p->top->f)) {
10816 case UPB_TYPE_INT32:
10817 case UPB_TYPE_INT64:
10818 case UPB_TYPE_UINT32:
10819 case UPB_TYPE_UINT64:
10820 /* Invoke end_number. The accum buffer has the number's text already. */
10821 if (!parse_number(p, true)) {
10822 return false;
10823 }
10824 break;
10825 case UPB_TYPE_BOOL:
10826 if (len == 4 && !strncmp(buf, "true", 4)) {
10827 if (!parser_putbool(p, true)) {
10828 return false;
10829 }
10830 } else if (len == 5 && !strncmp(buf, "false", 5)) {
10831 if (!parser_putbool(p, false)) {
10832 return false;
10833 }
10834 } else {
10835 upb_status_seterrmsg(p->status,
10836 "Map bool key not 'true' or 'false'");
10837 return false;
10838 }
10839 multipart_end(p);
10840 break;
10841 case UPB_TYPE_STRING:
10842 case UPB_TYPE_BYTES: {
10843 upb_sink subsink;
10844 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10845 upb_sink_startstr(p->top->sink, sel, len, &subsink);
10846 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10847 upb_sink_putstring(subsink, sel, buf, len, NULL);
10848 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10849 upb_sink_endstr(subsink, sel);
10850 multipart_end(p);
10851 break;
10852 }
10853 default:
10854 upb_status_seterrmsg(p->status, "Invalid field type for map key");
10855 return false;
10856 }
10857
10858 return true;
10859 }
10860
10861 /* Helper: emit one map entry (as a submessage in the map field sequence). This
10862 * is invoked from end_membername(), at the end of the map entry's key string,
10863 * with the map key in the accumulate buffer. It parses the key from that
10864 * buffer, emits the handler calls to start the mapentry submessage (setting up
10865 * its subframe in the process), and sets up state in the subframe so that the
10866 * value parser (invoked next) will emit the mapentry's value field and then
10867 * end the mapentry message. */
10868
handle_mapentry(upb_json_parser * p)10869 static bool handle_mapentry(upb_json_parser *p) {
10870 const upb_fielddef *mapfield;
10871 const upb_msgdef *mapentrymsg;
10872 upb_jsonparser_frame *inner;
10873 upb_selector_t sel;
10874
10875 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10876 * for the mapentry itself, and then set |f| in that frame so that the map
10877 * value field is parsed, and also set a flag to end the frame after the
10878 * map-entry value is parsed. */
10879 if (!check_stack(p)) return false;
10880
10881 mapfield = p->top->mapfield;
10882 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10883
10884 inner = start_jsonparser_frame(p);
10885 p->top->f = mapfield;
10886 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10887 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
10888 inner->m = mapentrymsg;
10889 inner->mapfield = mapfield;
10890
10891 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10892 * the key field value to the sink, and these handlers will pop the frame
10893 * if they see is_mapentry (when invoked by the parser state machine, they
10894 * would have just seen the map-entry value, not key). */
10895 inner->is_mapentry = false;
10896 p->top = inner;
10897
10898 /* send STARTMSG in submsg frame. */
10899 upb_sink_startmsg(p->top->sink);
10900
10901 parse_mapentry_key(p);
10902
10903 /* Set up the value field to receive the map-entry value. */
10904 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10905 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
10906 p->top->mapfield = mapfield;
10907 if (p->top->f == NULL) {
10908 upb_status_seterrmsg(p->status, "mapentry message has no value");
10909 return false;
10910 }
10911
10912 return true;
10913 }
10914
/* Called at the end of an object member's name, with the name in the
 * accumulate buffer.  Selects the field that the following value belongs to.
 *
 * - No message def on the frame: the member is treated as an unknown field.
 * - Frame flagged is_any: delegated to end_any_membername().
 * - Frame flagged is_map: the name is a map key; see handle_mapentry().
 * - Otherwise the name is looked up in the frame's name table.  A miss is an
 *   unknown field when p->ignore_json_unknown is set and an error otherwise.
 *
 * Returns false only on error. */
static bool end_membername(upb_json_parser *p) {
  size_t len;
  const char *buf;
  upb_value v;

  UPB_ASSERT(!p->top->f);

  if (!p->top->m) {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_membername(p);
  }

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  buf = accumulate_getptr(p, &len);

  if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
    p->top->f = upb_value_getconstptr(v);
  } else if (p->ignore_json_unknown) {
    p->top->is_unknown_field = true;
  } else {
    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
    return false;
  }

  multipart_end(p);
  return true;
}
10948
/* end_membername() for a frame flagged is_any.
 *
 * The member "@type" selects the field registered as "type_url" in the
 * frame's name table; every other member is marked as an unknown field at
 * this stage.
 *
 * Returns false, with an error in p->status, only if the name table has no
 * "type_url" entry. */
static bool end_any_membername(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  upb_value v;

  if (len == 5 && strncmp(buf, "@type", len) == 0) {
    /* Only read v when the lookup actually filled it in. */
    if (!upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v)) {
      upb_status_seterrmsg(p->status, "Any message has no type_url field");
      return false;
    }
    p->top->f = upb_value_getconstptr(v);
    multipart_end(p);
    return true;
  } else {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }
}
10965
/* Called after an object member's value has been handled.  If the top frame
 * is a map-entry frame it is closed and popped (ENDMSG on the entry, then
 * ENDSUBMSG on the enclosing frame).  Finally the current field and the
 * unknown-field flag of the (possibly new) top frame are cleared, ready for
 * the next member. */
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    /* We just parsed a map-entry value, so end that frame too. */
    const upb_fielddef *mapfield = p->top->mapfield;
    upb_selector_t sel;
    bool ok;

    UPB_ASSERT(p->top > p->stack);

    /* send ENDMSG on submsg. */
    upb_sink_endmsg(p->top->sink, p->status);

    /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
    p->top--;
    ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
    UPB_ASSUME(ok);
    upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel);
  }

  p->top->is_unknown_field = false;
  p->top->f = NULL;
}
10988
/* start_member() for a frame with an any_frame: starts the member and passes
 * ptr to json_parser_any_frame_set_after_type_url_start_once() for the
 * frame's any_frame. */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  start_member(p);
  json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
}
10993
/* end_member() for a frame with an any_frame: passes ptr to
 * json_parser_any_frame_set_before_type_url_end() for the frame's any_frame,
 * then ends the member. */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);
  end_member(p);
}
10998
/* Pushes a new parser frame for a JSON object that is the value of the
 * current field.
 *
 * - Unknown field: a bare frame is pushed and nothing is emitted.
 * - Map field: STARTSEQ is emitted and the new frame is flagged is_map.
 * - Sub-message field: STARTSUBMSG is emitted, the new frame gets the
 *   sub-message's def and name table, and is flagged is_any (with a fresh
 *   any_frame) when the sub-message is the Any well-known type.
 * - Any other field type is an error.
 *
 * Returns false on stack overflow or when the field cannot hold an object. */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;
  bool any;

  if (p->top->is_unknown_field) {
    if (!check_stack(p)) return false;

    p->top = start_jsonparser_frame(p);
    return true;
  }

  if (upb_fielddef_ismap(p->top->f)) {
    /* Beginning of a map.  Start a new parser frame in a repeated-field
     * context. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    inner->mapfield = p->top->f;
    inner->is_map = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_issubmsg(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  /* Beginning of a subobject.  Start a new parser frame in the submsg
   * context. */
  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
  inner->m = upb_fielddef_msgsubdef(p->top->f);
  set_name_table(p, inner);
  p->top = inner;

  any = is_wellknown_msg(p, UPB_WELLKNOWN_ANY);
  if (any) {
    p->top->is_any = true;
    p->top->any_frame = json_parser_any_frame_new(p);
  } else {
    p->top->is_any = false;
    p->top->any_frame = NULL;
  }

  return true;
}
11055
/* Handles the start of a JSON object, inserting the implicit wrapper levels
 * needed when the target is a well-known Struct or Value.
 *
 *   - Top level, message is Value:  open the Value's struct_value member,
 *     push a subobject, then open the Struct's "fields" member.
 *   - Top level, message is Struct: open the "fields" member.
 *   - Top level, anything else:     nothing to do; returns true.
 *   - Field of type Struct:         push a subobject, open "fields".
 *   - Field of type Value:          push a subobject, then as for a top-level
 *     Value.
 *
 * Every path except the plain top-level one finishes with start_subobject().
 * Returns false as soon as any start_subobject() call fails. */
static bool start_subobject_full(upb_json_parser *p) {
  bool open_value = false;  /* Value -> struct_value -> Struct "fields" */
  bool open_struct = false; /* Struct "fields" only */

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      open_value = true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
      open_struct = true;
    } else {
      return true;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
    if (!start_subobject(p)) return false;
    open_struct = true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    open_value = true;
  }

  if (open_value) {
    start_value_object(p, VALUE_STRUCTVALUE);
    if (!start_subobject(p)) return false;
    start_structvalue_object(p);
  } else if (open_struct) {
    start_structvalue_object(p);
  }

  return start_subobject(p);
}
11079
/* Pops the frame pushed by start_subobject() and notifies the parent sink.
 *
 * Nothing happens at top level.  A map frame is closed with ENDSEQ.  Any other
 * frame is closed with ENDSUBMSG, unless the popped frame had no message
 * definition (m == NULL), in which case it is popped silently. */
static void end_subobject(upb_json_parser *p) {
  upb_selector_t sel;
  bool had_msgdef;

  if (is_top_level(p)) return;

  if (p->top->is_map) {
    --p->top;
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, sel);
    return;
  }

  /* Sample the popped frame's state before moving p->top. */
  had_msgdef = (p->top->m != NULL);
  --p->top;
  if (!had_msgdef) return;

  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
  upb_sink_endsubmsg(p->top->sink, p->top[1].sink, sel);
}
11100
/* Ends a JSON object and unwinds the wrapper levels that
 * start_subobject_full() may have opened.
 *
 * After the regular end_subobject(), if the top frame's message is a
 * well-known Struct its struct-value member/object is closed and (unless at
 * top level) another subobject is ended; the same is then done for a
 * well-known Value. */
static void end_subobject_full(upb_json_parser *p) {
  end_subobject(p);

  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
    end_structvalue_object(p);
    if (!is_top_level(p)) {
      end_subobject(p);
    }
  }

  /* Checked after the Struct case: popping the Struct frame may expose an
   * enclosing Value frame. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) {
      end_subobject(p);
    }
  }
}
11118
/* Handles the start of a JSON array.
 *
 * Well-known ListValue / Value targets first get their implicit wrapper
 * levels opened (mirroring start_subobject_full()).  Then:
 *   - for an unknown field, a frame flagged is_unknown_field is pushed and
 *     nothing is sent to any sink;
 *   - otherwise the current field must be a sequence; STARTSEQ is sent and a
 *     frame flagged is_repeated is pushed.
 *
 * Returns false when:
 *   - an array appears at top level for a message that is neither Value nor
 *     ListValue (no status message is set on this path);
 *   - a nested start_subobject() fails;
 *   - check_stack() fails;
 *   - the current field is not a sequence (an error is set on p->status). */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_LISTVALUE);
      if (!start_subobject(p)) return false;
      start_listvalue_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      start_listvalue_object(p);
    } else {
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_LISTVALUE);
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  }

  if (p->top->is_unknown_field) {
    /* An ignored array still consumes a frame, so the depth limit has to be
     * enforced here as well (start_subobject() does the same for ignored
     * objects).  Without this check, deeply nested arrays under an unknown
     * field could push frames past the end of the parser stack. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    inner->is_unknown_field = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(p->top->sink, sel, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_repeated = true;
  p->top = inner;

  return true;
}
11174
/* Handles the end of a JSON array: pops the array frame and, unless the
 * parent is processing an unknown field, sends ENDSEQ and unwinds any
 * implicit ListValue / Value wrapper levels opened by start_array(). */
static void end_array(upb_json_parser *p) {
  UPB_ASSERT(p->top > p->stack);

  --p->top;

  if (!p->top->is_unknown_field) {
    upb_selector_t endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, endseq_sel);

    if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      end_listvalue_object(p);
      if (!is_top_level(p)) end_subobject(p);
    }

    /* Re-checked after the ListValue case, which may have popped a frame. */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
      if (!is_top_level(p)) end_subobject(p);
    }
  }
}
11203
/* Sends STARTMSG on the top frame's sink, except for map frames and frames
 * that have no message definition. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) return;
  upb_sink_startmsg(p->top->sink);
}
11209
/* Sends ENDMSG (with the parser's status) on the top frame's sink, except for
 * map frames and frames that have no message definition. */
static void end_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) return;
  upb_sink_endmsg(p->top->sink, p->status);
}
11215
/* Starts an "any" object: performs the regular start_object() processing and
 * initialises the any_frame's before_type_url range to the empty range
 * [ptr, ptr). */
static void start_any_object(upb_json_parser *p, const char *ptr) {
  start_object(p);
  p->top->any_frame->before_type_url_start = ptr;
  p->top->any_frame->before_type_url_end = ptr;
}
11221
/* Finishes an "any" object whose closing character is at `ptr`.
 *
 * Steps, in order:
 *   1. Fail with "No valid type url" if the any_frame has a value but no
 *      type url.
 *   2. If the nested parser's top-level message is a well-known type, advance
 *      each saved range start to just past its first ':' (failing if there is
 *      none) so that only the member's value text remains.
 *   3. Feed the saved before/after ranges to the nested parser
 *      (any_frame->parser), synthesising "{", "," and "}" around them when
 *      the payload is not a well-known type, then end the nested parse.
 *   4. Emit the nested parser's string sink contents as the string value of
 *      this frame's "value" member, close the member and the object, and free
 *      the any_frame.
 *
 * Returns false if any step fails, true otherwise.
 *
 * NOTE(review): the early `return false` paths do not call
 * json_parser_any_frame_free(); whether that matters depends on how the frame
 * is allocated, which is not visible here. */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  /* End (exclusive) of the text after the type url; includes the character at
   * `ptr` unless adjusted below for well-known types. */
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;

  if (json_parser_any_frame_has_value(p->top->any_frame) &&
      !json_parser_any_frame_has_type_url(p->top->any_frame)) {
    upb_status_seterrmsg(p->status, "No valid type url");
    return false;
  }

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
      UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
      /* Skip the member name: restart the range just after the first ':'. */
      p->top->any_frame->before_type_url_start =
          memchr(p->top->any_frame->before_type_url_start, ':',
                 p->top->any_frame->before_type_url_end -
                 p->top->any_frame->before_type_url_start);
      if (p->top->any_frame->before_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "invalid data for well known type.");
        return false;
      }
      p->top->any_frame->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
      /* Same for the text after the type url; the search covers everything up
       * to and including the character at `ptr`. */
      p->top->any_frame->after_type_url_start =
          memchr(p->top->any_frame->after_type_url_start, ':',
                 (ptr + 1) -
                 p->top->any_frame->after_type_url_start);
      if (p->top->any_frame->after_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "Invalid data for well known type.");
        return false;
      }
      p->top->any_frame->after_type_url_start++;
      /* Exclude the character at `ptr` from what is fed to the nested
       * parser. */
      packed_end = ptr;
    }
  }

  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->before_type_url_start,
               p->top->any_frame->before_type_url_end -
               p->top->any_frame->before_type_url_start, NULL)) {
      return false;
    }
  } else {
    /* Nothing precedes the type url: supply the opening brace ourselves,
     * unless the payload is a well-known type. */
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
        return false;
      }
    }
  }

  /* Members on both sides of the type url need a separator between them. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
      json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->after_type_url_start,
               packed_end - p->top->any_frame->after_type_url_start, NULL)) {
      return false;
    }
  } else {
    /* Nothing follows the type url: supply the closing brace ourselves,
     * unless the payload is a well-known type. */
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
        return false;
      }
    }
  }

  if (!end(p->top->any_frame->parser, NULL)) {
    return false;
  }

  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);
  end_membername(p);

  /* The string sink below is written into the slot just above the current
   * top frame, so that slot must exist. */
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame. */
  json_parser_any_frame_free(p->top->any_frame);

  return true;
}
11334
is_string_wrapper(const upb_msgdef * m)11335 static bool is_string_wrapper(const upb_msgdef *m) {
11336 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11337 return type == UPB_WELLKNOWN_STRINGVALUE ||
11338 type == UPB_WELLKNOWN_BYTESVALUE;
11339 }
11340
is_fieldmask(const upb_msgdef * m)11341 static bool is_fieldmask(const upb_msgdef *m) {
11342 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11343 return type == UPB_WELLKNOWN_FIELDMASK;
11344 }
11345
/* Opens the implicit object for a FieldMask: starts the object, selects its
 * "paths" member as if that name had been parsed from the input, and starts
 * the array that will hold the individual paths.  The result of start_array()
 * is not inspected. */
static void start_fieldmask_object(upb_json_parser *p) {
  static const char kMember[] = "paths";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, kMember);
  capture_end(p, kMember + (sizeof(kMember) - 1));
  end_membername(p);

  start_array(p);
}
11359
/* Closes what start_fieldmask_object() opened, in reverse order: the array,
 * the member, and the object. */
static void end_fieldmask_object(upb_json_parser *p) {
  end_array(p);
  end_member(p);
  end_object(p);
}
11365
/* Opens the implicit object for a wrapper message: starts the object and
 * selects its "value" member as if that name had been parsed from the
 * input. */
static void start_wrapper_object(upb_json_parser *p) {
  static const char kMember[] = "value";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, kMember);
  capture_end(p, kMember + (sizeof(kMember) - 1));
  end_membername(p);
}
11377
/* Closes what start_wrapper_object() opened: the member, then the object. */
static void end_wrapper_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11382
/* Opens the implicit object for a Value message and selects the member that
 * corresponds to `value_type` (one of the VALUE_* constants), as if that
 * member name had been parsed from the input.  An unrecognised `value_type`
 * selects the empty name. */
static void start_value_object(upb_json_parser *p, int value_type) {
  const char *name;

  switch (value_type) {
    case VALUE_NULLVALUE:
      name = "null_value";
      break;
    case VALUE_NUMBERVALUE:
      name = "number_value";
      break;
    case VALUE_STRINGVALUE:
      name = "string_value";
      break;
    case VALUE_BOOLVALUE:
      name = "bool_value";
      break;
    case VALUE_STRUCTVALUE:
      name = "struct_value";
      break;
    case VALUE_LISTVALUE:
      name = "list_value";
      break;
    default:
      name = "";
      break;
  }

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name + strlen(name));
  end_membername(p);
}
11421
/* Closes what start_value_object() opened: the member, then the object. */
static void end_value_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11426
/* Opens the implicit object for a ListValue message: starts the object and
 * selects its "values" member as if that name had been parsed from the
 * input. */
static void start_listvalue_object(upb_json_parser *p) {
  static const char kMember[] = "values";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, kMember);
  capture_end(p, kMember + (sizeof(kMember) - 1));
  end_membername(p);
}
11438
/* Closes what start_listvalue_object() opened: the member, then the object. */
static void end_listvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11443
/* Opens the implicit object for a Struct message: starts the object and
 * selects its "fields" member as if that name had been parsed from the
 * input. */
static void start_structvalue_object(upb_json_parser *p) {
  static const char kMember[] = "fields";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, kMember);
  capture_end(p, kMember + (sizeof(kMember) - 1));
  end_membername(p);
}
11455
/* Closes what start_structvalue_object() opened: the member, then the
 * object. */
static void end_structvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11460
/* Returns true when the parser is at the bottom frame of its stack with no
 * current field selected and no unknown field in progress. */
static bool is_top_level(upb_json_parser *p) {
  if (p->top != p->stack) return false;
  if (p->top->f != NULL) return false;
  return !p->top->is_unknown_field;
}
11464
/* Returns true when the top frame has a message definition whose well-known
 * type equals `type`. */
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
  if (p->top->m == NULL) return false;
  return upb_msgdef_wellknowntype(p->top->m) == type;
}
11468
/* Returns true when the top frame's current field is a submessage field whose
 * message type has well-known type `type`. */
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
  if (p->top->f == NULL) return false;
  if (!upb_fielddef_issubmsg(p->top->f)) return false;
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f)) == type;
}
11475
/* Returns true when the top frame's current field is a submessage field whose
 * message type is a number wrapper (per upb_msgdef_isnumberwrapper()). */
static bool does_number_wrapper_start(upb_json_parser *p) {
  if (p->top->f == NULL) return false;
  if (!upb_fielddef_issubmsg(p->top->f)) return false;
  return upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f));
}
11481
/* Returns true when the top frame has a message definition that is a number
 * wrapper (per upb_msgdef_isnumberwrapper()). */
static bool does_number_wrapper_end(upb_json_parser *p) {
  if (p->top->m == NULL) return false;
  return upb_msgdef_isnumberwrapper(p->top->m);
}
11485
/* Returns true when the top frame's message is a number wrapper.  Performs
 * the same test as does_number_wrapper_end(). */
static bool is_number_wrapper_object(upb_json_parser *p) {
  if (p->top->m == NULL) return false;
  return upb_msgdef_isnumberwrapper(p->top->m);
}
11489
/* Returns true when the top frame's current field is a submessage field whose
 * message type is a string wrapper (per is_string_wrapper()). */
static bool does_string_wrapper_start(upb_json_parser *p) {
  if (p->top->f == NULL) return false;
  if (!upb_fielddef_issubmsg(p->top->f)) return false;
  return is_string_wrapper(upb_fielddef_msgsubdef(p->top->f));
}
11495
/* Returns true when the top frame has a message definition that is a string
 * wrapper (per is_string_wrapper()). */
static bool does_string_wrapper_end(upb_json_parser *p) {
  if (p->top->m == NULL) return false;
  return is_string_wrapper(p->top->m);
}
11499
/* Returns true when the top frame's message is a string wrapper.  Performs
 * the same test as does_string_wrapper_end(). */
static bool is_string_wrapper_object(upb_json_parser *p) {
  if (p->top->m == NULL) return false;
  return is_string_wrapper(p->top->m);
}
11503
/* Returns true when the top frame's current field is a submessage field whose
 * message type is FieldMask (per is_fieldmask()). */
static bool does_fieldmask_start(upb_json_parser *p) {
  if (p->top->f == NULL) return false;
  if (!upb_fielddef_issubmsg(p->top->f)) return false;
  return is_fieldmask(upb_fielddef_msgsubdef(p->top->f));
}
11509
/* Returns true when the top frame has a message definition that is FieldMask
 * (per is_fieldmask()). */
static bool does_fieldmask_end(upb_json_parser *p) {
  if (p->top->m == NULL) return false;
  return is_fieldmask(p->top->m);
}
11513
/* Jumps to the enclosing function's `error` label when `x` evaluates to
 * false.  Wrapped in do/while(0) so that the expansion is a single statement
 * and a following `else` cannot bind to the macro's internal `if`. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
11515
11516
11517 /* The actual parser **********************************************************/
11518
11519 /* What follows is the Ragel parser itself. The language is specified in Ragel
11520 * and the actions call our C functions above.
11521 *
11522 * Ragel has an extensive set of functionality, and we use only a small part of
11523 * it. There are many action types but we only use a few:
11524 *
11525 * ">" -- transition into a machine
11526 * "%" -- transition out of a machine
11527 * "@" -- transition into a final state of a machine.
11528 *
11529 * "@" transitions are tricky because a machine can transition into a final
11530 * state repeatedly. But in some cases we know this can't happen, for example
11531 * a string which is delimited by a final '"' can only transition into its
11532 * final state once, when the closing '"' is seen. */
11533
11534
11535 #line 2780 "upb/json/parser.rl"
11536
11537
11538
11539 #line 2583 "upb/json/parser.c"
/* Generated by Ragel -- do not edit by hand.
 *
 * Flattened action lists.  parse() points at
 * `_json_actions + _json_trans_actions[_trans]`, reads the first byte as the
 * number of actions in the list, and then dispatches on each following byte
 * as an action id in its `switch`. */
static const char _json_actions[] = {
  0, 1, 0, 1, 1, 1, 3, 1,
  4, 1, 6, 1, 7, 1, 8, 1,
  9, 1, 11, 1, 12, 1, 13, 1,
  14, 1, 15, 1, 16, 1, 17, 1,
  18, 1, 19, 1, 20, 1, 22, 1,
  23, 1, 24, 1, 35, 1, 37, 1,
  39, 1, 40, 1, 42, 1, 43, 1,
  44, 1, 46, 1, 48, 1, 49, 1,
  50, 1, 51, 1, 53, 1, 54, 2,
  4, 9, 2, 5, 6, 2, 7, 3,
  2, 7, 9, 2, 21, 26, 2, 25,
  10, 2, 27, 28, 2, 29, 30, 2,
  32, 34, 2, 33, 31, 2, 38, 36,
  2, 40, 42, 2, 45, 2, 2, 46,
  54, 2, 47, 36, 2, 49, 54, 2,
  50, 54, 2, 51, 54, 2, 52, 41,
  2, 53, 54, 3, 32, 34, 35, 4,
  21, 26, 27, 28
};
11560
/* Generated by Ragel -- do not edit by hand.
 *
 * Indexed by the current state `cs`: offset into _json_trans_keys at which
 * that state's keys begin. */
static const short _json_key_offsets[] = {
  0, 0, 12, 13, 18, 23, 28, 29,
  30, 31, 32, 33, 34, 35, 36, 37,
  38, 43, 44, 48, 53, 58, 63, 67,
  71, 74, 77, 79, 83, 87, 89, 91,
  96, 98, 100, 109, 115, 121, 127, 133,
  135, 139, 142, 144, 146, 149, 150, 154,
  156, 158, 160, 162, 163, 165, 167, 168,
  170, 172, 173, 175, 177, 178, 180, 182,
  183, 185, 187, 191, 193, 195, 196, 197,
  198, 199, 201, 206, 208, 210, 212, 221,
  222, 222, 222, 227, 232, 237, 238, 239,
  240, 241, 241, 242, 243, 244, 244, 245,
  246, 247, 247, 252, 253, 257, 262, 267,
  272, 276, 276, 279, 282, 285, 288, 291,
  294, 294, 294, 294, 294, 294
};
11578
/* Generated by Ragel -- do not edit by hand.
 *
 * Transition keys (input character codes).  For each state the single-
 * character keys come first, followed by (low, high) pairs describing
 * character ranges; parse() binary-searches the singles and then the ranges
 * for the current input character. */
static const char _json_trans_keys[] = {
  32, 34, 45, 91, 102, 110, 116, 123,
  9, 13, 48, 57, 34, 32, 93, 125,
  9, 13, 32, 44, 93, 9, 13, 32,
  93, 125, 9, 13, 97, 108, 115, 101,
  117, 108, 108, 114, 117, 101, 32, 34,
  125, 9, 13, 34, 32, 58, 9, 13,
  32, 93, 125, 9, 13, 32, 44, 125,
  9, 13, 32, 44, 125, 9, 13, 32,
  34, 9, 13, 45, 48, 49, 57, 48,
  49, 57, 46, 69, 101, 48, 57, 69,
  101, 48, 57, 43, 45, 48, 57, 48,
  57, 48, 57, 46, 69, 101, 48, 57,
  34, 92, 34, 92, 34, 47, 92, 98,
  102, 110, 114, 116, 117, 48, 57, 65,
  70, 97, 102, 48, 57, 65, 70, 97,
  102, 48, 57, 65, 70, 97, 102, 48,
  57, 65, 70, 97, 102, 34, 92, 45,
  48, 49, 57, 48, 49, 57, 46, 115,
  48, 57, 115, 48, 57, 34, 46, 115,
  48, 57, 48, 57, 48, 57, 48, 57,
  48, 57, 45, 48, 57, 48, 57, 45,
  48, 57, 48, 57, 84, 48, 57, 48,
  57, 58, 48, 57, 48, 57, 58, 48,
  57, 48, 57, 43, 45, 46, 90, 48,
  57, 48, 57, 58, 48, 48, 34, 48,
  57, 43, 45, 90, 48, 57, 34, 44,
  34, 44, 34, 44, 34, 45, 91, 102,
  110, 116, 123, 48, 57, 34, 32, 93,
  125, 9, 13, 32, 44, 93, 9, 13,
  32, 93, 125, 9, 13, 97, 108, 115,
  101, 117, 108, 108, 114, 117, 101, 32,
  34, 125, 9, 13, 34, 32, 58, 9,
  13, 32, 93, 125, 9, 13, 32, 44,
  125, 9, 13, 32, 44, 125, 9, 13,
  32, 34, 9, 13, 32, 9, 13, 32,
  9, 13, 32, 9, 13, 32, 9, 13,
  32, 9, 13, 32, 9, 13, 0
};
11618
/* Generated by Ragel -- do not edit by hand.
 *
 * Indexed by the current state `cs`: number of single-character keys that
 * state has in _json_trans_keys. */
static const char _json_single_lengths[] = {
  0, 8, 1, 3, 3, 3, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  3, 1, 2, 3, 3, 3, 2, 2,
  1, 3, 0, 2, 2, 0, 0, 3,
  2, 2, 9, 0, 0, 0, 0, 2,
  2, 1, 2, 0, 1, 1, 2, 0,
  0, 0, 0, 1, 0, 0, 1, 0,
  0, 1, 0, 0, 1, 0, 0, 1,
  0, 0, 4, 0, 0, 1, 1, 1,
  1, 0, 3, 2, 2, 2, 7, 1,
  0, 0, 3, 3, 3, 1, 1, 1,
  1, 0, 1, 1, 1, 0, 1, 1,
  1, 0, 3, 1, 2, 3, 3, 3,
  2, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0
};
11636
/* Generated by Ragel -- do not edit by hand.
 *
 * Indexed by the current state `cs`: number of key ranges that state has in
 * _json_trans_keys.  Each range occupies two consecutive entries (low,
 * high). */
static const char _json_range_lengths[] = {
  0, 2, 0, 1, 1, 1, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 0, 1, 1, 1, 1, 1, 1,
  1, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 3, 3, 3, 3, 0,
  1, 1, 0, 1, 1, 0, 1, 1,
  1, 1, 1, 0, 1, 1, 0, 1,
  1, 0, 1, 1, 0, 1, 1, 0,
  1, 1, 0, 1, 1, 0, 0, 0,
  0, 1, 1, 0, 0, 0, 1, 0,
  0, 0, 1, 1, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 1, 1, 1, 1,
  1, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0
};
11654
/* Generated by Ragel -- do not edit by hand.
 *
 * Indexed by the current state `cs`: base index of that state's entries in
 * _json_indicies.  parse() adds the position of the matched key (or the total
 * key count when nothing matches) to this base. */
static const short _json_index_offsets[] = {
  0, 0, 11, 13, 18, 23, 28, 30,
  32, 34, 36, 38, 40, 42, 44, 46,
  48, 53, 55, 59, 64, 69, 74, 78,
  82, 85, 89, 91, 95, 99, 101, 103,
  108, 111, 114, 124, 128, 132, 136, 140,
  143, 147, 150, 153, 155, 158, 160, 164,
  166, 168, 170, 172, 174, 176, 178, 180,
  182, 184, 186, 188, 190, 192, 194, 196,
  198, 200, 202, 207, 209, 211, 213, 215,
  217, 219, 221, 226, 229, 232, 235, 244,
  246, 247, 248, 253, 258, 263, 265, 267,
  269, 271, 272, 274, 276, 278, 279, 281,
  283, 285, 286, 291, 293, 297, 302, 307,
  312, 316, 317, 320, 323, 326, 329, 332,
  335, 336, 337, 338, 339, 340
};
11672
/* Generated by Ragel -- do not edit by hand.
 *
 * Maps a (state base + key position) slot, as computed in parse(), to a
 * transition number.  That number indexes _json_trans_targs and
 * _json_trans_actions. */
static const unsigned char _json_indicies[] = {
  0, 2, 3, 4, 5, 6, 7, 8,
  0, 3, 1, 9, 1, 11, 12, 1,
  11, 10, 13, 14, 12, 13, 1, 14,
  1, 1, 14, 10, 15, 1, 16, 1,
  17, 1, 18, 1, 19, 1, 20, 1,
  21, 1, 22, 1, 23, 1, 24, 1,
  25, 26, 27, 25, 1, 28, 1, 29,
  30, 29, 1, 30, 1, 1, 30, 31,
  32, 33, 34, 32, 1, 35, 36, 27,
  35, 1, 36, 26, 36, 1, 37, 38,
  39, 1, 38, 39, 1, 41, 42, 42,
  40, 43, 1, 42, 42, 43, 40, 44,
  44, 45, 1, 45, 1, 45, 40, 41,
  42, 42, 39, 40, 47, 48, 46, 50,
  51, 49, 52, 52, 52, 52, 52, 52,
  52, 52, 53, 1, 54, 54, 54, 1,
  55, 55, 55, 1, 56, 56, 56, 1,
  57, 57, 57, 1, 59, 60, 58, 61,
  62, 63, 1, 64, 65, 1, 66, 67,
  1, 68, 1, 67, 68, 1, 69, 1,
  66, 67, 65, 1, 70, 1, 71, 1,
  72, 1, 73, 1, 74, 1, 75, 1,
  76, 1, 77, 1, 78, 1, 79, 1,
  80, 1, 81, 1, 82, 1, 83, 1,
  84, 1, 85, 1, 86, 1, 87, 1,
  88, 1, 89, 89, 90, 91, 1, 92,
  1, 93, 1, 94, 1, 95, 1, 96,
  1, 97, 1, 98, 1, 99, 99, 100,
  98, 1, 102, 1, 101, 104, 105, 103,
  1, 1, 101, 106, 107, 108, 109, 110,
  111, 112, 107, 1, 113, 1, 114, 115,
  117, 118, 1, 117, 116, 119, 120, 118,
  119, 1, 120, 1, 1, 120, 116, 121,
  1, 122, 1, 123, 1, 124, 1, 125,
  126, 1, 127, 1, 128, 1, 129, 130,
  1, 131, 1, 132, 1, 133, 134, 135,
  136, 134, 1, 137, 1, 138, 139, 138,
  1, 139, 1, 1, 139, 140, 141, 142,
  143, 141, 1, 144, 145, 136, 144, 1,
  145, 135, 145, 1, 146, 147, 147, 1,
  148, 148, 1, 149, 149, 1, 150, 150,
  1, 151, 151, 1, 152, 152, 1, 1,
  1, 1, 1, 1, 1, 0
};
11718
/* Generated by Ragel -- do not edit by hand.
 *
 * Indexed by transition number (from _json_indicies): the state that parse()
 * assigns to `cs` when the transition is taken. */
static const char _json_trans_targs[] = {
  1, 0, 2, 107, 3, 6, 10, 13,
  16, 106, 4, 3, 106, 4, 5, 7,
  8, 9, 108, 11, 12, 109, 14, 15,
  110, 16, 17, 111, 18, 18, 19, 20,
  21, 22, 111, 21, 22, 24, 25, 31,
  112, 26, 28, 27, 29, 30, 33, 113,
  34, 33, 113, 34, 32, 35, 36, 37,
  38, 39, 33, 113, 34, 41, 42, 46,
  42, 46, 43, 45, 44, 114, 48, 49,
  50, 51, 52, 53, 54, 55, 56, 57,
  58, 59, 60, 61, 62, 63, 64, 65,
  66, 67, 73, 72, 68, 69, 70, 71,
  72, 115, 74, 67, 72, 76, 116, 76,
  116, 77, 79, 81, 82, 85, 90, 94,
  98, 80, 117, 117, 83, 82, 80, 83,
  84, 86, 87, 88, 89, 117, 91, 92,
  93, 117, 95, 96, 97, 117, 98, 99,
  105, 100, 100, 101, 102, 103, 104, 105,
  103, 104, 117, 106, 106, 106, 106, 106,
  106
};
11741
/* Generated by Ragel -- do not edit by hand.
 *
 * Indexed by transition number (from _json_indicies): offset into
 * _json_actions of the action list to run when the transition is taken.
 * parse() treats 0 as "no actions". */
static const unsigned char _json_trans_actions[] = {
  0, 0, 113, 107, 53, 0, 0, 0,
  125, 59, 45, 0, 55, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 101, 51, 47, 0, 0, 45,
  49, 49, 104, 0, 0, 0, 0, 0,
  3, 0, 0, 0, 0, 0, 5, 15,
  0, 0, 71, 7, 13, 0, 74, 9,
  9, 9, 77, 80, 11, 37, 37, 37,
  0, 0, 0, 39, 0, 41, 86, 0,
  0, 0, 17, 19, 0, 21, 23, 0,
  25, 27, 0, 29, 31, 0, 33, 35,
  0, 135, 83, 135, 0, 0, 0, 0,
  0, 92, 0, 89, 89, 98, 43, 0,
  131, 95, 113, 107, 53, 0, 0, 0,
  125, 59, 69, 110, 45, 0, 55, 0,
  0, 0, 0, 0, 0, 119, 0, 0,
  0, 122, 0, 0, 0, 116, 0, 101,
  51, 47, 0, 0, 45, 49, 49, 104,
  0, 0, 128, 0, 57, 63, 65, 61,
  67
};
11764
/* Generated by Ragel -- do not edit by hand.
 *
 * One entry per state.  NOTE(review): presumably offsets into _json_actions
 * for the actions to run when input ends in that state; the code that reads
 * this table is not part of this excerpt -- confirm against its use. */
static const unsigned char _json_eof_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 0, 1, 0, 0, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 57, 63, 65, 61, 67,
  0, 0, 0, 0, 0, 0
};
11782
/* Generated by Ragel -- do not edit by hand.
 *
 * State numbers of the machine.  json_start and json_en_main are both 1.  The
 * json_en_* values 23, 32, 40, 47, 75 and 78 are the same numbers that the
 * generated actions in parse() assign to `cs` when pushing the current state
 * and jumping into a sub-machine. */
static const int json_start = 1;

static const int json_en_number_machine = 23;
static const int json_en_string_machine = 32;
static const int json_en_duration_machine = 40;
static const int json_en_timestamp_machine = 47;
static const int json_en_fieldmask_machine = 75;
static const int json_en_value_machine = 78;
static const int json_en_main = 1;
11792
11793
11794 #line 2783 "upb/json/parser.rl"
11795
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)11796 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11797 const upb_bufhandle *handle) {
11798 upb_json_parser *parser = closure;
11799
11800 /* Variables used by Ragel's generated code. */
11801 int cs = parser->current_state;
11802 int *stack = parser->parser_stack;
11803 int top = parser->parser_top;
11804
11805 const char *p = buf;
11806 const char *pe = buf + size;
11807 const char *eof = &eof_ch;
11808
11809 parser->handle = handle;
11810
11811 UPB_UNUSED(hd);
11812 UPB_UNUSED(handle);
11813
11814 capture_resume(parser, buf);
11815
11816
11817 #line 2861 "upb/json/parser.c"
11818 {
11819 int _klen;
11820 unsigned int _trans;
11821 const char *_acts;
11822 unsigned int _nacts;
11823 const char *_keys;
11824
11825 if ( p == pe )
11826 goto _test_eof;
11827 if ( cs == 0 )
11828 goto _out;
11829 _resume:
11830 _keys = _json_trans_keys + _json_key_offsets[cs];
11831 _trans = _json_index_offsets[cs];
11832
11833 _klen = _json_single_lengths[cs];
11834 if ( _klen > 0 ) {
11835 const char *_lower = _keys;
11836 const char *_mid;
11837 const char *_upper = _keys + _klen - 1;
11838 while (1) {
11839 if ( _upper < _lower )
11840 break;
11841
11842 _mid = _lower + ((_upper-_lower) >> 1);
11843 if ( (*p) < *_mid )
11844 _upper = _mid - 1;
11845 else if ( (*p) > *_mid )
11846 _lower = _mid + 1;
11847 else {
11848 _trans += (unsigned int)(_mid - _keys);
11849 goto _match;
11850 }
11851 }
11852 _keys += _klen;
11853 _trans += _klen;
11854 }
11855
11856 _klen = _json_range_lengths[cs];
11857 if ( _klen > 0 ) {
11858 const char *_lower = _keys;
11859 const char *_mid;
11860 const char *_upper = _keys + (_klen<<1) - 2;
11861 while (1) {
11862 if ( _upper < _lower )
11863 break;
11864
11865 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
11866 if ( (*p) < _mid[0] )
11867 _upper = _mid - 2;
11868 else if ( (*p) > _mid[1] )
11869 _lower = _mid + 2;
11870 else {
11871 _trans += (unsigned int)((_mid - _keys)>>1);
11872 goto _match;
11873 }
11874 }
11875 _trans += _klen;
11876 }
11877
11878 _match:
11879 _trans = _json_indicies[_trans];
11880 cs = _json_trans_targs[_trans];
11881
11882 if ( _json_trans_actions[_trans] == 0 )
11883 goto _again;
11884
11885 _acts = _json_actions + _json_trans_actions[_trans];
11886 _nacts = (unsigned int) *_acts++;
11887 while ( _nacts-- > 0 )
11888 {
11889 switch ( *_acts++ )
11890 {
11891 case 1:
11892 #line 2588 "upb/json/parser.rl"
11893 { p--; {cs = stack[--top]; goto _again;} }
11894 break;
11895 case 2:
11896 #line 2590 "upb/json/parser.rl"
11897 { p--; {stack[top++] = cs; cs = 23;goto _again;} }
11898 break;
11899 case 3:
11900 #line 2594 "upb/json/parser.rl"
11901 { start_text(parser, p); }
11902 break;
11903 case 4:
11904 #line 2595 "upb/json/parser.rl"
11905 { CHECK_RETURN_TOP(end_text(parser, p)); }
11906 break;
11907 case 5:
11908 #line 2601 "upb/json/parser.rl"
11909 { start_hex(parser); }
11910 break;
11911 case 6:
11912 #line 2602 "upb/json/parser.rl"
11913 { hexdigit(parser, p); }
11914 break;
11915 case 7:
11916 #line 2603 "upb/json/parser.rl"
11917 { CHECK_RETURN_TOP(end_hex(parser)); }
11918 break;
11919 case 8:
11920 #line 2609 "upb/json/parser.rl"
11921 { CHECK_RETURN_TOP(escape(parser, p)); }
11922 break;
11923 case 9:
11924 #line 2615 "upb/json/parser.rl"
11925 { p--; {cs = stack[--top]; goto _again;} }
11926 break;
11927 case 10:
11928 #line 2620 "upb/json/parser.rl"
11929 { start_year(parser, p); }
11930 break;
11931 case 11:
11932 #line 2621 "upb/json/parser.rl"
11933 { CHECK_RETURN_TOP(end_year(parser, p)); }
11934 break;
11935 case 12:
11936 #line 2625 "upb/json/parser.rl"
11937 { start_month(parser, p); }
11938 break;
11939 case 13:
11940 #line 2626 "upb/json/parser.rl"
11941 { CHECK_RETURN_TOP(end_month(parser, p)); }
11942 break;
11943 case 14:
11944 #line 2630 "upb/json/parser.rl"
11945 { start_day(parser, p); }
11946 break;
11947 case 15:
11948 #line 2631 "upb/json/parser.rl"
11949 { CHECK_RETURN_TOP(end_day(parser, p)); }
11950 break;
11951 case 16:
11952 #line 2635 "upb/json/parser.rl"
11953 { start_hour(parser, p); }
11954 break;
11955 case 17:
11956 #line 2636 "upb/json/parser.rl"
11957 { CHECK_RETURN_TOP(end_hour(parser, p)); }
11958 break;
11959 case 18:
11960 #line 2640 "upb/json/parser.rl"
11961 { start_minute(parser, p); }
11962 break;
11963 case 19:
11964 #line 2641 "upb/json/parser.rl"
11965 { CHECK_RETURN_TOP(end_minute(parser, p)); }
11966 break;
11967 case 20:
11968 #line 2645 "upb/json/parser.rl"
11969 { start_second(parser, p); }
11970 break;
11971 case 21:
11972 #line 2646 "upb/json/parser.rl"
11973 { CHECK_RETURN_TOP(end_second(parser, p)); }
11974 break;
11975 case 22:
11976 #line 2651 "upb/json/parser.rl"
11977 { start_duration_base(parser, p); }
11978 break;
11979 case 23:
11980 #line 2652 "upb/json/parser.rl"
11981 { CHECK_RETURN_TOP(end_duration_base(parser, p)); }
11982 break;
11983 case 24:
11984 #line 2654 "upb/json/parser.rl"
11985 { p--; {cs = stack[--top]; goto _again;} }
11986 break;
11987 case 25:
11988 #line 2659 "upb/json/parser.rl"
11989 { start_timestamp_base(parser); }
11990 break;
11991 case 26:
11992 #line 2661 "upb/json/parser.rl"
11993 { start_timestamp_fraction(parser, p); }
11994 break;
11995 case 27:
11996 #line 2662 "upb/json/parser.rl"
11997 { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
11998 break;
11999 case 28:
12000 #line 2664 "upb/json/parser.rl"
12001 { start_timestamp_zone(parser, p); }
12002 break;
12003 case 29:
12004 #line 2665 "upb/json/parser.rl"
12005 { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
12006 break;
12007 case 30:
12008 #line 2667 "upb/json/parser.rl"
12009 { p--; {cs = stack[--top]; goto _again;} }
12010 break;
12011 case 31:
12012 #line 2672 "upb/json/parser.rl"
12013 { start_fieldmask_path_text(parser, p); }
12014 break;
12015 case 32:
12016 #line 2673 "upb/json/parser.rl"
12017 { end_fieldmask_path_text(parser, p); }
12018 break;
12019 case 33:
12020 #line 2678 "upb/json/parser.rl"
12021 { start_fieldmask_path(parser); }
12022 break;
12023 case 34:
12024 #line 2679 "upb/json/parser.rl"
12025 { end_fieldmask_path(parser); }
12026 break;
12027 case 35:
12028 #line 2685 "upb/json/parser.rl"
12029 { p--; {cs = stack[--top]; goto _again;} }
12030 break;
12031 case 36:
12032 #line 2690 "upb/json/parser.rl"
12033 {
12034 if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
12035 {stack[top++] = cs; cs = 47;goto _again;}
12036 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
12037 {stack[top++] = cs; cs = 40;goto _again;}
12038 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
12039 {stack[top++] = cs; cs = 75;goto _again;}
12040 } else {
12041 {stack[top++] = cs; cs = 32;goto _again;}
12042 }
12043 }
12044 break;
12045 case 37:
12046 #line 2703 "upb/json/parser.rl"
12047 { p--; {stack[top++] = cs; cs = 78;goto _again;} }
12048 break;
12049 case 38:
12050 #line 2708 "upb/json/parser.rl"
12051 {
12052 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12053 start_any_member(parser, p);
12054 } else {
12055 start_member(parser);
12056 }
12057 }
12058 break;
12059 case 39:
12060 #line 2715 "upb/json/parser.rl"
12061 { CHECK_RETURN_TOP(end_membername(parser)); }
12062 break;
12063 case 40:
12064 #line 2718 "upb/json/parser.rl"
12065 {
12066 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12067 end_any_member(parser, p);
12068 } else {
12069 end_member(parser);
12070 }
12071 }
12072 break;
12073 case 41:
12074 #line 2729 "upb/json/parser.rl"
12075 {
12076 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12077 start_any_object(parser, p);
12078 } else {
12079 start_object(parser);
12080 }
12081 }
12082 break;
12083 case 42:
12084 #line 2738 "upb/json/parser.rl"
12085 {
12086 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12087 CHECK_RETURN_TOP(end_any_object(parser, p));
12088 } else {
12089 end_object(parser);
12090 }
12091 }
12092 break;
12093 case 43:
12094 #line 2750 "upb/json/parser.rl"
12095 { CHECK_RETURN_TOP(start_array(parser)); }
12096 break;
12097 case 44:
12098 #line 2754 "upb/json/parser.rl"
12099 { end_array(parser); }
12100 break;
12101 case 45:
12102 #line 2759 "upb/json/parser.rl"
12103 { CHECK_RETURN_TOP(start_number(parser, p)); }
12104 break;
12105 case 46:
12106 #line 2760 "upb/json/parser.rl"
12107 { CHECK_RETURN_TOP(end_number(parser, p)); }
12108 break;
12109 case 47:
12110 #line 2762 "upb/json/parser.rl"
12111 { CHECK_RETURN_TOP(start_stringval(parser)); }
12112 break;
12113 case 48:
12114 #line 2763 "upb/json/parser.rl"
12115 { CHECK_RETURN_TOP(end_stringval(parser)); }
12116 break;
12117 case 49:
12118 #line 2765 "upb/json/parser.rl"
12119 { CHECK_RETURN_TOP(end_bool(parser, true)); }
12120 break;
12121 case 50:
12122 #line 2767 "upb/json/parser.rl"
12123 { CHECK_RETURN_TOP(end_bool(parser, false)); }
12124 break;
12125 case 51:
12126 #line 2769 "upb/json/parser.rl"
12127 { CHECK_RETURN_TOP(end_null(parser)); }
12128 break;
12129 case 52:
12130 #line 2771 "upb/json/parser.rl"
12131 { CHECK_RETURN_TOP(start_subobject_full(parser)); }
12132 break;
12133 case 53:
12134 #line 2772 "upb/json/parser.rl"
12135 { end_subobject_full(parser); }
12136 break;
12137 case 54:
12138 #line 2777 "upb/json/parser.rl"
12139 { p--; {cs = stack[--top]; goto _again;} }
12140 break;
12141 #line 3185 "upb/json/parser.c"
12142 }
12143 }
12144
12145 _again:
12146 if ( cs == 0 )
12147 goto _out;
12148 if ( ++p != pe )
12149 goto _resume;
12150 _test_eof: {}
12151 if ( p == eof )
12152 {
12153 const char *__acts = _json_actions + _json_eof_actions[cs];
12154 unsigned int __nacts = (unsigned int) *__acts++;
12155 while ( __nacts-- > 0 ) {
12156 switch ( *__acts++ ) {
12157 case 0:
12158 #line 2586 "upb/json/parser.rl"
12159 { p--; {cs = stack[--top]; if ( p == pe )
12160 goto _test_eof;
12161 goto _again;} }
12162 break;
12163 case 46:
12164 #line 2760 "upb/json/parser.rl"
12165 { CHECK_RETURN_TOP(end_number(parser, p)); }
12166 break;
12167 case 49:
12168 #line 2765 "upb/json/parser.rl"
12169 { CHECK_RETURN_TOP(end_bool(parser, true)); }
12170 break;
12171 case 50:
12172 #line 2767 "upb/json/parser.rl"
12173 { CHECK_RETURN_TOP(end_bool(parser, false)); }
12174 break;
12175 case 51:
12176 #line 2769 "upb/json/parser.rl"
12177 { CHECK_RETURN_TOP(end_null(parser)); }
12178 break;
12179 case 53:
12180 #line 2772 "upb/json/parser.rl"
12181 { end_subobject_full(parser); }
12182 break;
12183 #line 3227 "upb/json/parser.c"
12184 }
12185 }
12186 }
12187
12188 _out: {}
12189 }
12190
12191 #line 2805 "upb/json/parser.rl"
12192
12193 if (p != pe) {
12194 upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p);
12195 } else {
12196 capture_suspend(parser, &p);
12197 }
12198
12199 error:
12200 /* Save parsing state back to parser. */
12201 parser->current_state = cs;
12202 parser->parser_top = top;
12203
12204 return p - buf;
12205 }
12206
end(void * closure,const void * hd)12207 static bool end(void *closure, const void *hd) {
12208 upb_json_parser *parser = closure;
12209
12210 /* Prevent compile warning on unused static constants. */
12211 UPB_UNUSED(json_start);
12212 UPB_UNUSED(json_en_duration_machine);
12213 UPB_UNUSED(json_en_fieldmask_machine);
12214 UPB_UNUSED(json_en_number_machine);
12215 UPB_UNUSED(json_en_string_machine);
12216 UPB_UNUSED(json_en_timestamp_machine);
12217 UPB_UNUSED(json_en_value_machine);
12218 UPB_UNUSED(json_en_main);
12219
12220 parse(parser, hd, &eof_ch, 0, NULL);
12221
12222 return parser->current_state >= 106;
12223 }
12224
/* Puts |p| back into its initial parsing state: the frame stack is rewound to
 * its first slot, the generated state machine is set to json_start with an
 * empty call stack, and the accumulate/capture/multipart bookkeeping is
 * cleared. */
static void json_parser_reset(upb_json_parser *p) {
  int cs;
  int top;

  /* Rewind the frame stack and re-initialize its first frame. */
  p->top = p->stack;
  init_frame(p->top);

  /* Emit Ragel initialization of the parser. */

#line 3278 "upb/json/parser.c"
	{
	cs = json_start;
	top = 0;
	}

#line 2847 "upb/json/parser.rl"
  p->current_state = cs;  /* Store the generated start state in the parser. */
  p->parser_top = top;    /* Store the empty state-machine call stack. */
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
  p->accumulated = NULL;
}
12248
parsermethod_new(upb_json_codecache * c,const upb_msgdef * md)12249 static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
12250 const upb_msgdef *md) {
12251 upb_msg_field_iter i;
12252 upb_alloc *alloc = upb_arena_alloc(c->arena);
12253
12254 upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m));
12255
12256 m->cache = c;
12257
12258 upb_byteshandler_init(&m->input_handler_);
12259 upb_byteshandler_setstring(&m->input_handler_, parse, m);
12260 upb_byteshandler_setendstr(&m->input_handler_, end, m);
12261
12262 upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
12263
12264 /* Build name_table */
12265
12266 for(upb_msg_field_begin(&i, md);
12267 !upb_msg_field_done(&i);
12268 upb_msg_field_next(&i)) {
12269 const upb_fielddef *f = upb_msg_iter_field(&i);
12270 upb_value v = upb_value_constptr(f);
12271 const char *name;
12272
12273 /* Add an entry for the JSON name. */
12274 name = upb_fielddef_jsonname(f);
12275 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
12276
12277 if (strcmp(name, upb_fielddef_name(f)) != 0) {
12278 /* Since the JSON name is different from the regular field name, add an
12279 * entry for the raw name (compliant proto3 JSON parsers must accept
12280 * both). */
12281 const char *name = upb_fielddef_name(f);
12282 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
12283 }
12284 }
12285
12286 return m;
12287 }
12288
12289 /* Public API *****************************************************************/
12290
upb_json_parser_create(upb_arena * arena,const upb_json_parsermethod * method,const upb_symtab * symtab,upb_sink output,upb_status * status,bool ignore_json_unknown)12291 upb_json_parser *upb_json_parser_create(upb_arena *arena,
12292 const upb_json_parsermethod *method,
12293 const upb_symtab* symtab,
12294 upb_sink output,
12295 upb_status *status,
12296 bool ignore_json_unknown) {
12297 #ifndef NDEBUG
12298 const size_t size_before = upb_arena_bytesallocated(arena);
12299 #endif
12300 upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser));
12301 if (!p) return false;
12302
12303 p->arena = arena;
12304 p->method = method;
12305 p->status = status;
12306 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
12307 p->accumulate_buf = NULL;
12308 p->accumulate_buf_size = 0;
12309 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
12310
12311 json_parser_reset(p);
12312 p->top->sink = output;
12313 p->top->m = upb_handlers_msgdef(output.handlers);
12314 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
12315 p->top->is_any = true;
12316 p->top->any_frame = json_parser_any_frame_new(p);
12317 } else {
12318 p->top->is_any = false;
12319 p->top->any_frame = NULL;
12320 }
12321 set_name_table(p, p->top);
12322 p->symtab = symtab;
12323
12324 p->ignore_json_unknown = ignore_json_unknown;
12325
12326 /* If this fails, uncomment and increase the value in parser.h. */
12327 /* fprintf(stderr, "%zd\n", upb_arena_bytesallocated(arena) - size_before); */
12328 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
12329 UPB_JSON_PARSER_SIZE);
12330 return p;
12331 }
12332
upb_json_parser_input(upb_json_parser * p)12333 upb_bytessink upb_json_parser_input(upb_json_parser *p) {
12334 return p->input_;
12335 }
12336
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)12337 const upb_byteshandler *upb_json_parsermethod_inputhandler(
12338 const upb_json_parsermethod *m) {
12339 return &m->input_handler_;
12340 }
12341
upb_json_codecache_new(void)12342 upb_json_codecache *upb_json_codecache_new(void) {
12343 upb_alloc *alloc;
12344 upb_json_codecache *c;
12345
12346 c = upb_gmalloc(sizeof(*c));
12347
12348 c->arena = upb_arena_new();
12349 alloc = upb_arena_alloc(c->arena);
12350
12351 upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc);
12352
12353 return c;
12354 }
12355
/* Frees the cache's arena and then the cache struct itself.
 * Passing NULL is a no-op, mirroring free(). */
void upb_json_codecache_free(upb_json_codecache *c) {
  if (!c) return;
  upb_arena_free(c->arena);
  upb_gfree(c);
}
12360
upb_json_codecache_get(upb_json_codecache * c,const upb_msgdef * md)12361 const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c,
12362 const upb_msgdef *md) {
12363 upb_json_parsermethod *m;
12364 upb_value v;
12365 upb_msg_field_iter i;
12366 upb_alloc *alloc = upb_arena_alloc(c->arena);
12367
12368 if (upb_inttable_lookupptr(&c->methods, md, &v)) {
12369 return upb_value_getconstptr(v);
12370 }
12371
12372 m = parsermethod_new(c, md);
12373 v = upb_value_constptr(m);
12374
12375 if (!m) return NULL;
12376 if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL;
12377
12378 /* Populate parser methods for all submessages, so the name tables will
12379 * be available during parsing. */
12380 for(upb_msg_field_begin(&i, md);
12381 !upb_msg_field_done(&i);
12382 upb_msg_field_next(&i)) {
12383 upb_fielddef *f = upb_msg_iter_field(&i);
12384
12385 if (upb_fielddef_issubmsg(f)) {
12386 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
12387 const upb_json_parsermethod *sub_method =
12388 upb_json_codecache_get(c, subdef);
12389
12390 if (!sub_method) return NULL;
12391 }
12392 }
12393
12394 return m;
12395 }
12396 /*
12397 ** This currently uses snprintf() to format primitives, and could be optimized
12398 ** further.
12399 */
12400
12401
12402 #include <ctype.h>
12403 #include <inttypes.h>
12404 #include <stdint.h>
12405 #include <string.h>
12406 #include <time.h>
12407
12408
/* State of one JSON printer: the output bytes sink it writes to, plus the
 * nesting bookkeeping needed to place commas correctly. */
struct upb_json_printer {
  upb_sink input_;
  /* BytesSink closure. */
  void *subc_;
  /* Bytes sink that receives the generated JSON text. */
  upb_bytessink output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];

  /* To print timestamp, printer needs to cache its seconds and nanos values
   * and convert them when ending timestamp message. See comments of
   * printer_sethandlers_timestamp for more detail. */
  int64_t seconds;
  int32_t nanos;
};
12435
/* StringPiece; a pointer plus a length.  The instances created by newstrpc()
 * and newstrpc_str() hold a heap copy of the text in |ptr|, which freestrpc()
 * releases together with the struct. */
typedef struct {
  char *ptr;   /* The characters. */
  size_t len;  /* Number of characters in |ptr|, excluding the terminator. */
} strpc;
12441
freestrpc(void * ptr)12442 void freestrpc(void *ptr) {
12443 strpc *pc = ptr;
12444 upb_gfree(pc->ptr);
12445 upb_gfree(pc);
12446 }
12447
/* Printer options.
 * NOTE(review): presumably attached to a handler cache when the printer
 * handlers are built -- confirm against the code that uses this type. */
typedef struct {
  /* NOTE(review): presumably forwarded to newstrpc(), where a true value
   * selects the raw field name over the JSON name -- confirm. */
  bool preserve_fieldnames;
} upb_json_printercache;
12451
12452 /* Convert fielddef name to JSON name and return as a string piece. */
newstrpc(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames)12453 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
12454 bool preserve_fieldnames) {
12455 /* TODO(haberman): handle malloc failure. */
12456 strpc *ret = upb_gmalloc(sizeof(*ret));
12457 if (preserve_fieldnames) {
12458 ret->ptr = upb_gstrdup(upb_fielddef_name(f));
12459 ret->len = strlen(ret->ptr);
12460 } else {
12461 ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f));
12462 ret->len = strlen(ret->ptr);
12463 }
12464
12465 upb_handlers_addcleanup(h, ret, freestrpc);
12466 return ret;
12467 }
12468
12469 /* Convert a null-terminated const char* to a string piece. */
newstrpc_str(upb_handlers * h,const char * str)12470 strpc *newstrpc_str(upb_handlers *h, const char * str) {
12471 strpc * ret = upb_gmalloc(sizeof(*ret));
12472 ret->ptr = upb_gstrdup(str);
12473 ret->len = strlen(str);
12474 upb_handlers_addcleanup(h, ret, freestrpc);
12475 return ret;
12476 }
12477
12478 /* ------------ JSON string printing: values, maps, arrays ------------------ */
12479
/* Writes |len| bytes starting at |buf| to the printer's output sink and
 * asserts that the sink accepted all of them. */
static void print_data(upb_json_printer *p, const char *buf, size_t len) {
  size_t accepted;

  /* TODO: Will need to change if we support pushback from the sink. */
  accepted = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT(accepted == len);
}
12486
/* Emits the "," that separates elements of the current frame.  Nothing is
 * printed for the first element of a frame; the frame's first-element flag is
 * cleared either way. */
static void print_comma(upb_json_printer *p) {
  bool *is_first = &p->first_elem_[p->depth_];

  if (*is_first) {
    *is_first = false;
  } else {
    print_data(p, ",", 1);
  }
}
12493
12494 /* Helpers that print properly formatted elements to the JSON output stream. */
12495
12496 /* Used for escaping control chars in strings. */
12497 static const char kControlCharLimit = 0x20;
12498
is_json_escaped(char c)12499 UPB_INLINE bool is_json_escaped(char c) {
12500 /* See RFC 4627. */
12501 unsigned char uc = (unsigned char)c;
12502 return uc < kControlCharLimit || uc == '"' || uc == '\\';
12503 }
12504
json_nice_escape(char c)12505 UPB_INLINE const char* json_nice_escape(char c) {
12506 switch (c) {
12507 case '"': return "\\\"";
12508 case '\\': return "\\\\";
12509 case '\b': return "\\b";
12510 case '\f': return "\\f";
12511 case '\n': return "\\n";
12512 case '\r': return "\\r";
12513 case '\t': return "\\t";
12514 default: return NULL;
12515 }
12516 }
12517
12518 /* Write a properly escaped string chunk. The surrounding quotes are *not*
12519 * printed; this is so that the caller has the option of emitting the string
12520 * content in chunks. */
putstring(upb_json_printer * p,const char * buf,size_t len)12521 static void putstring(upb_json_printer *p, const char *buf, size_t len) {
12522 const char* unescaped_run = NULL;
12523 unsigned int i;
12524 for (i = 0; i < len; i++) {
12525 char c = buf[i];
12526 /* Handle escaping. */
12527 if (is_json_escaped(c)) {
12528 /* Use a "nice" escape, like \n, if one exists for this character. */
12529 const char* escape = json_nice_escape(c);
12530 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
12531 * escape. */
12532 char escape_buf[8];
12533 if (!escape) {
12534 unsigned char byte = (unsigned char)c;
12535 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
12536 escape = escape_buf;
12537 }
12538
12539 /* N.B. that we assume that the input encoding is equal to the output
12540 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
12541 * can simply pass the bytes through. */
12542
12543 /* If there's a current run of unescaped chars, print that run first. */
12544 if (unescaped_run) {
12545 print_data(p, unescaped_run, &buf[i] - unescaped_run);
12546 unescaped_run = NULL;
12547 }
12548 /* Then print the escape code. */
12549 print_data(p, escape, strlen(escape));
12550 } else {
12551 /* Add to the current unescaped run of characters. */
12552 if (unescaped_run == NULL) {
12553 unescaped_run = &buf[i];
12554 }
12555 }
12556 }
12557
12558 /* If the string ended in a run of unescaped characters, print that last run. */
12559 if (unescaped_run) {
12560 print_data(p, unescaped_run, &buf[len] - unescaped_run);
12561 }
12562 }
12563
/* Returns -1 from the enclosing function when condition |x| is false.  In the
 * size_t-returning fmt_* helpers this becomes (size_t)-1, the value that
 * CHKFMT tests for.  Expands to a bare `if` statement. */
#define CHKLENGTH(x) if (!(x)) return -1;
12565
12566 /* Helpers that format floating point values according to our custom formats.
12567 * Right now we use %.8g and %.17g for float/double, respectively, to match
12568 * proto2::util::JsonFormat's defaults. May want to change this later. */
12569
12570 const char neginf[] = "\"-Infinity\"";
12571 const char inf[] = "\"Infinity\"";
12572
fmt_double(double val,char * buf,size_t length)12573 static size_t fmt_double(double val, char* buf, size_t length) {
12574 if (val == UPB_INFINITY) {
12575 CHKLENGTH(length >= strlen(inf));
12576 strcpy(buf, inf);
12577 return strlen(inf);
12578 } else if (val == -UPB_INFINITY) {
12579 CHKLENGTH(length >= strlen(neginf));
12580 strcpy(buf, neginf);
12581 return strlen(neginf);
12582 } else {
12583 size_t n = _upb_snprintf(buf, length, "%.17g", val);
12584 CHKLENGTH(n > 0 && n < length);
12585 return n;
12586 }
12587 }
12588
fmt_float(float val,char * buf,size_t length)12589 static size_t fmt_float(float val, char* buf, size_t length) {
12590 size_t n = _upb_snprintf(buf, length, "%.8g", val);
12591 CHKLENGTH(n > 0 && n < length);
12592 return n;
12593 }
12594
/* Writes "true" or "false" into |buf| (capacity |length| bytes).
 * Returns the number of characters written, or (size_t)-1 if the text does
 * not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  size_t written = _upb_snprintf(buf, length, "%s", text);

  CHKLENGTH(written > 0 && written < length);
  return written;
}
12600
/* Writes |val| in decimal, without quotes, into |buf| (capacity |length|
 * bytes).  Returns the number of characters written, or (size_t)-1 if the
 * text does not fit. */
static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) {
  size_t written;

  written = _upb_snprintf(buf, length, "%" PRId64, val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
12606
/* Writes |val| in decimal, without quotes, into |buf| (capacity |length|
 * bytes).  Returns the number of characters written, or (size_t)-1 if the
 * text does not fit. */
static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) {
  size_t written;

  written = _upb_snprintf(buf, length, "%" PRIu64, val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
12612
/* Writes |val| in decimal, wrapped in double quotes, into |buf| (capacity
 * |length| bytes).  Returns the number of characters written, or (size_t)-1
 * if the text does not fit. */
static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) {
  size_t written;

  written = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
12618
/* Writes |val| in decimal, wrapped in double quotes, into |buf| (capacity
 * |length| bytes).  Returns the number of characters written, or (size_t)-1
 * if the text does not fit. */
static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) {
  size_t written;

  written = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
12624
12625 /* Print a map key given a field name. Called by scalar field handlers and by
12626 * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)12627 static bool putkey(void *closure, const void *handler_data) {
12628 upb_json_printer *p = closure;
12629 const strpc *key = handler_data;
12630 print_comma(p);
12631 print_data(p, "\"", 1);
12632 putstring(p, key->ptr, key->len);
12633 print_data(p, "\":", 2);
12634 return true;
12635 }
12636
/* CHKFMT: returns false from the enclosing function when |val| is (size_t)-1,
 * the failure value produced by the fmt_* helpers.
 * CHK: returns false from the enclosing function when |val| is zero/false.
 * Both expand to a bare `if` statement. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val) if (!(val)) return false;
12639
/* Defines three value handlers for |type|, all formatting through |fmt_func|
 * into a 64-byte stack buffer:
 *   put<type>():       prints just the formatted value.
 *   scalar_<type>():   prints the field key (see putkey) and then the value.
 *   repeated_<type>(): prints an element separator if due and then the value.
 * Each handler returns false if formatting fails and true otherwise. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
12663
/* Defines putmapkey_<type>(): formats |val| through |fmt_func| into a 64-byte
 * stack buffer and prints it as a map key, i.e. wrapped in double quotes and
 * followed by a colon.  Returns false, printing nothing, if formatting fails
 * (as the TYPE_HANDLERS handlers do); returns true otherwise. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                               type val) {                                   \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, "\"", 1);                                                  \
    print_data(p, data, length);                                             \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
12676
/* Value handlers.  32-bit integers are printed as bare numbers (the values
 * are passed to the int64_t formatter); 64-bit integers are printed as quoted
 * strings. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64_as_number)
TYPE_HANDLERS(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS(int64_t,  fmt_int64_as_string)
TYPE_HANDLERS(uint64_t, fmt_uint64_as_string)

/* double and float are not allowed to be map keys. */
/* The map-key handlers add the surrounding quotes themselves, so they all use
 * the unquoted formatters. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number)

#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
12694
/* Handler data for the enum value handlers. */
typedef struct {
  /* Passed to putkey() by scalar_enum(), so it must point to the strpc that
   * holds the field's key. */
  void *keyname;
  /* Enum definition used to translate numeric values to symbolic names. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
12699
scalar_enum(void * closure,const void * handler_data,int32_t val)12700 static bool scalar_enum(void *closure, const void *handler_data,
12701 int32_t val) {
12702 const EnumHandlerData *hd = handler_data;
12703 upb_json_printer *p = closure;
12704 const char *symbolic_name;
12705
12706 CHK(putkey(closure, hd->keyname));
12707
12708 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
12709 if (symbolic_name) {
12710 print_data(p, "\"", 1);
12711 putstring(p, symbolic_name, strlen(symbolic_name));
12712 print_data(p, "\"", 1);
12713 } else {
12714 putint32_t(closure, NULL, val);
12715 }
12716
12717 return true;
12718 }
12719
print_enum_symbolic_name(upb_json_printer * p,const upb_enumdef * def,int32_t val)12720 static void print_enum_symbolic_name(upb_json_printer *p,
12721 const upb_enumdef *def,
12722 int32_t val) {
12723 const char *symbolic_name = upb_enumdef_iton(def, val);
12724 if (symbolic_name) {
12725 print_data(p, "\"", 1);
12726 putstring(p, symbolic_name, strlen(symbolic_name));
12727 print_data(p, "\"", 1);
12728 } else {
12729 putint32_t(p, NULL, val);
12730 }
12731 }
12732
repeated_enum(void * closure,const void * handler_data,int32_t val)12733 static bool repeated_enum(void *closure, const void *handler_data,
12734 int32_t val) {
12735 const EnumHandlerData *hd = handler_data;
12736 upb_json_printer *p = closure;
12737 print_comma(p);
12738
12739 print_enum_symbolic_name(p, hd->enumdef, val);
12740
12741 return true;
12742 }
12743
mapvalue_enum(void * closure,const void * handler_data,int32_t val)12744 static bool mapvalue_enum(void *closure, const void *handler_data,
12745 int32_t val) {
12746 const EnumHandlerData *hd = handler_data;
12747 upb_json_printer *p = closure;
12748
12749 print_enum_symbolic_name(p, hd->enumdef, val);
12750
12751 return true;
12752 }
12753
scalar_startsubmsg(void * closure,const void * handler_data)12754 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
12755 return putkey(closure, handler_data) ? closure : UPB_BREAK;
12756 }
12757
repeated_startsubmsg(void * closure,const void * handler_data)12758 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
12759 upb_json_printer *p = closure;
12760 UPB_UNUSED(handler_data);
12761 print_comma(p);
12762 return closure;
12763 }
12764
/* Opens a new "{" frame: increases the depth, marks the new frame as not yet
 * having an element, and prints the opening brace. */
static void start_frame(upb_json_printer *p) {
  int new_depth = ++p->depth_;

  p->first_elem_[new_depth] = true;
  print_data(p, "{", 1);
}
12770
/* Closes the current frame: prints the closing brace and then decreases the
 * depth. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  p->depth_ -= 1;
}
12775
printer_startmsg(void * closure,const void * handler_data)12776 static bool printer_startmsg(void *closure, const void *handler_data) {
12777 upb_json_printer *p = closure;
12778 UPB_UNUSED(handler_data);
12779 if (p->depth_ == 0) {
12780 upb_bytessink_start(p->output_, 0, &p->subc_);
12781 }
12782 start_frame(p);
12783 return true;
12784 }
12785
/* End-message handler: closes the message's frame and, once the depth is back
 * at 0, ends the output sink.  Always returns true; |s| is not used. */
static bool printer_endmsg(void *closure, const void *handler_data,
                           upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  end_frame(printer);
  if (printer->depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
12796
startseq(void * closure,const void * handler_data)12797 static void *startseq(void *closure, const void *handler_data) {
12798 upb_json_printer *p = closure;
12799 CHK(putkey(closure, handler_data));
12800 p->depth_++;
12801 p->first_elem_[p->depth_] = true;
12802 print_data(p, "[", 1);
12803 return closure;
12804 }
12805
endseq(void * closure,const void * handler_data)12806 static bool endseq(void *closure, const void *handler_data) {
12807 upb_json_printer *p = closure;
12808 UPB_UNUSED(handler_data);
12809 print_data(p, "]", 1);
12810 p->depth_--;
12811 return true;
12812 }
12813
startmap(void * closure,const void * handler_data)12814 static void *startmap(void *closure, const void *handler_data) {
12815 upb_json_printer *p = closure;
12816 CHK(putkey(closure, handler_data));
12817 p->depth_++;
12818 p->first_elem_[p->depth_] = true;
12819 print_data(p, "{", 1);
12820 return closure;
12821 }
12822
endmap(void * closure,const void * handler_data)12823 static bool endmap(void *closure, const void *handler_data) {
12824 upb_json_printer *p = closure;
12825 UPB_UNUSED(handler_data);
12826 print_data(p, "}", 1);
12827 p->depth_--;
12828 return true;
12829 }
12830
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12831 static size_t putstr(void *closure, const void *handler_data, const char *str,
12832 size_t len, const upb_bufhandle *handle) {
12833 upb_json_printer *p = closure;
12834 UPB_UNUSED(handler_data);
12835 UPB_UNUSED(handle);
12836 putstring(p, str, len);
12837 return len;
12838 }
12839
12840 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12841 static size_t putbytes(void *closure, const void *handler_data, const char *str,
12842 size_t len, const upb_bufhandle *handle) {
12843 upb_json_printer *p = closure;
12844
12845 /* This is the regular base64, not the "web-safe" version. */
12846 static const char base64[] =
12847 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12848
12849 /* Base64-encode. */
12850 char data[16000];
12851 const char *limit = data + sizeof(data);
12852 const unsigned char *from = (const unsigned char*)str;
12853 char *to = data;
12854 size_t remaining = len;
12855 size_t bytes;
12856
12857 UPB_UNUSED(handler_data);
12858 UPB_UNUSED(handle);
12859
12860 print_data(p, "\"", 1);
12861
12862 while (remaining > 2) {
12863 if (limit - to < 4) {
12864 bytes = to - data;
12865 putstring(p, data, bytes);
12866 to = data;
12867 }
12868
12869 to[0] = base64[from[0] >> 2];
12870 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12871 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
12872 to[3] = base64[from[2] & 0x3f];
12873
12874 remaining -= 3;
12875 to += 4;
12876 from += 3;
12877 }
12878
12879 switch (remaining) {
12880 case 2:
12881 to[0] = base64[from[0] >> 2];
12882 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12883 to[2] = base64[(from[1] & 0xf) << 2];
12884 to[3] = '=';
12885 to += 4;
12886 from += 2;
12887 break;
12888 case 1:
12889 to[0] = base64[from[0] >> 2];
12890 to[1] = base64[((from[0] & 0x3) << 4)];
12891 to[2] = '=';
12892 to[3] = '=';
12893 to += 4;
12894 from += 1;
12895 break;
12896 }
12897
12898 bytes = to - data;
12899 putstring(p, data, bytes);
12900 print_data(p, "\"", 1);
12901 return len;
12902 }
12903
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)12904 static void *scalar_startstr(void *closure, const void *handler_data,
12905 size_t size_hint) {
12906 upb_json_printer *p = closure;
12907 UPB_UNUSED(handler_data);
12908 UPB_UNUSED(size_hint);
12909 CHK(putkey(closure, handler_data));
12910 print_data(p, "\"", 1);
12911 return p;
12912 }
12913
/* String-chunk handler for a non-repeated string field: prints the escaped
 * chunk and returns the number of bytes consumed, which is |len|. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  /* putstr() always returns |len|, so its result is the result here. */
  return putstr(closure, handler_data, str, len, handle);
}
12920
scalar_endstr(void * closure,const void * handler_data)12921 static bool scalar_endstr(void *closure, const void *handler_data) {
12922 upb_json_printer *p = closure;
12923 UPB_UNUSED(handler_data);
12924 print_data(p, "\"", 1);
12925 return true;
12926 }
12927
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)12928 static void *repeated_startstr(void *closure, const void *handler_data,
12929 size_t size_hint) {
12930 upb_json_printer *p = closure;
12931 UPB_UNUSED(handler_data);
12932 UPB_UNUSED(size_hint);
12933 print_comma(p);
12934 print_data(p, "\"", 1);
12935 return p;
12936 }
12937
/* String-chunk handler for an element of a repeated string field: prints the
 * escaped chunk and returns the number of bytes consumed, which is |len|. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  /* putstr() always returns |len|, so its result is the result here. */
  return putstr(closure, handler_data, str, len, handle);
}
12944
repeated_endstr(void * closure,const void * handler_data)12945 static bool repeated_endstr(void *closure, const void *handler_data) {
12946 upb_json_printer *p = closure;
12947 UPB_UNUSED(handler_data);
12948 print_data(p, "\"", 1);
12949 return true;
12950 }
12951
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)12952 static void *mapkeyval_startstr(void *closure, const void *handler_data,
12953 size_t size_hint) {
12954 upb_json_printer *p = closure;
12955 UPB_UNUSED(handler_data);
12956 UPB_UNUSED(size_hint);
12957 print_data(p, "\"", 1);
12958 return p;
12959 }
12960
/* String-chunk handler for a string map key.
 *
 * Forwards the chunk to putstr() and reports all len bytes as handled on
 * success. CHK leaves the handler early if putstr() reports failure. */
static size_t mapkey_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  const size_t handled = len;
  CHK(putstr(closure, handler_data, str, len, handle));
  return handled;
}
12967
mapkey_endstr(void * closure,const void * handler_data)12968 static bool mapkey_endstr(void *closure, const void *handler_data) {
12969 upb_json_printer *p = closure;
12970 UPB_UNUSED(handler_data);
12971 print_data(p, "\":", 2);
12972 return true;
12973 }
12974
mapvalue_endstr(void * closure,const void * handler_data)12975 static bool mapvalue_endstr(void *closure, const void *handler_data) {
12976 upb_json_printer *p = closure;
12977 UPB_UNUSED(handler_data);
12978 print_data(p, "\"", 1);
12979 return true;
12980 }
12981
/* String handler for a scalar bytes field.
 *
 * Prints the field key with putkey() and then the payload with putbytes(),
 * both driven by the same handler_data. Reports all len bytes as handled on
 * success; CHK leaves the handler early if either step reports failure. */
static size_t scalar_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  const size_t handled = len;
  CHK(putkey(closure, handler_data));
  CHK(putbytes(closure, handler_data, str, len, handle));
  return handled;
}
12989
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12990 static size_t repeated_bytes(void *closure, const void *handler_data,
12991 const char *str, size_t len,
12992 const upb_bufhandle *handle) {
12993 upb_json_printer *p = closure;
12994 print_comma(p);
12995 CHK(putbytes(closure, handler_data, str, len, handle));
12996 return len;
12997 }
12998
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12999 static size_t mapkey_bytes(void *closure, const void *handler_data,
13000 const char *str, size_t len,
13001 const upb_bufhandle *handle) {
13002 upb_json_printer *p = closure;
13003 CHK(putbytes(closure, handler_data, str, len, handle));
13004 print_data(p, ":", 1);
13005 return len;
13006 }
13007
/* Builds the handler data for an enum field and stores it in attr.
 *
 * Allocates an EnumHandlerData with upb_gmalloc(), fills in the field's enum
 * definition and its key name (from newstrpc(), honoring
 * preserve_fieldnames), registers the allocation with h so that upb_gfree
 * releases it, and points attr->handler_data at it.
 *
 * NOTE(review): the upb_gmalloc() result is used without a NULL check, and
 * this function has no way to report failure to its caller. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        bool preserve_fieldnames,
                        upb_handlerattr *attr) {
  EnumHandlerData *enum_data = upb_gmalloc(sizeof(EnumHandlerData));

  enum_data->enumdef = upb_fielddef_enumsubdef(f);
  enum_data->keyname = newstrpc(h, f, preserve_fieldnames);

  upb_handlers_addcleanup(h, enum_data, upb_gfree);
  attr->handler_data = enum_data;
}
13018
13019 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
13020 * in a map).
13021 *
13022 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
13023 * key or value cases properly. The right way to do this is to allocate a
13024 * temporary structure at the start of a mapentry submessage, store key and
13025 * value data in it as key and value handlers are called, and then print the
13026 * key/value pair once at the end of the submessage. If we don't do this, we
13027 * should at least detect the case and throw an error. However, so far all of
13028 * our sources that emit mapentry messages do so canonically (with one key
13029 * field, and then one value field), so this is not a pressing concern at the
13030 * moment. */
printer_sethandlers_mapentry(const void * closure,bool preserve_fieldnames,upb_handlers * h)13031 void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
13032 upb_handlers *h) {
13033 const upb_msgdef *md = upb_handlers_msgdef(h);
13034
13035 /* A mapentry message is printed simply as '"key": value'. Rather than
13036 * special-case key and value for every type below, we just handle both
13037 * fields explicitly here. */
13038 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
13039 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
13040
13041 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13042
13043 UPB_UNUSED(closure);
13044
13045 switch (upb_fielddef_type(key_field)) {
13046 case UPB_TYPE_INT32:
13047 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
13048 break;
13049 case UPB_TYPE_INT64:
13050 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
13051 break;
13052 case UPB_TYPE_UINT32:
13053 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
13054 break;
13055 case UPB_TYPE_UINT64:
13056 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
13057 break;
13058 case UPB_TYPE_BOOL:
13059 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
13060 break;
13061 case UPB_TYPE_STRING:
13062 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
13063 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
13064 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
13065 break;
13066 case UPB_TYPE_BYTES:
13067 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
13068 break;
13069 default:
13070 UPB_ASSERT(false);
13071 break;
13072 }
13073
13074 switch (upb_fielddef_type(value_field)) {
13075 case UPB_TYPE_INT32:
13076 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
13077 break;
13078 case UPB_TYPE_INT64:
13079 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
13080 break;
13081 case UPB_TYPE_UINT32:
13082 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
13083 break;
13084 case UPB_TYPE_UINT64:
13085 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
13086 break;
13087 case UPB_TYPE_BOOL:
13088 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
13089 break;
13090 case UPB_TYPE_FLOAT:
13091 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
13092 break;
13093 case UPB_TYPE_DOUBLE:
13094 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
13095 break;
13096 case UPB_TYPE_STRING:
13097 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
13098 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
13099 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
13100 break;
13101 case UPB_TYPE_BYTES:
13102 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
13103 break;
13104 case UPB_TYPE_ENUM: {
13105 upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
13106 set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
13107 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
13108 break;
13109 }
13110 case UPB_TYPE_MESSAGE:
13111 /* No handler necessary -- the submsg handlers will print the message
13112 * as appropriate. */
13113 break;
13114 }
13115 }
13116
putseconds(void * closure,const void * handler_data,int64_t seconds)13117 static bool putseconds(void *closure, const void *handler_data,
13118 int64_t seconds) {
13119 upb_json_printer *p = closure;
13120 p->seconds = seconds;
13121 UPB_UNUSED(handler_data);
13122 return true;
13123 }
13124
putnanos(void * closure,const void * handler_data,int32_t nanos)13125 static bool putnanos(void *closure, const void *handler_data,
13126 int32_t nanos) {
13127 upb_json_printer *p = closure;
13128 p->nanos = nanos;
13129 UPB_UNUSED(handler_data);
13130 return true;
13131 }
13132
scalar_startstr_nokey(void * closure,const void * handler_data,size_t size_hint)13133 static void *scalar_startstr_nokey(void *closure, const void *handler_data,
13134 size_t size_hint) {
13135 upb_json_printer *p = closure;
13136 UPB_UNUSED(handler_data);
13137 UPB_UNUSED(size_hint);
13138 print_data(p, "\"", 1);
13139 return p;
13140 }
13141
putstr_nokey(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)13142 static size_t putstr_nokey(void *closure, const void *handler_data,
13143 const char *str, size_t len,
13144 const upb_bufhandle *handle) {
13145 upb_json_printer *p = closure;
13146 UPB_UNUSED(handler_data);
13147 UPB_UNUSED(handle);
13148 print_data(p, "\"", 1);
13149 putstring(p, str, len);
13150 print_data(p, "\"", 1);
13151 return len + 2;
13152 }
13153
startseq_nokey(void * closure,const void * handler_data)13154 static void *startseq_nokey(void *closure, const void *handler_data) {
13155 upb_json_printer *p = closure;
13156 UPB_UNUSED(handler_data);
13157 p->depth_++;
13158 p->first_elem_[p->depth_] = true;
13159 print_data(p, "[", 1);
13160 return closure;
13161 }
13162
startseq_fieldmask(void * closure,const void * handler_data)13163 static void *startseq_fieldmask(void *closure, const void *handler_data) {
13164 upb_json_printer *p = closure;
13165 UPB_UNUSED(handler_data);
13166 p->depth_++;
13167 p->first_elem_[p->depth_] = true;
13168 return closure;
13169 }
13170
endseq_fieldmask(void * closure,const void * handler_data)13171 static bool endseq_fieldmask(void *closure, const void *handler_data) {
13172 upb_json_printer *p = closure;
13173 UPB_UNUSED(handler_data);
13174 p->depth_--;
13175 return true;
13176 }
13177
repeated_startstr_fieldmask(void * closure,const void * handler_data,size_t size_hint)13178 static void *repeated_startstr_fieldmask(
13179 void *closure, const void *handler_data,
13180 size_t size_hint) {
13181 upb_json_printer *p = closure;
13182 UPB_UNUSED(handler_data);
13183 UPB_UNUSED(size_hint);
13184 print_comma(p);
13185 return p;
13186 }
13187
/* String handler for one path of a fieldmask: prints the path converted from
 * snake_case to lowerCamelCase.
 *
 * Every '_' is dropped; if the character that follows it is a lowercase ASCII
 * letter, that letter is printed in upper case. All other characters are
 * forwarded unchanged. Each output character goes through putstr() one byte
 * at a time; CHK leaves the handler early if putstr() reports failure.
 *
 * Returns len, the number of input bytes handled. The previous code returned
 * the number of characters written, which is smaller than len whenever the
 * path contains an underscore (and 0 for a chunk made only of underscores);
 * every other string handler in this file reports the input length.
 *
 * NOTE(review): the "previous character was '_'" state is local to one call,
 * so it is lost if a path is delivered in several chunks split right after an
 * underscore. */
static size_t repeated_str_fieldmask(
    void *closure, const void *handler_data,
    const char *str, size_t len,
    const upb_bufhandle *handle) {
  const char *limit = str + len;
  bool upper = false;

  for (; str < limit; str++) {
    if (*str == '_') {
      /* Swallow the underscore; capitalize whatever letter comes next. */
      upper = true;
      continue;
    }
    if (upper && *str >= 'a' && *str <= 'z') {
      /* <ctype.h> functions take an unsigned char value. */
      char upper_char = (char)toupper((unsigned char)*str);
      CHK(putstr(closure, handler_data, &upper_char, 1, handle));
    } else {
      CHK(putstr(closure, handler_data, str, 1, handle));
    }
    upper = false;
  }

  return len;
}
13211
startmap_nokey(void * closure,const void * handler_data)13212 static void *startmap_nokey(void *closure, const void *handler_data) {
13213 upb_json_printer *p = closure;
13214 UPB_UNUSED(handler_data);
13215 p->depth_++;
13216 p->first_elem_[p->depth_] = true;
13217 print_data(p, "{", 1);
13218 return closure;
13219 }
13220
putnull(void * closure,const void * handler_data,int32_t null)13221 static bool putnull(void *closure, const void *handler_data,
13222 int32_t null) {
13223 upb_json_printer *p = closure;
13224 print_data(p, "null", 4);
13225 UPB_UNUSED(handler_data);
13226 UPB_UNUSED(null);
13227 return true;
13228 }
13229
printer_startdurationmsg(void * closure,const void * handler_data)13230 static bool printer_startdurationmsg(void *closure, const void *handler_data) {
13231 upb_json_printer *p = closure;
13232 UPB_UNUSED(handler_data);
13233 if (p->depth_ == 0) {
13234 upb_bytessink_start(p->output_, 0, &p->subc_);
13235 }
13236 return true;
13237 }
13238
13239 #define UPB_DURATION_MAX_JSON_LEN 23
13240 #define UPB_DURATION_MAX_NANO_LEN 9
13241
printer_enddurationmsg(void * closure,const void * handler_data,upb_status * s)13242 static bool printer_enddurationmsg(void *closure, const void *handler_data,
13243 upb_status *s) {
13244 upb_json_printer *p = closure;
13245 char buffer[UPB_DURATION_MAX_JSON_LEN];
13246 size_t base_len;
13247 size_t curr;
13248 size_t i;
13249
13250 memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN);
13251
13252 if (p->seconds < -315576000000) {
13253 upb_status_seterrf(s, "error parsing duration: "
13254 "minimum acceptable value is "
13255 "-315576000000");
13256 return false;
13257 }
13258
13259 if (p->seconds > 315576000000) {
13260 upb_status_seterrf(s, "error serializing duration: "
13261 "maximum acceptable value is "
13262 "315576000000");
13263 return false;
13264 }
13265
13266 _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
13267 base_len = strlen(buffer);
13268
13269 if (p->nanos != 0) {
13270 char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
13271 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13272 p->nanos / 1000000000.0);
13273 /* Remove trailing 0. */
13274 for (i = UPB_DURATION_MAX_NANO_LEN + 2;
13275 nanos_buffer[i] == '0'; i--) {
13276 nanos_buffer[i] = 0;
13277 }
13278 strcpy(buffer + base_len, nanos_buffer + 1);
13279 }
13280
13281 curr = strlen(buffer);
13282 strcpy(buffer + curr, "s");
13283
13284 p->seconds = 0;
13285 p->nanos = 0;
13286
13287 print_data(p, "\"", 1);
13288 print_data(p, buffer, strlen(buffer));
13289 print_data(p, "\"", 1);
13290
13291 if (p->depth_ == 0) {
13292 upb_bytessink_end(p->output_);
13293 }
13294
13295 UPB_UNUSED(handler_data);
13296 return true;
13297 }
13298
printer_starttimestampmsg(void * closure,const void * handler_data)13299 static bool printer_starttimestampmsg(void *closure, const void *handler_data) {
13300 upb_json_printer *p = closure;
13301 UPB_UNUSED(handler_data);
13302 if (p->depth_ == 0) {
13303 upb_bytessink_start(p->output_, 0, &p->subc_);
13304 }
13305 return true;
13306 }
13307
13308 #define UPB_TIMESTAMP_MAX_JSON_LEN 31
13309 #define UPB_TIMESTAMP_BEFORE_NANO_LEN 19
13310 #define UPB_TIMESTAMP_MAX_NANO_LEN 9
13311
printer_endtimestampmsg(void * closure,const void * handler_data,upb_status * s)13312 static bool printer_endtimestampmsg(void *closure, const void *handler_data,
13313 upb_status *s) {
13314 upb_json_printer *p = closure;
13315 char buffer[UPB_TIMESTAMP_MAX_JSON_LEN];
13316 time_t time = p->seconds;
13317 size_t curr;
13318 size_t i;
13319 size_t year_length =
13320 strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time));
13321
13322 if (p->seconds < -62135596800) {
13323 upb_status_seterrf(s, "error parsing timestamp: "
13324 "minimum acceptable value is "
13325 "0001-01-01T00:00:00Z");
13326 return false;
13327 }
13328
13329 if (p->seconds > 253402300799) {
13330 upb_status_seterrf(s, "error parsing timestamp: "
13331 "maximum acceptable value is "
13332 "9999-12-31T23:59:59Z");
13333 return false;
13334 }
13335
13336 /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */
13337 for (i = 0; i < 4 - year_length; i++) {
13338 buffer[i] = '0';
13339 }
13340
13341 strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN,
13342 "%Y-%m-%dT%H:%M:%S", gmtime(&time));
13343 if (p->nanos != 0) {
13344 char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
13345 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13346 p->nanos / 1000000000.0);
13347 /* Remove trailing 0. */
13348 for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
13349 nanos_buffer[i] == '0'; i--) {
13350 nanos_buffer[i] = 0;
13351 }
13352 strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1);
13353 }
13354
13355 curr = strlen(buffer);
13356 strcpy(buffer + curr, "Z");
13357
13358 p->seconds = 0;
13359 p->nanos = 0;
13360
13361 print_data(p, "\"", 1);
13362 print_data(p, buffer, strlen(buffer));
13363 print_data(p, "\"", 1);
13364
13365 if (p->depth_ == 0) {
13366 upb_bytessink_end(p->output_);
13367 }
13368
13369 UPB_UNUSED(handler_data);
13370 UPB_UNUSED(s);
13371 return true;
13372 }
13373
printer_startmsg_noframe(void * closure,const void * handler_data)13374 static bool printer_startmsg_noframe(void *closure, const void *handler_data) {
13375 upb_json_printer *p = closure;
13376 UPB_UNUSED(handler_data);
13377 if (p->depth_ == 0) {
13378 upb_bytessink_start(p->output_, 0, &p->subc_);
13379 }
13380 return true;
13381 }
13382
/* End-of-message handler for messages printed without surrounding braces.
 *
 * Prints nothing. When the message is the outermost one (depth_ is 0) it ends
 * the output byte sink. handler_data and s are ignored; always returns
 * true. */
static bool printer_endmsg_noframe(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;
  const bool is_toplevel = (printer->depth_ == 0);
  UPB_UNUSED(handler_data);
  UPB_UNUSED(s);
  if (is_toplevel) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
13393
printer_startmsg_fieldmask(void * closure,const void * handler_data)13394 static bool printer_startmsg_fieldmask(
13395 void *closure, const void *handler_data) {
13396 upb_json_printer *p = closure;
13397 UPB_UNUSED(handler_data);
13398 if (p->depth_ == 0) {
13399 upb_bytessink_start(p->output_, 0, &p->subc_);
13400 }
13401 print_data(p, "\"", 1);
13402 return true;
13403 }
13404
/* End-of-message handler for a fieldmask message.
 *
 * Writes the closing double quote of the fieldmask string; when the fieldmask
 * is the outermost message (depth_ is 0) the output byte sink is then ended.
 * handler_data and s are ignored; always returns true. */
static bool printer_endmsg_fieldmask(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);
  UPB_UNUSED(s);
  print_data(printer, "\"", 1);
  if (printer->depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
13416
/* Start-of-string handler that prints only the field key (via putkey() with
 * handler_data) and no opening quote. size_hint is ignored. Returns closure,
 * i.e. the printer; CHK leaves the handler early if putkey() reports
 * failure. */
static void *scalar_startstr_onlykey(
    void *closure, const void *handler_data, size_t size_hint) {
  UPB_UNUSED(size_hint);
  CHK(putkey(closure, handler_data));
  return closure;
}
13424
13425 /* Set up handlers for an Any submessage. */
printer_sethandlers_any(const void * closure,upb_handlers * h)13426 void printer_sethandlers_any(const void *closure, upb_handlers *h) {
13427 const upb_msgdef *md = upb_handlers_msgdef(h);
13428
13429 const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE);
13430 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE);
13431
13432 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13433
13434 /* type_url's json name is "@type" */
13435 upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT;
13436 upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT;
13437 strpc *type_url_json_name = newstrpc_str(h, "@type");
13438 strpc *value_json_name = newstrpc_str(h, "value");
13439
13440 type_name_attr.handler_data = type_url_json_name;
13441 value_name_attr.handler_data = value_json_name;
13442
13443 /* Set up handlers. */
13444 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
13445 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
13446
13447 upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr);
13448 upb_handlers_setstring(h, type_field, scalar_str, &empty_attr);
13449 upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr);
13450
13451 /* This is not the full and correct JSON encoding for the Any value field. It
13452 * requires further processing by the wrapper code based on the type URL.
13453 */
13454 upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey,
13455 &value_name_attr);
13456
13457 UPB_UNUSED(closure);
13458 }
13459
13460 /* Set up handlers for a fieldmask submessage. */
printer_sethandlers_fieldmask(const void * closure,upb_handlers * h)13461 void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) {
13462 const upb_msgdef *md = upb_handlers_msgdef(h);
13463 const upb_fielddef* f = upb_msgdef_itof(md, 1);
13464
13465 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13466
13467 upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr);
13468 upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr);
13469
13470 upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr);
13471 upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr);
13472
13473 upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr);
13474 upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr);
13475
13476 UPB_UNUSED(closure);
13477 }
13478
13479 /* Set up handlers for a duration submessage. */
printer_sethandlers_duration(const void * closure,upb_handlers * h)13480 void printer_sethandlers_duration(const void *closure, upb_handlers *h) {
13481 const upb_msgdef *md = upb_handlers_msgdef(h);
13482
13483 const upb_fielddef* seconds_field =
13484 upb_msgdef_itof(md, UPB_DURATION_SECONDS);
13485 const upb_fielddef* nanos_field =
13486 upb_msgdef_itof(md, UPB_DURATION_NANOS);
13487
13488 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13489
13490 upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr);
13491 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13492 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13493 upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr);
13494
13495 UPB_UNUSED(closure);
13496 }
13497
13498 /* Set up handlers for a timestamp submessage. Instead of printing fields
13499 * separately, the json representation of timestamp follows RFC 3339 */
printer_sethandlers_timestamp(const void * closure,upb_handlers * h)13500 void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) {
13501 const upb_msgdef *md = upb_handlers_msgdef(h);
13502
13503 const upb_fielddef* seconds_field =
13504 upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS);
13505 const upb_fielddef* nanos_field =
13506 upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS);
13507
13508 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13509
13510 upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr);
13511 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13512 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13513 upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr);
13514
13515 UPB_UNUSED(closure);
13516 }
13517
/* Set up handlers for a Value well-known-type submessage.
 *
 * The message itself is printed without a frame. Each field is printed bare
 * (no key), chosen by its type: enum fields print null, double and bool
 * fields print their value, string fields print a quoted string, and message
 * fields get no handler here. Any other field type trips UPB_ASSERT. closure
 * is unused. */
void printer_sethandlers_value(const void *closure, upb_handlers *h) {
  upb_handlerattr no_data = UPB_HANDLERATTR_INIT;
  const upb_msgdef *value_def = upb_handlers_msgdef(h);
  upb_msg_field_iter it;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &no_data);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &no_data);

  upb_msg_field_begin(&it, value_def);
  while (!upb_msg_field_done(&it)) {
    const upb_fielddef *field = upb_msg_iter_field(&it);

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, putnull, &no_data);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, putdouble, &no_data);
        break;
      case UPB_TYPE_STRING:
        upb_handlers_setstartstr(h, field, scalar_startstr_nokey, &no_data);
        upb_handlers_setstring(h, field, scalar_str, &no_data);
        upb_handlers_setendstr(h, field, scalar_endstr, &no_data);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, putbool, &no_data);
        break;
      case UPB_TYPE_MESSAGE:
        break;
      default:
        UPB_ASSERT(false);
        break;
    }

    upb_msg_field_next(&it);
  }
}
13556
13557 #define WRAPPER_SETHANDLERS(wrapper, type, putmethod) \
13558 void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \
13559 const upb_msgdef *md = upb_handlers_msgdef(h); \
13560 const upb_fielddef* f = upb_msgdef_itof(md, 1); \
13561 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; \
13562 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); \
13563 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); \
13564 upb_handlers_set##type(h, f, putmethod, &empty_attr); \
13565 UPB_UNUSED(closure); \
13566 }
13567
WRAPPER_SETHANDLERS(doublevalue,double,putdouble)13568 WRAPPER_SETHANDLERS(doublevalue, double, putdouble)
13569 WRAPPER_SETHANDLERS(floatvalue, float, putfloat)
13570 WRAPPER_SETHANDLERS(int64value, int64, putint64_t)
13571 WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t)
13572 WRAPPER_SETHANDLERS(int32value, int32, putint32_t)
13573 WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t)
13574 WRAPPER_SETHANDLERS(boolvalue, bool, putbool)
13575 WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey)
13576 WRAPPER_SETHANDLERS(bytesvalue, string, putbytes)
13577
13578 #undef WRAPPER_SETHANDLERS
13579
13580 void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) {
13581 const upb_msgdef *md = upb_handlers_msgdef(h);
13582 const upb_fielddef* f = upb_msgdef_itof(md, 1);
13583
13584 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13585
13586 upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr);
13587 upb_handlers_setendseq(h, f, endseq, &empty_attr);
13588
13589 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13590 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13591
13592 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
13593
13594 UPB_UNUSED(closure);
13595 }
13596
/* Set up handlers for a Struct well-known-type submessage.
 *
 * The message is printed without a frame; field number 1 is printed as a
 * keyless object whose entries are submessages. closure is unused. */
void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) {
  upb_handlerattr no_data = UPB_HANDLERATTR_INIT;
  const upb_fielddef *fields = upb_msgdef_itof(upb_handlers_msgdef(h), 1);

  UPB_UNUSED(closure);

  upb_handlers_setstartseq(h, fields, startmap_nokey, &no_data);
  upb_handlers_setendseq(h, fields, endmap, &no_data);

  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &no_data);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &no_data);

  upb_handlers_setstartsubmsg(h, fields, repeated_startsubmsg, &no_data);
}
13613
/* Registers the JSON printing handlers for the message type behind h.
 *
 * closure is the upb_json_printercache whose preserve_fieldnames flag is
 * passed on to newstrpc()/set_enum_hd() when key names are built.
 *
 * Dispatch order:
 *  1. mapentry messages are delegated to printer_sethandlers_mapentry();
 *  2. well-known types are delegated to their dedicated
 *     printer_sethandlers_*() function;
 *  3. everything else gets the generic setup below: message frame handlers
 *     plus, for every field, sequence/map handlers when applicable and
 *     value handlers chosen by field type and by whether the field is
 *     repeated. */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_json_printercache *cache = closure;
  const bool preserve_fieldnames = cache->preserve_fieldnames;
  const upb_msgdef *md = upb_handlers_msgdef(h);
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
  upb_msg_field_iter iter;

  if (upb_msgdef_mapentry(md)) {
    /* mapentry messages are sufficiently different that we handle them
     * separately. */
    printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
    return;
  }

  switch (upb_msgdef_wellknowntype(md)) {
    case UPB_WELLKNOWN_UNSPECIFIED:
      break;
    case UPB_WELLKNOWN_ANY:
      printer_sethandlers_any(closure, h);
      return;
    case UPB_WELLKNOWN_FIELDMASK:
      printer_sethandlers_fieldmask(closure, h);
      return;
    case UPB_WELLKNOWN_DURATION:
      printer_sethandlers_duration(closure, h);
      return;
    case UPB_WELLKNOWN_TIMESTAMP:
      printer_sethandlers_timestamp(closure, h);
      return;
    case UPB_WELLKNOWN_VALUE:
      printer_sethandlers_value(closure, h);
      return;
    case UPB_WELLKNOWN_LISTVALUE:
      printer_sethandlers_listvalue(closure, h);
      return;
    case UPB_WELLKNOWN_STRUCT:
      printer_sethandlers_structvalue(closure, h);
      return;
    /* Wrapper types: one dedicated setup function each. */
    case UPB_WELLKNOWN_DOUBLEVALUE:
      printer_sethandlers_doublevalue(closure, h);
      return;
    case UPB_WELLKNOWN_FLOATVALUE:
      printer_sethandlers_floatvalue(closure, h);
      return;
    case UPB_WELLKNOWN_INT64VALUE:
      printer_sethandlers_int64value(closure, h);
      return;
    case UPB_WELLKNOWN_UINT64VALUE:
      printer_sethandlers_uint64value(closure, h);
      return;
    case UPB_WELLKNOWN_INT32VALUE:
      printer_sethandlers_int32value(closure, h);
      return;
    case UPB_WELLKNOWN_UINT32VALUE:
      printer_sethandlers_uint32value(closure, h);
      return;
    case UPB_WELLKNOWN_BOOLVALUE:
      printer_sethandlers_boolvalue(closure, h);
      return;
    case UPB_WELLKNOWN_STRINGVALUE:
      printer_sethandlers_stringvalue(closure, h);
      return;
    case UPB_WELLKNOWN_BYTESVALUE:
      printer_sethandlers_bytesvalue(closure, h);
      return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

/* One switch case for a primitive field type: repeated fields use the
 * repeated_<ctype> handler without handler data, scalar fields use the
 * scalar_<ctype> handler with the field's key name. */
#define TYPE(fieldtype, setter, ctype)                                    \
  case fieldtype:                                                         \
    if (repeated) {                                                       \
      upb_handlers_set##setter(h, f, repeated_##ctype, &empty_attr);      \
    } else {                                                              \
      upb_handlers_set##setter(h, f, scalar_##ctype, &name_attr);         \
    }                                                                     \
    break;

  for (upb_msg_field_begin(&iter, md);
       !upb_msg_field_done(&iter);
       upb_msg_field_next(&iter)) {
    const upb_fielddef *f = upb_msg_iter_field(&iter);
    const bool repeated = upb_fielddef_isseq(f);
    upb_handlerattr name_attr = UPB_HANDLERATTR_INIT;

    /* Key name for this field, shared by every handler that prints a key. */
    name_attr.handler_data = newstrpc(h, f, preserve_fieldnames);

    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (repeated) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      TYPE(UPB_TYPE_FLOAT,  float,  float);
      TYPE(UPB_TYPE_DOUBLE, double, double);
      TYPE(UPB_TYPE_BOOL,   bool,   bool);
      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
      case UPB_TYPE_ENUM: {
        /* For now, we always emit symbolic names for enums. We may want an
         * option later to control this behavior, but we will wait for a real
         * need first. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
        set_enum_hd(h, f, preserve_fieldnames, &enum_attr);

        if (repeated) {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        }
        break;
      }
      case UPB_TYPE_STRING:
        if (repeated) {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        } else {
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        }
        break;
      case UPB_TYPE_BYTES:
        /* XXX: this doesn't support strings that span buffers yet. The base64
         * encoder will need to be made resumable for this to work properly. */
        if (repeated) {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        } else {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        }
        break;
      case UPB_TYPE_MESSAGE:
        if (repeated) {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        }
        break;
    }
  }

#undef TYPE
}
13753
/* Puts the printer back at the outermost nesting level (depth_ == 0). No
 * other printer state is touched. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
13757
13758
13759 /* Public API *****************************************************************/
13760
upb_json_printer_create(upb_arena * a,const upb_handlers * h,upb_bytessink output)13761 upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h,
13762 upb_bytessink output) {
13763 #ifndef NDEBUG
13764 size_t size_before = upb_arena_bytesallocated(a);
13765 #endif
13766
13767 upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer));
13768 if (!p) return NULL;
13769
13770 p->output_ = output;
13771 json_printer_reset(p);
13772 upb_sink_reset(&p->input_, h, p);
13773 p->seconds = 0;
13774 p->nanos = 0;
13775
13776 /* If this fails, increase the value in printer.h. */
13777 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
13778 UPB_JSON_PRINTER_SIZE);
13779 return p;
13780 }
13781
upb_json_printer_input(upb_json_printer * p)13782 upb_sink upb_json_printer_input(upb_json_printer *p) {
13783 return p->input_;
13784 }
13785
upb_json_printer_newcache(bool preserve_proto_fieldnames)13786 upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) {
13787 upb_json_printercache *cache = upb_gmalloc(sizeof(*cache));
13788 upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache);
13789
13790 cache->preserve_fieldnames = preserve_proto_fieldnames;
13791 upb_handlercache_addcleanup(ret, cache, upb_gfree);
13792
13793 return ret;
13794 }
13795 /* See port_def.inc. This should #undef all macros #defined there. */
13796
13797 #undef UPB_MAPTYPE_STRING
13798 #undef UPB_SIZE
13799 #undef UPB_PTR_AT
13800 #undef UPB_READ_ONEOF
13801 #undef UPB_WRITE_ONEOF
13802 #undef UPB_INLINE
13803 #undef UPB_FORCEINLINE
13804 #undef UPB_NOINLINE
13805 #undef UPB_NORETURN
13806 #undef UPB_MAX
13807 #undef UPB_MIN
13808 #undef UPB_UNUSED
13809 #undef UPB_ASSUME
13810 #undef UPB_ASSERT
13811 #undef UPB_ASSERT_DEBUGVAR
13812 #undef UPB_UNREACHABLE
13813 #undef UPB_INFINITY
13814 #undef UPB_MSVC_VSNPRINTF
13815 #undef _upb_snprintf
13816 #undef _upb_vsnprintf
13817 #undef _upb_va_copy
13818