• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Amalgamated source file */
2 #include "upb.h"
3 /*
4 * This is where we define macros used across upb.
5 *
6 * All of these macros are undef'd in port_undef.inc to avoid leaking them to
7 * users.
8 *
9 * The correct usage is:
10 *
11 *   #include "upb/foobar.h"
12 *   #include "upb/baz.h"
13 *
14 *   // MUST be last included header.
15 *   #include "upb/port_def.inc"
16 *
17 *   // Code for this file.
18 *   // <...>
19 *
20 *   // Can be omitted for .c files, required for .h.
21 *   #include "upb/port_undef.inc"
22 *
23 * This file is private and must not be included by users!
24 */
25 #include <stdint.h>
26 
/* UPB_SIZE(size32, size64): expands to size32 when UINTPTR_MAX is 0xffffffff
 * (32-bit pointers) and to size64 otherwise. */
#if UINTPTR_MAX != 0xffffffff
#define UPB_SIZE(size32, size64) size64
#else
#define UPB_SIZE(size32, size64) size32
#endif
32 
/* If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 *
 * UPB_PTR_AT(msg, ofs, type): a `type*` pointing `ofs` bytes past `msg`.
 */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* UPB_READ_ONEOF(): yields the field stored at `offset` when the int stored at
 * `case_offset` equals `case_val`, and `default` otherwise.  The whole
 * expansion is parenthesized so it can be used inside a larger expression. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (*UPB_PTR_AT(msg, case_offset, int) == (case_val)                           \
       ? *UPB_PTR_AT(msg, offset, fieldtype)                                  \
       : (default))

/* UPB_WRITE_ONEOF(): stores `case_val` at `case_offset`, then `value` at
 * `offset`.  NOTE: this expands to two complete statements, so it must not be
 * used as the unbraced body of an if/else or loop. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  *UPB_PTR_AT(msg, case_offset, int) = case_val;                             \
  *UPB_PTR_AT(msg, offset, fieldtype) = value;
46 
/* Sentinel (0) that desctype_to_mapsize stores for STRING and BYTES entries in
 * place of a byte count. */
#define UPB_MAPTYPE_STRING 0
48 
/* UPB_INLINE: inline if possible, emit standalone code if required.
 * C++ gets plain `inline`; C gets `static __inline__` on GCC/Clang and plain
 * `static` everywhere else. */
#if !defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__))
#define UPB_INLINE static __inline__
#elif !defined(__cplusplus)
#define UPB_INLINE static
#else
#define UPB_INLINE inline
#endif
57 
/* Branch-prediction hints.  Compilers without __builtin_expect get a plain
 * pass-through of the expression. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#else
#define UPB_LIKELY(x) __builtin_expect((x),1)
#define UPB_UNLIKELY(x) __builtin_expect((x),0)
#endif
66 
/* UPB_BIG_ENDIAN is defined automatically when the compiler reports a
 * big-endian byte order through __BYTE_ORDER__.  Define it manually if you're
 * on big endian and your compiler doesn't provide these preprocessor
 * symbols. */
#ifdef __BYTE_ORDER__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define UPB_BIG_ENDIAN
#endif
#endif
72 
/* Function-attribute macros.  They expand to nothing unless __GNUC__ is
 * defined. */
#ifndef __GNUC__
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#else  /* defined(__GNUC__) */
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
#endif
83 
/* Select implementations of snprintf, vsnprintf and va_copy for the active
 * language standard / compiler, exposed as _upb_snprintf, _upb_vsnprintf and
 * _upb_va_copy. */
#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
/* C99/C++11 versions. */
#include <stdio.h>
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined(_MSC_VER)
/* Microsoft C/C++ versions. */
#include <stdarg.h>
#include <stdio.h>
#if _MSC_VER >= 1900
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#else
/* Older MSVC: use the msvc_* replacements declared here. */
int msvc_snprintf(char* s, size_t n, const char* format, ...);
int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#define UPB_MSVC_VSNPRINTF
#define _upb_snprintf msvc_snprintf
#define _upb_vsnprintf msvc_vsnprintf
#endif
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined(__GNUC__)
/* A few hacky workarounds for functions not in C89.
 * For internal use only!
 * TODO(haberman): fix these by including our own implementations, or finding
 * another workaround.
 */
#define _upb_snprintf __builtin_snprintf
#define _upb_vsnprintf __builtin_vsnprintf
#define _upb_va_copy(a, b) __va_copy(a, b)
#else
#error Need implementations of [v]snprintf and va_copy
#endif

/* When compiled as C++, C++11 (or an equivalent compiler mode) is required. */
#if defined(__cplusplus) &&                                            \
    !(__cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \
      (defined(_MSC_VER) && _MSC_VER >= 1900))
#error upb requires C++11 for C++ support
#endif
126 
/* UPB_MAX/UPB_MIN evaluate the selected argument twice; do not pass
 * expressions with side effects. */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

/* Marks `var` as intentionally unused.  The argument is parenthesized so the
 * void cast covers the whole expression. */
#define UPB_UNUSED(var) (void)(var)
131 
/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
 * In debug mode it is a regular assert().
 *
 * Both release variants expand to a single do/while statement, so the macro
 * is safe as the unbraced body of an if/else.  The non-GNU variant only
 * references expr (so variables used in it do not trigger "unused" warnings)
 * and never evaluates it. */
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) \
  do { if (!(expr)) __builtin_unreachable(); } while (0)
#else
#define UPB_ASSUME(expr) do {} while (0 && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif
143 
/* UPB_ASSERT(): a regular assert() in debug builds.  In release mode, we use
 * the expression without letting it be evaluated, which prevents "unused
 * variable" warnings. */
#ifndef NDEBUG
#define UPB_ASSERT(expr) assert(expr)
#else
#define UPB_ASSERT(expr) do {} while (false && (expr))
#endif

/* UPB_ASSERT_DEBUGVAR(): for assertions over functions or variables that only
 * exist in debug mode.  Always a plain assert(). */
#define UPB_ASSERT_DEBUGVAR(expr) assert(expr)

/* UPB_UNREACHABLE(): asserts when reached; on GCC/Clang it additionally marks
 * the code path as unreachable for the optimizer. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#endif
161 
/* UPB_INFINITY: floating-point positive infinity.  Uses INFINITY from
 * <math.h> when it is defined and falls back to 1.0 / 0.0 otherwise. */
#include <math.h>
#ifndef INFINITY
#define UPB_INFINITY (1.0 / 0.0)
#else
#define UPB_INFINITY INFINITY
#endif
169 
170 #include <setjmp.h>
171 #include <string.h>
172 
173 
174 
175 /* Maps descriptor type -> upb field type.  */
176 static const uint8_t desctype_to_fieldtype[] = {
177     -1,               /* invalid descriptor type */
178     UPB_TYPE_DOUBLE,  /* DOUBLE */
179     UPB_TYPE_FLOAT,   /* FLOAT */
180     UPB_TYPE_INT64,   /* INT64 */
181     UPB_TYPE_UINT64,  /* UINT64 */
182     UPB_TYPE_INT32,   /* INT32 */
183     UPB_TYPE_UINT64,  /* FIXED64 */
184     UPB_TYPE_UINT32,  /* FIXED32 */
185     UPB_TYPE_BOOL,    /* BOOL */
186     UPB_TYPE_STRING,  /* STRING */
187     UPB_TYPE_MESSAGE, /* GROUP */
188     UPB_TYPE_MESSAGE, /* MESSAGE */
189     UPB_TYPE_BYTES,   /* BYTES */
190     UPB_TYPE_UINT32,  /* UINT32 */
191     UPB_TYPE_ENUM,    /* ENUM */
192     UPB_TYPE_INT32,   /* SFIXED32 */
193     UPB_TYPE_INT64,   /* SFIXED64 */
194     UPB_TYPE_INT32,   /* SINT32 */
195     UPB_TYPE_INT64,   /* SINT64 */
196 };
197 
198 /* Maps descriptor type -> upb map size.  */
199 static const uint8_t desctype_to_mapsize[] = {
200     -1,                 /* invalid descriptor type */
201     8,                  /* DOUBLE */
202     4,                  /* FLOAT */
203     8,                  /* INT64 */
204     8,                  /* UINT64 */
205     4,                  /* INT32 */
206     8,                  /* FIXED64 */
207     4,                  /* FIXED32 */
208     1,                  /* BOOL */
209     UPB_MAPTYPE_STRING, /* STRING */
210     sizeof(void *),     /* GROUP */
211     sizeof(void *),     /* MESSAGE */
212     UPB_MAPTYPE_STRING, /* BYTES */
213     4,                  /* UINT32 */
214     4,                  /* ENUM */
215     4,                  /* SFIXED32 */
216     8,                  /* SFIXED64 */
217     4,                  /* SINT32 */
218     8,                  /* SINT64 */
219 };
220 
221 static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) |
222                                    (1 << UPB_DTYPE_FIXED32) |
223                                    (1 << UPB_DTYPE_SFIXED32);
224 
225 static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) |
226                                    (1 << UPB_DTYPE_FIXED64) |
227                                    (1 << UPB_DTYPE_SFIXED64);
228 
/* Op: an action to be performed for a wire-type/field-type combination.
 *
 *   OP_SCALAR_LG2(n): scalar of 2^n bytes            -> n
 *   OP_FIXPCK_LG2(n): packed fixed-width, 2^n bytes  -> n + 4
 *   OP_VARPCK_LG2(n): packed varint, 2^n bytes       -> n + 8
 *
 * OP_STRING (4) and OP_SUBMSG (5) share the numeric range of OP_FIXPCK_LG2;
 * the tables in this file only use OP_FIXPCK_LG2 with n = 2 or 3 (6 and 7),
 * so the values do not collide.  Arguments are parenthesized so that
 * expression arguments expand correctly. */
#define OP_SCALAR_LG2(n) (n)
#define OP_FIXPCK_LG2(n) ((n) + 4)
#define OP_VARPCK_LG2(n) ((n) + 8)
#define OP_STRING 4
#define OP_SUBMSG 5
235 
236 static const int8_t varint_ops[19] = {
237     -1,               /* field not found */
238     -1,               /* DOUBLE */
239     -1,               /* FLOAT */
240     OP_SCALAR_LG2(3), /* INT64 */
241     OP_SCALAR_LG2(3), /* UINT64 */
242     OP_SCALAR_LG2(2), /* INT32 */
243     -1,               /* FIXED64 */
244     -1,               /* FIXED32 */
245     OP_SCALAR_LG2(0), /* BOOL */
246     -1,               /* STRING */
247     -1,               /* GROUP */
248     -1,               /* MESSAGE */
249     -1,               /* BYTES */
250     OP_SCALAR_LG2(2), /* UINT32 */
251     OP_SCALAR_LG2(2), /* ENUM */
252     -1,               /* SFIXED32 */
253     -1,               /* SFIXED64 */
254     OP_SCALAR_LG2(2), /* SINT32 */
255     OP_SCALAR_LG2(3), /* SINT64 */
256 };
257 
258 static const int8_t delim_ops[37] = {
259     /* For non-repeated field type. */
260     -1,        /* field not found */
261     -1,        /* DOUBLE */
262     -1,        /* FLOAT */
263     -1,        /* INT64 */
264     -1,        /* UINT64 */
265     -1,        /* INT32 */
266     -1,        /* FIXED64 */
267     -1,        /* FIXED32 */
268     -1,        /* BOOL */
269     OP_STRING, /* STRING */
270     -1,        /* GROUP */
271     OP_SUBMSG, /* MESSAGE */
272     OP_STRING, /* BYTES */
273     -1,        /* UINT32 */
274     -1,        /* ENUM */
275     -1,        /* SFIXED32 */
276     -1,        /* SFIXED64 */
277     -1,        /* SINT32 */
278     -1,        /* SINT64 */
279     /* For repeated field type. */
280     OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
281     OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
282     OP_VARPCK_LG2(3), /* REPEATED INT64 */
283     OP_VARPCK_LG2(3), /* REPEATED UINT64 */
284     OP_VARPCK_LG2(2), /* REPEATED INT32 */
285     OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
286     OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
287     OP_VARPCK_LG2(0), /* REPEATED BOOL */
288     OP_STRING,        /* REPEATED STRING */
289     OP_SUBMSG,        /* REPEATED GROUP */
290     OP_SUBMSG,        /* REPEATED MESSAGE */
291     OP_STRING,        /* REPEATED BYTES */
292     OP_VARPCK_LG2(2), /* REPEATED UINT32 */
293     OP_VARPCK_LG2(2), /* REPEATED ENUM */
294     OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
295     OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
296     OP_VARPCK_LG2(2), /* REPEATED SINT32 */
297     OP_VARPCK_LG2(3), /* REPEATED SINT64 */
298 };
299 
/* Data pertaining to the parse.  One instance lives on the stack of
 * upb_decode() and is threaded through every decode_* helper. */
typedef struct {
  const char *limit;       /* End of delimited region or end of buffer. */
  upb_arena *arena;        /* Arena passed to every allocation call. */
  int depth;               /* Remaining nesting budget: starts at 64, is
                            * decremented while inside a sub-message or group,
                            * and going below zero is a decode error. */
  uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
  jmp_buf err;             /* longjmp() target used by decode_err(). */
} upb_decstate;
308 
/* Scratch storage for one decoded wire value.  All members share the same
 * storage: the decoder fills one member and later copies the leading 1, 4 or
 * 8 bytes (or the whole upb_strview) into the message or array with
 * memcpy(). */
typedef union {
  bool bool_val;
  int32_t int32_val;
  int64_t int64_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
  upb_strview str_val;   /* Length-delimited payload: data pointer + size. */
} wireval;
317 
/* Forward declaration: the sub-message and group helpers defined before
 * decode_msg() call it recursively. */
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
                              const upb_msglayout *layout);
320 
decode_err(upb_decstate * d)321 UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
322 
/* Makes sure `arr` has room for `elem` more elements.
 *
 * Returns true when the array had to be reallocated (callers use this to
 * refresh pointers into the array storage) and false when the existing
 * capacity was enough.  A failed reallocation ends the decode through
 * decode_err(). */
static bool decode_reserve(upb_decstate *d, upb_array *arr, int elem) {
  if (arr->size - arr->len >= elem) {
    return false; /* Enough spare capacity already. */
  }
  if (!_upb_array_realloc(arr, arr->len + elem, d->arena)) {
    decode_err(d);
  }
  return true;
}
330 
331 UPB_NOINLINE
decode_longvarint64(upb_decstate * d,const char * ptr,const char * limit,uint64_t * val)332 static const char *decode_longvarint64(upb_decstate *d, const char *ptr,
333                                        const char *limit, uint64_t *val) {
334   uint8_t byte;
335   int bitpos = 0;
336   uint64_t out = 0;
337 
338   do {
339     if (bitpos >= 70 || ptr == limit) decode_err(d);
340     byte = *ptr;
341     out |= (uint64_t)(byte & 0x7F) << bitpos;
342     ptr++;
343     bitpos += 7;
344   } while (byte & 0x80);
345 
346   *val = out;
347   return ptr;
348 }
349 
350 UPB_FORCEINLINE
decode_varint64(upb_decstate * d,const char * ptr,const char * limit,uint64_t * val)351 static const char *decode_varint64(upb_decstate *d, const char *ptr,
352                                    const char *limit, uint64_t *val) {
353   if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) {
354     *val = (uint8_t)*ptr;
355     return ptr + 1;
356   } else {
357     return decode_longvarint64(d, ptr, limit, val);
358   }
359 }
360 
/* Decodes one varint that must fit in 32 bits.  Stores it in *val and returns
 * the position after it; a value above UINT32_MAX is a decode error. */
static const char *decode_varint32(upb_decstate *d, const char *ptr,
                                   const char *limit, uint32_t *val) {
  uint64_t wide;
  const char *next = decode_varint64(d, ptr, limit, &wide);
  if (wide > UINT32_MAX) decode_err(d);
  *val = (uint32_t)wide;
  return next;
}
369 
/* Post-processes a freshly decoded varint in place according to the
 * descriptor type: BOOL is normalized to 0/1, SINT32/SINT64 are zig-zag
 * decoded.  Every other type is left untouched. */
static void decode_munge(int type, wireval *val) {
  if (type == UPB_DESCRIPTOR_TYPE_BOOL) {
    val->bool_val = val->uint64_val != 0;
  } else if (type == UPB_DESCRIPTOR_TYPE_SINT32) {
    const uint32_t zz = val->uint32_val;
    val->int32_val = (zz >> 1) ^ -(int32_t)(zz & 1);
  } else if (type == UPB_DESCRIPTOR_TYPE_SINT64) {
    const uint64_t zz = val->uint64_val;
    val->int64_val = (zz >> 1) ^ -(int64_t)(zz & 1);
  }
}
387 
upb_find_field(const upb_msglayout * l,uint32_t field_number)388 static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
389                                                  uint32_t field_number) {
390   static upb_msglayout_field none = {0};
391 
392   /* Lots of optimization opportunities here. */
393   int i;
394   if (l == NULL) return &none;
395   for (i = 0; i < l->field_count; i++) {
396     if (l->fields[i].number == field_number) {
397       return &l->fields[i];
398     }
399   }
400 
401   return &none; /* Unknown field. */
402 }
403 
decode_newsubmsg(upb_decstate * d,const upb_msglayout * layout,const upb_msglayout_field * field)404 static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
405                                  const upb_msglayout_field *field) {
406   const upb_msglayout *subl = layout->submsgs[field->submsg_index];
407   return _upb_msg_new(subl, d->arena);
408 }
409 
/* Decodes the length-delimited payload `val` into `submsg`, using the
 * sub-layout that `layout` lists for `field`.
 *
 * d->limit is narrowed to the end of the payload for the duration of the call
 * and restored afterwards, and one level of d->depth is consumed while
 * inside.  Running out of depth, or finding an END_GROUP tag inside the
 * payload, is a decode error. */
static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
                            const upb_msglayout *layout,
                            const upb_msglayout_field *field, upb_strview val) {
  const char *outer_limit = d->limit;
  const upb_msglayout *sub_layout = layout->submsgs[field->submsg_index];

  d->depth--;
  if (d->depth < 0) decode_err(d);

  d->limit = val.data + val.size;
  decode_msg(d, val.data, submsg, sub_layout);
  d->limit = outer_limit;

  if (d->end_group != 0) decode_err(d);
  d->depth++;
}
422 
/* Decodes the body of a group that starts at `ptr` into `submsg` with layout
 * `subl`, consuming one level of d->depth while inside.
 *
 * The body must end with an END_GROUP tag whose field number equals `number`;
 * anything else is a decode error.  d->end_group is cleared on success and
 * the position reached by decode_msg() is returned. */
static const char *decode_group(upb_decstate *d, const char *ptr,
                                upb_msg *submsg, const upb_msglayout *subl,
                                uint32_t number) {
  const char *end;

  d->depth -= 1;
  if (d->depth < 0) decode_err(d);

  end = decode_msg(d, ptr, submsg, subl);
  if (d->end_group != number) decode_err(d);

  d->end_group = 0;
  d->depth += 1;
  return end;
}
433 
/* Decodes a group-typed `field` into `submsg`: resolves the field's sub-layout
 * from `layout` and delegates to decode_group() with the field's own number as
 * the expected END_GROUP number. */
static const char *decode_togroup(upb_decstate *d, const char *ptr,
                                  upb_msg *submsg, const upb_msglayout *layout,
                                  const upb_msglayout_field *field) {
  return decode_group(d, ptr, submsg, layout->submsgs[field->submsg_index],
                      field->number);
}
440 
/* Appends the decoded value(s) for a repeated `field` to its array inside
 * `msg`, creating the array on first use.
 *
 * `op` selects the action:
 *   OP_SCALAR_LG2(n)  append one scalar of 2^n bytes taken from `val`
 *   OP_STRING         append val.str_val
 *   OP_SUBMSG         append a new sub-message and decode into it
 *   OP_FIXPCK_LG2(n)  append every 2^n-byte element of the packed payload
 *   OP_VARPCK_LG2(n)  decode every varint of the packed payload and append it
 *                     as a 2^n-byte element
 *
 * Returns the position at which decoding of the enclosing message continues.
 * Allocation failures and malformed payloads end the decode through
 * decode_err(). */
static const char *decode_toarray(upb_decstate *d, const char *ptr,
                                  upb_msg *msg, const upb_msglayout *layout,
                                  const upb_msglayout_field *field, wireval val,
                                  int op) {
  upb_array **arrp = UPB_PTR_AT(msg, field->offset, void);
  upb_array *arr = *arrp;
  void *mem;

  if (!arr) {
    /* Lazily create the array. */
    upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype];
    arr = _upb_array_new(d->arena, type);
    if (!arr) decode_err(d);
    *arrp = arr;
  }

  decode_reserve(d, arr, 1);

  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3):
      /* Append scalar value.  For scalar ops, op itself is lg2(elem size). */
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
      arr->len++;
      memcpy(mem, &val, 1 << op);
      return ptr;
    case OP_STRING:
      /* Append string. */
      mem =
          UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
      arr->len++;
      memcpy(mem, &val, sizeof(upb_strview));
      return ptr;
    case OP_SUBMSG: {
      /* Append submessage / group. */
      upb_msg *submsg = decode_newsubmsg(d, layout, field);
      *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) =
          submsg;
      arr->len++;
      if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
        ptr = decode_togroup(d, ptr, submsg, layout, field);
      } else {
        decode_tosubmsg(d, submsg, layout, field, val.str_val);
      }
      return ptr;
    }
    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3): {
      /* Fixed packed. */
      int lg2 = op - OP_FIXPCK_LG2(0);
      int mask = (1 << lg2) - 1;
      int count = val.str_val.size >> lg2;
      if ((val.str_val.size & mask) != 0) {
        decode_err(d); /* Length isn't a round multiple of elem size. */
      }
      decode_reserve(d, arr, count);
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
      arr->len += count;
      /* Copy exactly the payload.  The size is a whole multiple of the
       * element size (checked above), so it equals count << lg2.  Shifting by
       * `op` here would be wrong: op is lg2 + 4 for packed-fixed ops. */
      memcpy(mem, val.str_val.data, val.str_val.size);
      return ptr;
    }
    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3): {
      /* Varint packed. */
      int lg2 = op - OP_VARPCK_LG2(0);
      int scale = 1 << lg2;
      const char *src = val.str_val.data;
      const char *end = src + val.str_val.size;
      char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
      while (src < end) {
        wireval elem;
        src = decode_varint64(d, src, end, &elem.uint64_val);
        decode_munge(field->descriptortype, &elem);
        if (decode_reserve(d, arr, 1)) {
          /* Storage moved: recompute the write position. */
          out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
        }
        arr->len++;
        memcpy(out, &elem, scale);
        out += scale;
      }
      if (src != end) decode_err(d);
      return src; /* == end of the packed payload. */
    }
    default:
      UPB_UNREACHABLE();
  }
}
529 
/* Decodes one length-delimited map entry (`val.str_val`) for map `field` and
 * inserts it into the field's map inside `msg`, creating the map on first
 * use.
 *
 * The entry is parsed as a message directly into a stack upb_map_entry (key
 * field at offset 0, value field at offset sizeof(upb_strview)) and then
 * stored with _upb_map_set().  A failed map allocation ends the decode
 * through decode_err(), consistent with array allocation in decode_toarray(). */
static void decode_tomap(upb_decstate *d, upb_msg *msg,
                         const upb_msglayout *layout,
                         const upb_msglayout_field *field, wireval val) {
  upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
  upb_map *map = *map_p;
  upb_map_entry ent;
  const upb_msglayout *entry = layout->submsgs[field->submsg_index];

  if (!map) {
    /* Lazily create map. */
    const upb_msglayout_field *key_field = &entry->fields[0];
    const upb_msglayout_field *val_field = &entry->fields[1];
    char key_size = desctype_to_mapsize[key_field->descriptortype];
    char val_size = desctype_to_mapsize[val_field->descriptortype];
    UPB_ASSERT(key_field->offset == 0);
    UPB_ASSERT(val_field->offset == sizeof(upb_strview));
    map = _upb_map_new(d->arena, key_size, val_size);
    if (!map) decode_err(d);
    *map_p = map;
  }

  /* Parse map entry. */
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
      entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
    /* Create proactively to handle the case where it doesn't appear. */
    ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena);
  }

  decode_tosubmsg(d, &ent.k, layout, field, val.str_val);

  /* Insert into map. */
  _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena);
}
565 
/* Stores the decoded value for a non-repeated `field` into `msg`.
 *
 * Presence is recorded first: a negative field->presence is the negated
 * offset of the oneof case slot (set to the field number), a positive one is
 * a hasbit index.  `op` then selects how the value is written: scalars and
 * strings are copied from `val`; sub-messages are created on demand and
 * decoded into.  Returns the position at which decoding continues. */
static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
                                const upb_msglayout *layout,
                                const upb_msglayout_field *field, wireval val,
                                int op) {
  void *slot = UPB_PTR_AT(msg, field->offset, void);
  int desc_type = field->descriptortype;

  /* Set presence if necessary. */
  if (field->presence > 0) {
    /* Hasbit */
    uint32_t bit = field->presence;
    *UPB_PTR_AT(msg, bit / 8, uint8_t) |= (1 << (bit % 8));
  } else if (field->presence < 0) {
    /* Oneof case */
    *UPB_PTR_AT(msg, -field->presence, int32_t) = field->number;
  }

  /* Store into message. */
  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3):
      /* For scalar ops, op is lg2 of the value size: copies 1, 4 or 8 bytes. */
      memcpy(slot, &val, (size_t)1 << op);
      break;
    case OP_STRING:
      memcpy(slot, &val, sizeof(upb_strview));
      break;
    case OP_SUBMSG: {
      upb_msg **submsgp = slot;
      upb_msg *submsg = *submsgp;
      if (!submsg) {
        submsg = decode_newsubmsg(d, layout, field);
        *submsgp = submsg;
      }
      if (UPB_UNLIKELY(desc_type == UPB_DTYPE_GROUP)) {
        ptr = decode_togroup(d, ptr, submsg, layout, field);
      } else {
        decode_tosubmsg(d, submsg, layout, field, val.str_val);
      }
      break;
    }
    default:
      UPB_UNREACHABLE();
  }

  return ptr;
}
617 
/* Decodes fields from `ptr` into `msg` (laid out per `layout`) until d->limit
 * is reached or an END_GROUP tag is read.
 *
 * For each field the tag is split into field number and wire type, the wire
 * value is read, and an op is selected from the field's descriptor type.  A
 * negative op means the field is unknown for this layout: its raw bytes are
 * appended to the message's unknown-field storage (when `msg` is non-NULL).
 *
 * On END_GROUP, d->end_group is set to the field number and the position
 * after the tag is returned immediately.  Otherwise the function returns the
 * final position, which must equal d->limit.  All errors go through
 * decode_err(). */
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
                              const upb_msglayout *layout) {
  while (ptr < d->limit) {
    const char *start = ptr; /* First byte of this field's tag. */
    const upb_msglayout_field *field;
    uint32_t tag;
    int field_number;
    int wire_type;
    wireval val;
    int op;

    ptr = decode_varint32(d, ptr, d->limit, &tag);
    field_number = tag >> 3;
    wire_type = tag & 7;

    field = upb_find_field(layout, field_number);

    switch (wire_type) {
      case UPB_WIRE_TYPE_VARINT:
        ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val);
        op = varint_ops[field->descriptortype];
        decode_munge(field->descriptortype, &val);
        break;
      case UPB_WIRE_TYPE_32BIT:
        if (d->limit - ptr < 4) decode_err(d);
        memcpy(&val, ptr, 4);
        ptr += 4;
        /* Only accepted when the field's type is a 32-bit fixed type. */
        op = (((1 << field->descriptortype) & fixed32_ok) != 0)
                 ? OP_SCALAR_LG2(2)
                 : -1;
        break;
      case UPB_WIRE_TYPE_64BIT:
        if (d->limit - ptr < 8) decode_err(d);
        memcpy(&val, ptr, 8);
        ptr += 8;
        /* Only accepted when the field's type is a 64-bit fixed type. */
        op = (((1 << field->descriptortype) & fixed64_ok) != 0)
                 ? OP_SCALAR_LG2(3)
                 : -1;
        break;
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t size;
        int ndx = field->descriptortype;
        if (_upb_isrepeated(field)) ndx += 18;
        ptr = decode_varint32(d, ptr, d->limit, &size);
        if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) {
          decode_err(d); /* Length overflow. */
        }
        val.str_val.data = ptr;
        val.str_val.size = size;
        ptr += size;
        op = delim_ops[ndx];
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        val.int32_val = field_number;
        op = (field->descriptortype == UPB_DTYPE_GROUP) ? OP_SUBMSG : -1;
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        d->end_group = field_number;
        return ptr;
      default:
        decode_err(d);
    }

    if (op < 0) {
      /* Skip unknown field. */
      if (field_number == 0) decode_err(d);
      if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
        ptr = decode_group(d, ptr, NULL, NULL, field_number);
      }
      if (msg && !_upb_msg_addunknown(msg, start, ptr - start, d->arena)) {
        decode_err(d);
      }
      continue;
    }

    /* Parse, using op for dispatch. */
    switch (field->label) {
      case UPB_LABEL_REPEATED:
      case _UPB_LABEL_PACKED:
        ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
        break;
      case _UPB_LABEL_MAP:
        decode_tomap(d, msg, layout, field, val);
        break;
      default:
        ptr = decode_tomsg(d, ptr, msg, layout, field, val, op);
        break;
    }
  }

  if (ptr != d->limit) decode_err(d);
  return ptr;
}
714 
/* Decodes the `size` bytes at `buf` into `msg`, which is laid out according
 * to `l`; memory needed along the way comes from `arena`.
 *
 * Returns true on success (an empty buffer trivially succeeds) and false when
 * any decode error occurred or when the top-level data ended with an
 * END_GROUP tag.  Nesting is limited to a depth of 64. */
bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
                upb_arena *arena) {
  upb_decstate d;

  d.end_group = 0;
  d.depth = 64;
  d.arena = arena;
  d.limit = buf + size;

  if (setjmp(d.err) != 0) {
    return false; /* decode_err() jumped back here. */
  }

  if (size != 0) {
    decode_msg(&d, buf, msg, l);
  }

  return d.end_group == 0;
}
730 
/* The OP_* op codes are private to the decoder above; remove them so they do
 * not leak into the rest of this amalgamated file. */
#undef OP_SCALAR_LG2
#undef OP_FIXPCK_LG2
#undef OP_VARPCK_LG2
#undef OP_STRING
#undef OP_SUBMSG
736 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
737 
738 
739 #include <string.h>
740 
741 
742 
/* Number of bytes reserved for one varint by upb_put_varint(). */
#define UPB_PB_VARINT_MAX_LEN 10
/* CHK(x): makes the enclosing function return false when x is false. */
#define CHK(x) do { if (!(x)) { return false; } } while(0)
745 
/* Writes `val` as a varint into `buf`: 7 bits per byte, least significant
 * group first, with the 0x80 bit set on every byte except the last.
 * Returns the number of bytes written (always at least one). */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t n = 0;
  do {
    uint8_t b = val & 0x7fU;
    val >>= 7;
    if (val != 0) b |= 0x80U; /* More groups follow. */
    buf[n++] = b;
  } while (val != 0);
  return n;
}
758 
/* Zig-zag encodes a signed 32-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ... */
static uint32_t upb_zzencode_32(int32_t n) {
  const uint32_t doubled = (uint32_t)n << 1;
  return doubled ^ (n >> 31);
}
/* Zig-zag encodes a signed 64-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ... */
static uint64_t upb_zzencode_64(int64_t n) {
  const uint64_t doubled = (uint64_t)n << 1;
  return doubled ^ (n >> 63);
}
761 
/* Encoder state.  Output is written backwards: `ptr` starts at `limit` and
 * moves toward `buf` as bytes are reserved, so the encoded data occupies
 * [ptr, limit). */
typedef struct {
  upb_alloc *alloc;         /* Allocator used to grow the buffer. */
  char *buf, *ptr, *limit;  /* Buffer start, current write position, end. */
} upb_encstate;
766 
/* Returns the smallest value of the form 128 * 2^k that is >= `bytes`
 * (so never less than 128). */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t cap;
  for (cap = 128; cap < bytes; cap *= 2) {
    /* Keep doubling. */
  }
  return cap;
}
774 
/* Grows the encode buffer so that at least `bytes` more bytes fit in front of
 * the data already written, keeping that data at the end of the new buffer.
 *
 * Returns false if the allocator could not provide the memory; the encoder
 * state is left untouched in that case. */
static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
  /* Capture everything derived from the old pointers before reallocating:
   * once upb_realloc() succeeds the old block must no longer be used. */
  size_t old_size = e->limit - e->buf;
  size_t used = e->limit - e->ptr;
  size_t new_size = upb_roundup_pow2(bytes + used);
  char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  CHK(new_buf);

  /* We want previous data at the end, realloc() put it at the beginning.
   * The source is therefore new_buf, not the (possibly released) old
   * buffer.  The ranges can overlap, hence memmove(). */
  if (old_size > 0) {
    memmove(new_buf + new_size - old_size, new_buf, old_size);
  }

  e->ptr = new_buf + new_size - used;
  e->limit = new_buf + new_size;
  e->buf = new_buf;
  return true;
}
791 
792 /* Call to ensure that at least "bytes" bytes are available for writing at
793  * e->ptr.  Returns false if the bytes could not be allocated. */
upb_encode_reserve(upb_encstate * e,size_t bytes)794 static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
795   CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
796       upb_encode_growbuffer(e, bytes));
797 
798   e->ptr -= bytes;
799   return true;
800 }
801 
802 /* Writes the given bytes to the buffer, handling reserve/advance. */
upb_put_bytes(upb_encstate * e,const void * data,size_t len)803 static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
804   if (len == 0) return true;
805   CHK(upb_encode_reserve(e, len));
806   memcpy(e->ptr, data, len);
807   return true;
808 }
809 
/* Emits `val` as 8 raw bytes in host byte order. */
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  return upb_put_bytes(e, &val, sizeof(val));
}
814 
/* Emits `val` as 4 raw bytes in host byte order. */
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  return upb_put_bytes(e, &val, sizeof(val));
}
819 
/* Emits `val` as a varint.  UPB_PB_VARINT_MAX_LEN bytes are reserved, the
 * varint is encoded at the start of that region and then moved to its end, so
 * that e->ptr ends up pointing at the varint's first byte and any unused
 * reserved bytes are given back. */
static bool upb_put_varint(upb_encstate *e, uint64_t val) {
  char *scratch;
  size_t n;

  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) return false;

  scratch = e->ptr;
  n = upb_encode_varint(val, scratch);
  e->ptr = scratch + UPB_PB_VARINT_MAX_LEN - n;
  memmove(e->ptr, scratch, n);
  return true;
}
830 
/* Emits the bit pattern of `d` as a 64-bit fixed value. */
static bool upb_put_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed64(e, bits);
}
837 
/* Emits the bit pattern of `d` as a 32-bit fixed value. */
static bool upb_put_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed32(e, bits);
}
844 
upb_readcase(const char * msg,const upb_msglayout_field * f)845 static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
846   uint32_t ret;
847   memcpy(&ret, msg - f->presence, sizeof(ret));
848   return ret;
849 }
850 
upb_readhasbit(const char * msg,const upb_msglayout_field * f)851 static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
852   uint32_t hasbit = f->presence;
853   UPB_ASSERT(f->presence > 0);
854   return (*UPB_PTR_AT(msg, hasbit / 8, uint8_t)) & (1 << (hasbit % 8));
855 }
856 
/* Emits the tag for a field: (field_number << 3) | wire_type, as a varint.
 *
 * The tag is assembled in unsigned 32-bit arithmetic.  Shifting a signed int
 * would overflow for field numbers >= 2^28 (protobuf allows up to 2^29 - 1),
 * and the resulting negative value would be widened to a 10-byte varint. */
static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
  const uint32_t tag = ((uint32_t)field_number << 3) | (uint32_t)wire_type;
  return upb_put_varint(e, tag);
}
860 
/* Emits the elements of a fixed-width array.
 *
 * With a non-zero `tag`, every element is written individually followed by
 * the tag varint, walking from the last element to the first (the encoder
 * writes backwards); an empty array emits nothing.  With `tag` == 0 the
 * whole element block is written at once, followed by its byte length.
 *
 * Returns false if the output buffer could not be grown. */
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t elem_size, uint32_t tag) {
  size_t bytes = arr->len * elem_size;
  const char* data = _upb_array_constptr(arr);
  if (tag) {
    const char* ptr;
    /* Nothing to emit; also avoids forming a pointer before `data`. */
    if (arr->len == 0) return true;
    ptr = data + bytes - elem_size;
    while (true) {
      CHK(upb_put_bytes(e, ptr, elem_size) && upb_put_varint(e, tag));
      if (ptr == data) break;
      ptr -= elem_size;
    }
    return true;
  } else {
    return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes);
  }
}
877 
878 bool upb_encode_message(upb_encstate *e, const char *msg,
879                         const upb_msglayout *m, size_t *size);
880 
/* Encodes the single (non-repeated) field value stored at |_field_mem|,
 * dispatching on f->descriptortype.
 *
 *   e               - encoder state to write into.
 *   _field_mem      - address of the field's storage.
 *   m               - layout of the containing message; used to find the
 *                     sub-message layout for GROUP / MESSAGE fields.
 *   f               - layout entry of the field being encoded.
 *   skip_zero_value - when true, numeric zero and empty string/bytes values
 *                     are skipped (nothing is written, true is returned).
 *
 * Returns true on success or when nothing needed to be written, false when a
 * write fails.
 *
 * In every case the value is written before its tag.  NOTE(review): output
 * sizes in this file are computed as e->limit - e->ptr, which suggests the
 * buffer is filled from the end toward the start; that would explain the
 * reversed call order - confirm against upb_encode_reserve(). */
upb_encode_scalarfield(upb_encstate * e,const void * _field_mem,const upb_msglayout * m,const upb_msglayout_field * f,bool skip_zero_value)881 static bool upb_encode_scalarfield(upb_encstate *e, const void *_field_mem,
882                                    const upb_msglayout *m,
883                                    const upb_msglayout_field *f,
884                                    bool skip_zero_value) {
885   const char *field_mem = _field_mem;
/* CASE(ctype, type, wire_type, encodeval): loads a |ctype| from field_mem
 * into |val|, returns true early when skip_zero_value is set and val == 0,
 * and otherwise returns upb_put_<type>(e, encodeval) && upb_put_tag(...).
 * Every expansion returns, so the case labels below never fall through into
 * each other. */
886 #define CASE(ctype, type, wire_type, encodeval) do { \
887   ctype val = *(ctype*)field_mem; \
888   if (skip_zero_value && val == 0) { \
889     return true; \
890   } \
891   return upb_put_ ## type(e, encodeval) && \
892       upb_put_tag(e, f->number, wire_type); \
893 } while(0)
894 
895   switch (f->descriptortype) {
896     case UPB_DESCRIPTOR_TYPE_DOUBLE:
897       CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
898     case UPB_DESCRIPTOR_TYPE_FLOAT:
899       CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
900     case UPB_DESCRIPTOR_TYPE_INT64:
901     case UPB_DESCRIPTOR_TYPE_UINT64:
902       CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
903     case UPB_DESCRIPTOR_TYPE_UINT32:
904       CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
905     case UPB_DESCRIPTOR_TYPE_INT32:
906     case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Widened to int64_t first so that negative values are sign-extended
       * to 64 bits before being encoded as a varint. */
907       CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
908     case UPB_DESCRIPTOR_TYPE_SFIXED64:
909     case UPB_DESCRIPTOR_TYPE_FIXED64:
910       CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
911     case UPB_DESCRIPTOR_TYPE_FIXED32:
912     case UPB_DESCRIPTOR_TYPE_SFIXED32:
913       CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
914     case UPB_DESCRIPTOR_TYPE_BOOL:
915       CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
916     case UPB_DESCRIPTOR_TYPE_SINT32:
917       CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
918     case UPB_DESCRIPTOR_TYPE_SINT64:
919       CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
920     case UPB_DESCRIPTOR_TYPE_STRING:
921     case UPB_DESCRIPTOR_TYPE_BYTES: {
      /* Length-delimited: payload bytes, then length, then tag. */
922       upb_strview view = *(upb_strview*)field_mem;
923       if (skip_zero_value && view.size == 0) {
924         return true;
925       }
926       return upb_put_bytes(e, view.data, view.size) &&
927           upb_put_varint(e, view.size) &&
928           upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
929     }
930     case UPB_DESCRIPTOR_TYPE_GROUP: {
      /* A NULL sub-message pointer writes nothing.  Otherwise the END_GROUP
       * tag is written first, then the body, then the START_GROUP tag. */
931       size_t size;
932       void *submsg = *(void **)field_mem;
933       const upb_msglayout *subm = m->submsgs[f->submsg_index];
934       if (submsg == NULL) {
935         return true;
936       }
937       return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
938           upb_encode_message(e, submsg, subm, &size) &&
939           upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
940     }
941     case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      /* A NULL sub-message pointer writes nothing.  Otherwise: body, then
       * the body size reported by upb_encode_message(), then the tag. */
942       size_t size;
943       void *submsg = *(void **)field_mem;
944       const upb_msglayout *subm = m->submsgs[f->submsg_index];
945       if (submsg == NULL) {
946         return true;
947       }
948       return upb_encode_message(e, submsg, subm, &size) &&
949           upb_put_varint(e, size) &&
950           upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
951     }
952   }
953 #undef CASE
  /* Every handled descriptor type returns from inside the switch. */
954   UPB_UNREACHABLE();
955 }
956 
/* Encodes the repeated field |f| whose array pointer is stored at |field_mem|.
 *
 *   e         - encoder state to write into.
 *   field_mem - address of the slot holding a (const upb_array *).
 *   m         - layout of the containing message; used to find the
 *               sub-message layout for GROUP / MESSAGE elements.
 *   f         - layout entry of the field; f->label selects packed encoding.
 *
 * A NULL or empty array writes nothing and returns true.  Elements are always
 * visited from the last one to the first.  Returns false when a write fails. */
upb_encode_array(upb_encstate * e,const char * field_mem,const upb_msglayout * m,const upb_msglayout_field * f)957 static bool upb_encode_array(upb_encstate *e, const char *field_mem,
958                              const upb_msglayout *m,
959                              const upb_msglayout_field *f) {
960   const upb_array *arr = *(const upb_array**)field_mem;
961   bool packed = f->label == _UPB_LABEL_PACKED;
962 
963   if (arr == NULL || arr->len == 0) {
964     return true;
965   }
966 
/* VARINT_CASE(ctype, encode): walks the array backwards as |ctype| elements
 * and writes |encode| (an expression over |ptr|) as a varint for each one.
 *   - Unpacked (tag != 0): the field tag is written after every element.
 *   - Packed (tag == 0): after the loop the number of bytes written by the
 *     loop, measured as the change in e->limit - e->ptr, is written as a
 *     varint.
 * The expansion ends with `break;` (leaving the switch) followed by an empty
 * do/while that only exists to absorb the semicolon at the use site. */
967 #define VARINT_CASE(ctype, encode)                                       \
968   {                                                                      \
969     const ctype *start = _upb_array_constptr(arr);                       \
970     const ctype *ptr = start + arr->len;                                 \
971     size_t pre_len = e->limit - e->ptr;                                  \
972     uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
973     do {                                                                 \
974       ptr--;                                                             \
975       CHK(upb_put_varint(e, encode));                                    \
976       if (tag) CHK(upb_put_varint(e, tag));                              \
977     } while (ptr != start);                                              \
978     if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len));       \
979   }                                                                      \
980   break;                                                                 \
981   do {                                                                   \
982     ;                                                                    \
983   } while (0)
984 
/* TAG(wire_type): the per-element tag, or 0 to request packed encoding from
 * upb_put_fixedarray().
 * NOTE(review): unlike VARINT_CASE, TAG is not #undef'd after the switch. */
985 #define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
986 
987   switch (f->descriptortype) {
988     case UPB_DESCRIPTOR_TYPE_DOUBLE:
989       CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT)));
990       break;
991     case UPB_DESCRIPTOR_TYPE_FLOAT:
992       CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT)));
993       break;
994     case UPB_DESCRIPTOR_TYPE_SFIXED64:
995     case UPB_DESCRIPTOR_TYPE_FIXED64:
996       CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT)));
997       break;
998     case UPB_DESCRIPTOR_TYPE_FIXED32:
999     case UPB_DESCRIPTOR_TYPE_SFIXED32:
1000       CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT)));
1001       break;
1002     case UPB_DESCRIPTOR_TYPE_INT64:
1003     case UPB_DESCRIPTOR_TYPE_UINT64:
1004       VARINT_CASE(uint64_t, *ptr);
1005     case UPB_DESCRIPTOR_TYPE_UINT32:
1006       VARINT_CASE(uint32_t, *ptr);
1007     case UPB_DESCRIPTOR_TYPE_INT32:
1008     case UPB_DESCRIPTOR_TYPE_ENUM:
1009       VARINT_CASE(int32_t, (int64_t)*ptr);
1010     case UPB_DESCRIPTOR_TYPE_BOOL:
1011       VARINT_CASE(bool, *ptr);
1012     case UPB_DESCRIPTOR_TYPE_SINT32:
1013       VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
1014     case UPB_DESCRIPTOR_TYPE_SINT64:
1015       VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
    /* The remaining element types return directly from their case, so they
     * never reach the packed-tag step after the switch. */
1016     case UPB_DESCRIPTOR_TYPE_STRING:
1017     case UPB_DESCRIPTOR_TYPE_BYTES: {
1018       const upb_strview *start = _upb_array_constptr(arr);
1019       const upb_strview *ptr = start + arr->len;
1020       do {
1021         ptr--;
1022         CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
1023             upb_put_varint(e, ptr->size) &&
1024             upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
1025       } while (ptr != start);
1026       return true;
1027     }
1028     case UPB_DESCRIPTOR_TYPE_GROUP: {
1029       const void *const*start = _upb_array_constptr(arr);
1030       const void *const*ptr = start + arr->len;
1031       const upb_msglayout *subm = m->submsgs[f->submsg_index];
1032       do {
1033         size_t size;
1034         ptr--;
1035         CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
1036             upb_encode_message(e, *ptr, subm, &size) &&
1037             upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
1038       } while (ptr != start);
1039       return true;
1040     }
1041     case UPB_DESCRIPTOR_TYPE_MESSAGE: {
1042       const void *const*start = _upb_array_constptr(arr);
1043       const void *const*ptr = start + arr->len;
1044       const upb_msglayout *subm = m->submsgs[f->submsg_index];
1045       do {
1046         size_t size;
1047         ptr--;
1048         CHK(upb_encode_message(e, *ptr, subm, &size) &&
1049             upb_put_varint(e, size) &&
1050             upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
1051       } while (ptr != start);
1052       return true;
1053     }
1054   }
1055 #undef VARINT_CASE
1056 
  /* Only the fixed-width and varint cases `break` to here.  For a packed
   * field the payload and its length have been written already, so all that
   * remains is the single length-delimited tag for the whole run. */
1057   if (packed) {
1058     CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
1059   }
1060   return true;
1061 }
1062 
/* Encodes the map field |f| whose map pointer is stored at |field_mem|.
 *
 * Each table entry is written as one length-delimited record: the value field
 * (entry layout field 1), then the key field (entry layout field 0), then the
 * record's byte size, then the tag of |f|.  Zero-valued keys and values are
 * written too (skip_zero_value is false).  A NULL map writes nothing. */
static bool upb_encode_map(upb_encstate *e, const char *field_mem,
                           const upb_msglayout *m,
                           const upb_msglayout_field *f) {
  const upb_map *map = *(const upb_map**)field_mem;
  const upb_msglayout *entry = m->submsgs[f->submsg_index];
  const upb_msglayout_field *key_field = &entry->fields[0];
  const upb_msglayout_field *val_field = &entry->fields[1];
  upb_strtable_iter it;

  if (map == NULL) {
    return true;
  }

  upb_strtable_begin(&it, &map->table);
  while (!upb_strtable_done(&it)) {
    const size_t before = e->limit - e->ptr;
    size_t entry_size;
    upb_strview key = upb_strtable_iter_key(&it);
    const upb_value val = upb_strtable_iter_value(&it);
    const void *key_mem;
    const void *val_mem;

    /* String keys are encoded from the string view itself; other keys are
     * encoded from the bytes the view points at. */
    if (map->key_size == UPB_MAPTYPE_STRING) {
      key_mem = (void *)&key;
    } else {
      key_mem = key.data;
    }

    /* String values are stored behind a pointer; other values are encoded
     * straight out of the upb_value. */
    if (map->val_size == UPB_MAPTYPE_STRING) {
      val_mem = upb_value_getptr(val);
    } else {
      val_mem = &val;
    }

    CHK(upb_encode_scalarfield(e, val_mem, entry, val_field, false));
    CHK(upb_encode_scalarfield(e, key_mem, entry, key_field, false));
    entry_size = (e->limit - e->ptr) - before;
    CHK(upb_put_varint(e, entry_size));
    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));

    upb_strtable_next(&it);
  }

  return true;
}
1095 
1096 
/* Encodes |msg|, laid out according to |m|, into |e|.
 *
 *   e    - encoder state to write into.
 *   msg  - message to encode.
 *   m    - layout describing the fields of |msg|.
 *   size - out: number of bytes this call added, measured as the growth of
 *          e->limit - e->ptr.  Only written on success.
 *
 * The unknown-field bytes are written first, then the fields are visited from
 * the last layout entry to the first.  Returns false as soon as any write
 * fails, including the write of the unknown-field bytes. */
bool upb_encode_message(upb_encstate *e, const char *msg,
                        const upb_msglayout *m, size_t *size) {
  int i;
  size_t pre_len = e->limit - e->ptr;
  const char *unknown;
  size_t unknown_size;

  unknown = upb_msg_getunknown(msg, &unknown_size);

  if (unknown) {
    /* Propagate a failed write instead of silently dropping the unknown
     * fields and still reporting success. */
    CHK(upb_put_bytes(e, unknown, unknown_size));
  }

  for (i = m->field_count - 1; i >= 0; i--) {
    const upb_msglayout_field *f = &m->fields[i];

    if (_upb_isrepeated(f)) {
      CHK(upb_encode_array(e, msg + f->offset, m, f));
    } else if (f->label == _UPB_LABEL_MAP) {
      CHK(upb_encode_map(e, msg + f->offset, m, f));
    } else {
      bool skip_empty = false;
      if (f->presence == 0) {
        /* Proto3 presence: zero / empty values are not written. */
        skip_empty = true;
      } else if (f->presence > 0) {
        /* Proto2 presence: hasbit. */
        if (!upb_readhasbit(msg, f)) {
          continue;
        }
      } else {
        /* Field is in a oneof: only the member whose number matches the
         * stored case is written. */
        if (upb_readcase(msg, f) != f->number) {
          continue;
        }
      }
      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
    }
  }

  *size = (e->limit - e->ptr) - pre_len;
  return true;
}
1140 
/* Encodes |msg| (layout |m|) into memory obtained from |arena|.
 *
 * On success returns a pointer to the encoded bytes and stores their count in
 * *size.  An empty encoding returns the address of a static byte with
 * *size == 0, so that success can be told apart from failure.  On failure
 * returns NULL and sets *size to 0. */
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
                 size_t *size) {
  static char empty_output;
  upb_encstate state;

  state.alloc = upb_arena_alloc(arena);
  state.buf = NULL;
  state.limit = NULL;
  state.ptr = NULL;

  if (!upb_encode_message(&state, msg, m, size)) {
    *size = 0;
    return NULL;
  }

  *size = state.limit - state.ptr;
  if (*size == 0) {
    return &empty_output;
  }

  UPB_ASSERT(state.ptr);
  return state.ptr;
}
1164 
1165 #undef CHK
1166 
1167 
1168 
1169 
1170 /** upb_msg *******************************************************************/
1171 
1172 static const char _upb_fieldtype_to_sizelg2[12] = {
1173   0,
1174   0,  /* UPB_TYPE_BOOL */
1175   2,  /* UPB_TYPE_FLOAT */
1176   2,  /* UPB_TYPE_INT32 */
1177   2,  /* UPB_TYPE_UINT32 */
1178   2,  /* UPB_TYPE_ENUM */
1179   UPB_SIZE(2, 3),  /* UPB_TYPE_MESSAGE */
1180   3,  /* UPB_TYPE_DOUBLE */
1181   3,  /* UPB_TYPE_INT64 */
1182   3,  /* UPB_TYPE_UINT64 */
1183   UPB_SIZE(3, 4),  /* UPB_TYPE_STRING */
1184   UPB_SIZE(3, 4),  /* UPB_TYPE_BYTES */
1185 };
1186 
/* Packs |elem_size_lg2| into the low bits of |ptr| and returns the combined
 * word.  The size must be at most 4. */
static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) {
  uintptr_t tagged = (uintptr_t)ptr;
  UPB_ASSERT(elem_size_lg2 <= 4);
  tagged = tagged | elem_size_lg2;
  return tagged;
}
1191 
upb_msg_internalsize(const upb_msglayout * l)1192 static int upb_msg_internalsize(const upb_msglayout *l) {
1193   return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
1194 }
1195 
upb_msg_sizeof(const upb_msglayout * l)1196 static size_t upb_msg_sizeof(const upb_msglayout *l) {
1197   return l->size + upb_msg_internalsize(l);
1198 }
1199 
upb_msg_getinternal(upb_msg * msg)1200 static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
1201   return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
1202 }
1203 
upb_msg_getinternal_const(const upb_msg * msg)1204 static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
1205   return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
1206 }
1207 
upb_msg_getinternalwithext(upb_msg * msg,const upb_msglayout * l)1208 static upb_msg_internal_withext *upb_msg_getinternalwithext(
1209     upb_msg *msg, const upb_msglayout *l) {
1210   UPB_ASSERT(l->extendable);
1211   return UPB_PTR_AT(msg, -sizeof(upb_msg_internal_withext),
1212                     upb_msg_internal_withext);
1213 }
1214 
_upb_msg_new(const upb_msglayout * l,upb_arena * a)1215 upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
1216   void *mem = upb_arena_malloc(a, upb_msg_sizeof(l));
1217   upb_msg_internal *in;
1218   upb_msg *msg;
1219 
1220   if (!mem) {
1221     return NULL;
1222   }
1223 
1224   msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg);
1225 
1226   /* Initialize normal members. */
1227   memset(msg, 0, l->size);
1228 
1229   /* Initialize internal members. */
1230   in = upb_msg_getinternal(msg);
1231   in->unknown = NULL;
1232   in->unknown_len = 0;
1233   in->unknown_size = 0;
1234 
1235   if (l->extendable) {
1236     upb_msg_getinternalwithext(msg, l)->extdict = NULL;
1237   }
1238 
1239   return msg;
1240 }
1241 
/* Appends |len| bytes from |data| to the unknown-field buffer of |msg|.
 *
 * When the spare capacity is too small the buffer is reallocated from |arena|
 * to the larger of twice its current capacity and (current capacity + len).
 * Returns false, leaving the message unchanged, if that reallocation fails. */
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                         upb_arena *arena) {
  upb_msg_internal *internal = upb_msg_getinternal(msg);

  if (len > internal->unknown_size - internal->unknown_len) {
    upb_alloc *alloc = upb_arena_alloc(arena);
    size_t required = internal->unknown_size + len;
    size_t grown = UPB_MAX(internal->unknown_size * 2, required);
    void *buf =
        upb_realloc(alloc, internal->unknown, internal->unknown_size, grown);
    if (buf == NULL) {
      return false;
    }
    internal->unknown = buf;
    internal->unknown_size = grown;
  }

  memcpy(internal->unknown + internal->unknown_len, data, len);
  internal->unknown_len += len;
  return true;
}
1258 
upb_msg_getunknown(const upb_msg * msg,size_t * len)1259 const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
1260   const upb_msg_internal *in = upb_msg_getinternal_const(msg);
1261   *len = in->unknown_len;
1262   return in->unknown;
1263 }
1264 
1265 /** upb_array *****************************************************************/
1266 
_upb_array_new(upb_arena * a,upb_fieldtype_t type)1267 upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) {
1268   upb_array *arr = upb_arena_malloc(a, sizeof(upb_array));
1269 
1270   if (!arr) {
1271     return NULL;
1272   }
1273 
1274   arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]);
1275   arr->len = 0;
1276   arr->size = 0;
1277 
1278   return arr;
1279 }
1280 
/* Grows the storage of |arr| so that it can hold at least |min_size|
 * elements.  The new capacity is max(current capacity, 4), doubled until it
 * reaches |min_size|.  Returns false, leaving |arr| unchanged, if the arena
 * reallocation fails. */
bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
  /* The low bits of arr->data hold log2 of the element size. */
  const int lg2 = arr->data & 7;
  const size_t old_bytes = arr->size << lg2;
  void *storage = _upb_array_ptr(arr);
  size_t capacity;
  size_t new_bytes;

  for (capacity = UPB_MAX(arr->size, 4); capacity < min_size; capacity *= 2) {
    /* Keep doubling. */
  }

  new_bytes = capacity << lg2;
  storage = upb_arena_realloc(arena, storage, old_bytes, new_bytes);
  if (storage == NULL) {
    return false;
  }

  arr->data = tag_arrptr(storage, lg2);
  arr->size = capacity;
  return true;
}
1302 
/* Returns the array stored in *arr_ptr, creating it (and storing it back into
 * *arr_ptr) when the slot is still NULL.  Returns NULL if creation fails. */
static upb_array *getorcreate_array(upb_array **arr_ptr, upb_fieldtype_t type,
                                    upb_arena *arena) {
  upb_array *created;

  if (*arr_ptr) {
    return *arr_ptr;
  }

  created = _upb_array_new(arena, type);
  if (created == NULL) {
    return NULL;
  }
  *arr_ptr = created;
  return created;
}
1313 
/* Sets the length of |arr| to |size|, growing its storage first when |size|
 * exceeds the current capacity.  Returns false if growing fails. */
static bool resize_array(upb_array *arr, size_t size, upb_arena *arena) {
  if (size > arr->size) {
    if (!_upb_array_realloc(arr, size, arena)) {
      return false;
    }
  }

  arr->len = size;
  return true;
}
1322 
/* Gets or creates the array in *arr_ptr, resizes it to |size| elements and
 * returns its element storage.  Returns NULL if creating or resizing fails. */
void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
                                 upb_fieldtype_t type, upb_arena *arena) {
  upb_array *arr = getorcreate_array(arr_ptr, type, arena);

  if (!arr) {
    return NULL;
  }
  if (!resize_array(arr, size, arena)) {
    return NULL;
  }
  return _upb_array_ptr(arr);
}
1328 
/* Appends one element of |type|, copied from |value|, to the array in
 * *arr_ptr, creating the array first when the slot is NULL.
 *
 * Returns false if the array cannot be created or grown.  The array pointer
 * is checked before its length is read, so a failed creation no longer
 * dereferences NULL. */
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
                                upb_fieldtype_t type, upb_arena *arena) {
  upb_array *arr = getorcreate_array(arr_ptr, type, arena);
  size_t elem;
  int lg2 = _upb_fieldtype_to_sizelg2[type];
  char *data;

  if (!arr) return false;

  /* Index of the new element; resize_array() makes room for it. */
  elem = arr->len;
  if (!resize_array(arr, elem + 1, arena)) return false;

  data = _upb_array_ptr(arr);
  memcpy(data + (elem << lg2), value, 1 << lg2);
  return true;
}
1342 
1343 /** upb_map *******************************************************************/
1344 
_upb_map_new(upb_arena * a,size_t key_size,size_t value_size)1345 upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
1346   upb_map *map = upb_arena_malloc(a, sizeof(upb_map));
1347 
1348   if (!map) {
1349     return NULL;
1350   }
1351 
1352   upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
1353   map->key_size = key_size;
1354   map->val_size = value_size;
1355 
1356   return map;
1357 }
1358 /*
1359 ** upb_table Implementation
1360 **
1361 ** Implementation is heavily inspired by Lua's ltable.c.
1362 */
1363 
1364 
1365 #include <string.h>
1366 
1367 
/* Upper bound on the value returned by log2ceil(): 2^16 = 64k. */
#define UPB_MAXARRSIZE 16

/* Element count of a true array.  From Chromium: the second factor turns into
 * a division by zero when sizeof(x) is not a multiple of the element size. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* A table is treated as full once (count + 1) / size exceeds this. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.
 * This trades speed against memory use (not straightforwardly, because of
 * cache effects): the lower the value, the more memory is used. */
static const double MIN_DENSITY = 0.1;
1380 
/* Returns true when |v| is zero or has exactly one bit set. */
bool is_pow2(uint64_t v) {
  if (v == 0) {
    return true;
  }
  /* Clearing the lowest set bit leaves zero only for a single-bit value. */
  return (v & (v - 1)) == 0;
}
1382 
log2ceil(uint64_t v)1383 int log2ceil(uint64_t v) {
1384   int ret = 0;
1385   bool pow2 = is_pow2(v);
1386   while (v >>= 1) ret++;
1387   ret = pow2 ? ret : ret + 1;  /* Ceiling. */
1388   return UPB_MIN(UPB_MAXARRSIZE, ret);
1389 }
1390 
/* Duplicates the NUL-terminated string |s| using allocator |a|.  Returns
 * whatever upb_strdup2() returns for strlen(s) bytes. */
char *upb_strdup(const char *s, upb_alloc *a) {
  const size_t length = strlen(s);
  return upb_strdup2(s, length, a);
}
1394 
/* Copies |len| bytes from |s| into a fresh (len + 1)-byte buffer from |a| and
 * NUL-terminates it.  The input may be raw binary data and does not need a
 * terminator of its own.  Returns NULL when len == SIZE_MAX (len + 1 would
 * overflow) or when the allocation fails. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  char *copy;

  /* Prevent overflow errors. */
  if (len == SIZE_MAX) return NULL;

  copy = upb_malloc(a, len + 1);
  if (!copy) {
    return copy;
  }

  memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
1411 
/* Lookup key for either kind of table: an integer (inttable) or a
 * pointer-plus-length string (strtable).  Only one member is meaningful at a
 * time. */
typedef union {
  uintptr_t num;      /* Integer key. */
  struct {
    const char *str;  /* First byte of the key; not necessarily terminated. */
    size_t len;       /* Number of key bytes. */
  } str;
} lookupkey_t;
1420 
strkey2(const char * str,size_t len)1421 static lookupkey_t strkey2(const char *str, size_t len) {
1422   lookupkey_t k;
1423   k.str.str = str;
1424   k.str.len = len;
1425   return k;
1426 }
1427 
intkey(uintptr_t key)1428 static lookupkey_t intkey(uintptr_t key) {
1429   lookupkey_t k;
1430   k.num = key;
1431   return k;
1432 }
1433 
1434 typedef uint32_t hashfunc_t(upb_tabkey key);
1435 typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
1436 
1437 /* Base table (shared code) ***************************************************/
1438 
1439 /* For when we need to cast away const. */
mutable_entries(upb_table * t)1440 static upb_tabent *mutable_entries(upb_table *t) {
1441   return (upb_tabent*)t->entries;
1442 }
1443 
/* Returns true when |t| cannot take one more entry without exceeding
 * MAX_LOAD.  A table of size zero is always full. */
static bool isfull(upb_table *t) {
  double load_after_insert;

  if (upb_table_size(t) == 0) {
    return true;
  }

  load_after_insert = (double)(t->count + 1) / upb_table_size(t);
  return load_after_insert > MAX_LOAD;
}
1451 
/* Initialises |t| as an empty table with size exponent |size_lg2|, taking its
 * zero-filled entry array from |a|.  A table whose size comes out as zero
 * gets a NULL entry array.  Returns false if the allocation fails. */
static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
  size_t slots;
  size_t bytes;

  t->count = 0;
  t->size_lg2 = size_lg2;

  /* upb_table_size() reads the size_lg2 stored just above. */
  slots = upb_table_size(t);
  t->mask = slots ? slots - 1 : 0;
  bytes = slots * sizeof(upb_tabent);

  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_malloc(a, bytes);
  if (!t->entries) return false;
  memset(mutable_entries(t), 0, bytes);
  return true;
}
1468 
/* Hands the entry array of |t| back to allocator |a|. */
static void uninit(upb_table *t, upb_alloc *a) {
  upb_tabent *entries = mutable_entries(t);
  upb_free(a, entries);
}
1472 
emptyent(upb_table * t)1473 static upb_tabent *emptyent(upb_table *t) {
1474   upb_tabent *e = mutable_entries(t) + upb_table_size(t);
1475   while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
1476 }
1477 
getentry_mutable(upb_table * t,uint32_t hash)1478 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
1479   return (upb_tabent*)upb_getentry(t, hash);
1480 }
1481 
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)1482 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
1483                                    uint32_t hash, eqlfunc_t *eql) {
1484   const upb_tabent *e;
1485 
1486   if (t->size_lg2 == 0) return NULL;
1487   e = upb_getentry(t, hash);
1488   if (upb_tabent_isempty(e)) return NULL;
1489   while (1) {
1490     if (eql(e->key, key)) return e;
1491     if ((e = e->next) == NULL) return NULL;
1492   }
1493 }
1494 
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)1495 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
1496                                      uint32_t hash, eqlfunc_t *eql) {
1497   return (upb_tabent*)findentry(t, key, hash, eql);
1498 }
1499 
/* Looks |key| up in |t|.  Returns true when it is present, storing its value
 * in *v if |v| is non-NULL; returns false, leaving *v alone, otherwise. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *hit = findentry(t, key, hash, eql);

  if (!hit) {
    return false;
  }
  if (v) {
    _upb_value_setval(v, hit->val.val);
  }
  return true;
}
1512 
/* Inserts (tabkey -> val) into |t|.
 *
 *   key      - lookup form of the key; only used by the assertions.
 *   tabkey   - stored form of the key, written into the chosen slot.
 *   val      - value to store.
 *   hash     - hash of the new key; selects its main position.
 *   hashfunc - rehashes the key already sitting in that position so that its
 *              own main position can be found.
 *   eql      - key comparison; only used by the assertions.
 *
 * This function never grows the table: emptyent() requires a free slot, and
 * upb_strtable_insert3() checks isfull() and resizes before calling it. */
1513 /* The given key must not already exist in the table. */
insert(upb_table * t,lookupkey_t key,upb_tabkey tabkey,upb_value val,uint32_t hash,hashfunc_t * hashfunc,eqlfunc_t * eql)1514 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
1515                    upb_value val, uint32_t hash,
1516                    hashfunc_t *hashfunc, eqlfunc_t *eql) {
1517   upb_tabent *mainpos_e;
1518   upb_tabent *our_e;
1519 
1520   UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
1521 
1522   t->count++;
1523   mainpos_e = getentry_mutable(t, hash);
1524   our_e = mainpos_e;
1525 
1526   if (upb_tabent_isempty(mainpos_e)) {
1527     /* Our main position is empty; use it. */
1528     our_e->next = NULL;
1529   } else {
1530     /* Collision. */
1531     upb_tabent *new_e = emptyent(t);
1532     /* Head of collider's chain. */
1533     upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
1534     if (chain == mainpos_e) {
1535       /* Existing ent is in its main position (it has the same hash as us, and
1536        * is the head of our chain).  Insert to new ent and append to this chain. */
1537       new_e->next = mainpos_e->next;
1538       mainpos_e->next = new_e;
1539       our_e = new_e;
1540     } else {
1541       /* Existing ent is not in its main position (it is a node in some other
1542        * chain).  This implies that no existing ent in the table has our hash.
1543        * Evict it (updating its chain) and use its ent for head of our chain. */
1544       *new_e = *mainpos_e;  /* copies next. */
      /* Find the collider's predecessor so it can be pointed at the copy. */
1545       while (chain->next != mainpos_e) {
1546         chain = (upb_tabent*)chain->next;
1547         UPB_ASSERT(chain);
1548       }
1549       chain->next = new_e;
1550       our_e = mainpos_e;
1551       our_e->next = NULL;
1552     }
1553   }
  /* our_e is now linked into the right chain; fill in its payload. */
1554   our_e->key = tabkey;
1555   our_e->val.val = val.val;
1556   UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
1557 }
1558 
/* Removes the entry matching |key| from |t|.
 *
 * On success returns true, storing the removed value in *val and the removed
 * stored key in *removed (each only when non-NULL).  Returns false when the
 * key is not in the table.  A slot is marked empty by zeroing its key. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *head = getentry_mutable(t, hash);
  upb_tabent *prev;
  upb_tabent *victim;

  if (upb_tabent_isempty(head)) return false;

  if (eql(head->key, key)) {
    /* The match is the head of its chain. */
    upb_tabent *successor = (upb_tabent*)head->next;
    t->count--;
    if (val) _upb_value_setval(val, head->val.val);
    if (removed) *removed = head->key;
    if (successor) {
      /* Pull the next entry into the head slot and free the slot it had. */
      *head = *successor;
      successor->key = 0;
    } else {
      head->key = 0;
    }
    return true;
  }

  /* The match, if any, is further down the chain: find its predecessor. */
  prev = head;
  while (prev->next && !eql(prev->next->key, key)) {
    prev = (upb_tabent*)prev->next;
  }

  victim = (upb_tabent*)prev->next;
  if (!victim) {
    /* Element to remove is not in the table. */
    return false;
  }

  t->count--;
  if (val) _upb_value_setval(val, victim->val.val);
  if (removed) *removed = victim->key;
  victim->key = 0;  /* Make the slot empty. */
  prev->next = victim->next;
  return true;
}
1597 
next(const upb_table * t,size_t i)1598 static size_t next(const upb_table *t, size_t i) {
1599   do {
1600     if (++i >= upb_table_size(t))
1601       return SIZE_MAX;
1602   } while(upb_tabent_isempty(&t->entries[i]));
1603 
1604   return i;
1605 }
1606 
/* Returns the index of the first non-empty slot of |t|, or SIZE_MAX when the
 * table holds no entries. */
static size_t begin(const upb_table *t) {
  const size_t before_first = (size_t)-1;
  return next(t, before_first);
}
1610 
1611 
1612 /* upb_strtable ***************************************************************/
1613 
1614 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
1615 
strcopy(lookupkey_t k2,upb_alloc * a)1616 static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
1617   uint32_t len = (uint32_t) k2.str.len;
1618   char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
1619   if (str == NULL) return 0;
1620   memcpy(str, &len, sizeof(uint32_t));
1621   memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
1622   str[sizeof(uint32_t) + k2.str.len] = '\0';
1623   return (uintptr_t)str;
1624 }
1625 
/* Hashes a stored string key with upb_murmur_hash2() and seed 0, the same
 * hash the strtable lookup functions compute from raw bytes. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t length;
  char *bytes = upb_tabstr(key, &length);
  return upb_murmur_hash2(bytes, length, 0);
}
1631 
/* Returns true when stored string key |k1| has the same length and the same
 * bytes as lookup key |k2|. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char *stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) {
    return false;
  }
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
1637 
/* Initialises |t| as an empty string table with size exponent 2, allocating
 * from |a|.  |ctype| is accepted but not used.  Returns false if the
 * allocation fails. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
  const uint8_t initial_size_lg2 = 2;
  (void)ctype;
  return init(&t->t, initial_size_lg2, a);
}
1641 
/* Empties |t| in place: the count is reset and every slot is zeroed.  The
 * table keeps its size, and the key buffers are not freed here. */
void upb_strtable_clear(upb_strtable *t) {
  const size_t slot_bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  char *raw = (char*)t->t.entries;

  t->t.count = 0;
  memset(raw, 0, slot_bytes);
}
1647 
/* Releases everything |t| owns through |a|: the key of every slot (empty
 * slots have key 0) and then the entry array itself. */
void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  size_t slot = 0;

  while (slot < upb_table_size(&t->t)) {
    upb_free(a, (void*)t->t.entries[slot].key);
    slot++;
  }
  uninit(&t->t, a);
}
1654 
/* Rebuilds |t| with size exponent |size_lg2|, using |a| for all allocations.
 *
 * Every entry is re-inserted into a freshly initialised table, which replaces
 * |t| only once all insertions have succeeded.  Returns false and leaves |t|
 * untouched if the new table cannot be allocated or if any re-insertion
 * fails; previously a failed re-insertion was ignored and the entry was
 * lost. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  if (!init(&new_table.t, size_lg2, a))
    return false;
  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    upb_strview key = upb_strtable_iter_key(&i);
    if (!upb_strtable_insert3(
            &new_table, key.data, key.size,
            upb_strtable_iter_value(&i), a)) {
      /* Drop the partial copy; the original table is still intact. */
      upb_strtable_uninit2(&new_table, a);
      return false;
    }
  }
  upb_strtable_uninit2(t, a);
  *t = new_table;
  return true;
}
1672 
/* Inserts the |len|-byte key at |k| with value |v| into |t|; the key must not
 * be present yet.  The table is first grown to the next size exponent when it
 * is full, and the key bytes are copied using |a|.  Returns false if growing
 * the table or copying the key fails. */
bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
                          upb_value v, upb_alloc *a) {
  lookupkey_t lookup_key;
  upb_tabkey stored_key;
  uint32_t hash;

  /* Need to resize?  New table of double the size, add old elements to it. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  lookup_key = strkey2(k, len);
  stored_key = strcopy(lookup_key, a);
  if (stored_key == 0) {
    return false;
  }

  hash = upb_murmur_hash2(lookup_key.str.str, lookup_key.str.len, 0);
  insert(&t->t, lookup_key, stored_key, v, hash, &strhash, &streql);
  return true;
}
1694 
/* Looks up the |len|-byte string |key| in |t|.  The key is hashed with
 * upb_murmur_hash2() (seed 0) and the search is delegated to lookup(), whose
 * result is returned; |v| is passed through to it. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  lookupkey_t lookup_key = strkey2(key, len);
  uint32_t hash = upb_murmur_hash2(key, len, 0);
  return lookup(&t->t, lookup_key, v, hash, &streql);
}
1700 
/* Removes the |len|-byte string |key| from |t|.  |val| is handed to rm().
 * When an entry was removed and |alloc| is non-NULL, the entry's key storage
 * is freed through |alloc|.  Returns whether an entry was removed. */
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
                         upb_value *val, upb_alloc *alloc) {
  upb_tabkey removed_key;
  uint32_t hash = upb_murmur_hash2(key, len, 0);

  if (!rm(&t->t, strkey2(key, len), val, &removed_key, hash, &streql)) {
    return false;
  }

  /* Arena-based allocs don't need to free and won't pass an allocator. */
  if (alloc) {
    upb_free(alloc, (void*)removed_key);
  }
  return true;
}
1715 
1716 /* Iteration */
1717 
/* Points iterator |i| at table |t|, positioned at the index that begin()
 * reports for the table. */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  i->index = begin(&t->t);
  i->t = t;
}
1722 
/* Advances |i| to the index that next() returns for its current position. */
void upb_strtable_next(upb_strtable_iter *i) {
  const upb_table *tab = &i->t->t;
  i->index = next(tab, i->index);
}
1726 
upb_strtable_done(const upb_strtable_iter * i)1727 bool upb_strtable_done(const upb_strtable_iter *i) {
1728   if (!i->t) return true;
1729   return i->index >= upb_table_size(&i->t->t) ||
1730          upb_tabent_isempty(str_tabent(i));
1731 }
1732 
upb_strtable_iter_key(const upb_strtable_iter * i)1733 upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) {
1734   upb_strview key;
1735   uint32_t len;
1736   UPB_ASSERT(!upb_strtable_done(i));
1737   key.data = upb_tabstr(str_tabent(i)->key, &len);
1738   key.size = len;
1739   return key;
1740 }
1741 
upb_strtable_iter_value(const upb_strtable_iter * i)1742 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
1743   UPB_ASSERT(!upb_strtable_done(i));
1744   return _upb_value_val(str_tabent(i)->val.val);
1745 }
1746 
/* Puts |i| into the "done" state: no table and an index of SIZE_MAX. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  i->index = SIZE_MAX;
  i->t = NULL;
}
1751 
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)1752 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
1753                                const upb_strtable_iter *i2) {
1754   if (upb_strtable_done(i1) && upb_strtable_done(i2))
1755     return true;
1756   return i1->t == i2->t && i1->index == i2->index;
1757 }
1758 
1759 
1760 /* upb_inttable ***************************************************************/
1761 
1762 /* For inttables we use a hybrid structure where small keys are kept in an
1763  * array and large keys are put in the hash table. */
1764 
/* Hash callback for integer keys; forwards to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
1766 
/* Equality callback for integer keys: compares the stored key with the
 * numeric member of the lookup key. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  return k2.num == k1;
}
1770 
mutable_array(upb_inttable * t)1771 static upb_tabval *mutable_array(upb_inttable *t) {
1772   return (upb_tabval*)t->array;
1773 }
1774 
inttable_val(upb_inttable * t,uintptr_t key)1775 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
1776   if (key < t->array_size) {
1777     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
1778   } else {
1779     upb_tabent *e =
1780         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
1781     return e ? &e->val : NULL;
1782   }
1783 }
1784 
inttable_val_const(const upb_inttable * t,uintptr_t key)1785 static const upb_tabval *inttable_val_const(const upb_inttable *t,
1786                                             uintptr_t key) {
1787   return inttable_val((upb_inttable*)t, key);
1788 }
1789 
upb_inttable_count(const upb_inttable * t)1790 size_t upb_inttable_count(const upb_inttable *t) {
1791   return t->t.count + t->array_count;
1792 }
1793 
/* Debug-only consistency check.  Compiled to a no-op unless UPB_DEBUG_TABLE
 * is defined and NDEBUG is not.  When enabled, asserts that every key
 * reachable by iteration can be looked up and that the number of iterated
 * entries equals upb_inttable_count(). */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* Very expensive: makes every insert/delete O(N). */
    size_t seen = 0;
    upb_inttable_iter it;
    for (upb_inttable_begin(&it, t); !upb_inttable_done(&it);
         upb_inttable_next(&it)) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      seen++;
    }
    UPB_ASSERT(seen == upb_inttable_count(t));
  }
#endif
}
1809 
/* Initializes |t| with an array part of |asize| slots (at least 1) and a hash
 * part of size_lg2 |hsize_lg2|, both allocated from |a|.  Every array slot
 * is filled with 0xff bytes.  Returns false if either allocation fails; in
 * that case nothing remains allocated. */
bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2,
                            upb_alloc *a) {
  size_t array_bytes;

  if (!init(&t->t, hsize_lg2, a)) return false;

  /* The array part is always at least 1 long, so key 0 is never found in the
   * hash part, which simplifies things. */
  t->array_count = 0;
  t->array_size = UPB_MAX(1, asize);
  array_bytes = sizeof(upb_value) * t->array_size;

  t->array = upb_malloc(a, array_bytes);
  if (t->array == NULL) {
    uninit(&t->t, a);
    return false;
  }

  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}
1829 
/* Initializes |t| with default sizing: a requested array size of 0 (which
 * upb_inttable_sizedinit() raises to 1) and a hash part with size_lg2 4.
 * |ctype| is accepted only for API compatibility and is not consulted.
 * Returns the result of upb_inttable_sizedinit(). */
bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  UPB_UNUSED(ctype);
  return upb_inttable_sizedinit(t, 0, 4, a);
}
1833 
/* Releases both parts of |t|: the hash part via uninit(), then the array
 * part via upb_free(). */
void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  upb_tabval *arr;
  uninit(&t->t, a);
  arr = mutable_array(t);
  upb_free(a, arr);
}
1838 
/* Replaces the hash part of |t| with one of twice the size, re-inserting all
 * existing hash entries.  The array part is left as it is.  Returns false
 * (table unchanged) if the larger table cannot be initialized. */
static bool inttable_growhash(upb_inttable *t, upb_alloc *a) {
  upb_table bigger;
  size_t idx;

  if (!init(&bigger, t->t.size_lg2 + 1, a)) {
    return false;
  }

  for (idx = begin(&t->t); idx < upb_table_size(&t->t);
       idx = next(&t->t, idx)) {
    const upb_tabent *ent = &t->t.entries[idx];
    upb_value v;

    _upb_value_setval(&v, ent->val.val);
    insert(&bigger, intkey(ent->key), ent->key, v, upb_inthash(ent->key),
           &inthash, &inteql);
  }

  UPB_ASSERT(t->t.count == bigger.count);

  uninit(&t->t, a);
  t->t = bigger;
  return true;
}

/* Inserts |key| -> |val| into |t|.  Keys below array_size are stored in the
 * array part (the slot is asserted to be empty); other keys go to the hash
 * part, which is doubled first when full.  Returns false only if that growth
 * fails. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
  } else {
    /* Only the hash part is resized; the array part is re-used. */
    if (isfull(&t->t) && !inttable_growhash(t, a)) {
      return false;
    }
    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  }

  check(t);
  return true;
}
1879 
/* Looks up |key| in |t|.  Returns false if absent.  If present, returns true
 * and, when |v| is non-NULL, stores the value in |*v|. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *slot = inttable_val_const(t, key);
  if (slot == NULL) return false;
  if (v != NULL) {
    _upb_value_setval(v, slot->val);
  }
  return true;
}
1886 
/* Overwrites the value stored for an existing |key|.  Returns false, changing
 * nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *slot = inttable_val(t, key);
  if (slot == NULL) return false;
  slot->val = val.val;
  return true;
}
1893 
/* Removes |key| from |t|.  Returns whether an entry was removed.  For a key in
 * the array part, the old value is stored in |*val| when |val| is non-NULL
 * and the slot is reset to UPB_TABVALUE_EMPTY_INIT; for other keys removal
 * (and |val|) is delegated to rm(). */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool removed = false;

  if (key >= t->array_size) {
    removed = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    if (val) {
      _upb_value_setval(val, t->array[key].val);
    }
    mutable_array(t)[key] = empty;
    t->array_count--;
    removed = true;
  }

  check(t);
  return removed;
}
1914 
/* Inserts |val| under the key equal to the table's current entry count. */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
  uintptr_t next_key = upb_inttable_count(t);
  return upb_inttable_insert2(t, next_key, val, a);
}
1918 
upb_inttable_pop(upb_inttable * t)1919 upb_value upb_inttable_pop(upb_inttable *t) {
1920   upb_value val;
1921   bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
1922   UPB_ASSERT(ok);
1923   return val;
1924 }
1925 
/* Pointer-keyed insert: the pointer value, converted to uintptr_t, is used as
 * the integer key. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_insert2(t, int_key, val, a);
}
1930 
/* Pointer-keyed lookup: the pointer value, converted to uintptr_t, is used as
 * the integer key. */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_lookup(t, int_key, v);
}
1935 
/* Pointer-keyed remove: the pointer value, converted to uintptr_t, is used as
 * the integer key. */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_remove(t, int_key, val);
}
1939 
/* Rebuilds |t| into a freshly sized table, choosing how many keys go into the
 * array part from a power-of-two histogram of the keys and the MIN_DENSITY
 * threshold; the remaining keys go into a hash part sized from MAX_LOAD.
 *
 * The function has no way to report failure.  If the new table cannot be
 * allocated, or an entry cannot be inserted into it, the partially built
 * table is discarded and |t| is left exactly as it was (it still holds every
 * entry). */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    if (!upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a)) {
      /* Allocation failed: keep the existing table rather than writing into
       * an uninitialized one. */
      return;
    }
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      if (!upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a)) {
        /* Could not copy every entry: drop the partial copy so nothing is
         * lost from |t|. */
        upb_inttable_uninit2(&new_t, a);
        return;
      }
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  upb_inttable_uninit2(t, a);
  *t = new_t;
}
1996 
1997 /* Iteration. */
1998 
int_tabent(const upb_inttable_iter * i)1999 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
2000   UPB_ASSERT(!i->array_part);
2001   return &i->t->t.entries[i->index];
2002 }
2003 
int_arrent(const upb_inttable_iter * i)2004 static upb_tabval int_arrent(const upb_inttable_iter *i) {
2005   UPB_ASSERT(i->array_part);
2006   return i->t->array[i->index];
2007 }
2008 
/* Positions |i| on the first entry of |t|.  The iterator starts one step
 * before array index 0 and is then advanced once with
 * upb_inttable_next(). */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
2015 
/* Advances |iter| by one entry.  In the array part it skips forward to the
 * next occupied slot; when the array is exhausted it switches to the hash
 * part at the index begin() reports.  In the hash part it moves to the index
 * next() reports. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *t = iter->t;

  if (!iter->array_part) {
    iter->index = next(&t->t, iter->index);
    return;
  }

  for (;;) {
    iter->index++;
    if (iter->index >= t->array_size) break;
    if (upb_arrhas(int_arrent(iter))) return;
  }

  /* Array part exhausted: continue with the hash part. */
  iter->array_part = false;
  iter->index = begin(&t->t);
}
2030 
upb_inttable_done(const upb_inttable_iter * i)2031 bool upb_inttable_done(const upb_inttable_iter *i) {
2032   if (!i->t) return true;
2033   if (i->array_part) {
2034     return i->index >= i->t->array_size ||
2035            !upb_arrhas(int_arrent(i));
2036   } else {
2037     return i->index >= upb_table_size(&i->t->t) ||
2038            upb_tabent_isempty(int_tabent(i));
2039   }
2040 }
2041 
upb_inttable_iter_key(const upb_inttable_iter * i)2042 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
2043   UPB_ASSERT(!upb_inttable_done(i));
2044   return i->array_part ? i->index : int_tabent(i)->key;
2045 }
2046 
upb_inttable_iter_value(const upb_inttable_iter * i)2047 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
2048   UPB_ASSERT(!upb_inttable_done(i));
2049   return _upb_value_val(
2050       i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val);
2051 }
2052 
/* Puts |i| into the "done" state: no table, index SIZE_MAX, not in the array
 * part. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
  i->t = NULL;
}
2058 
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)2059 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
2060                                           const upb_inttable_iter *i2) {
2061   if (upb_inttable_done(i1) && upb_inttable_done(i2))
2062     return true;
2063   return i1->t == i2->t && i1->index == i2->index &&
2064          i1->array_part == i2->array_part;
2065 }
2066 
2067 #if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
2068 /* -----------------------------------------------------------------------------
2069  * MurmurHash2, by Austin Appleby (released as public domain).
2070  * Reformatted and C99-ified by Joshua Haberman.
2071  * Note - This code makes a few assumptions about how your machine behaves -
2072  *   1. We can read a 4-byte value from any address without crashing
2073  *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
2074  * And it has a few limitations -
2075  *   1. It will not work incrementally.
2076  *   2. It will not produce the same results on little-endian and big-endian
2077  *      machines. */
/* MurmurHash2 over the |len| bytes at |key|, seeded with |seed|.  Input is
 * consumed four bytes at a time (copied out with memcpy, so |key| needs no
 * particular alignment), then the 0-3 trailing bytes are folded in, then the
 * hash is given a final mix. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t *p = (const uint8_t *)key;

  /* Initialize the hash to a 'random' value. */
  uint32_t h = seed ^ len;

  /* Mix 4 bytes at a time into the hash. */
  for (; len >= 4; p += 4, len -= 4) {
    uint32_t k;
    memcpy(&k, p, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;
  }

  /* Fold in the last few bytes of the input; |len| is now 0..3. */
  if (len == 3) {
    h ^= p[2] << 16;
  }
  if (len >= 2) {
    h ^= p[1] << 8;
  }
  if (len >= 1) {
    h ^= p[0];
    h *= m;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
2119 
2120 #else /* !UPB_UNALIGNED_READS_OK */
2121 
2122 /* -----------------------------------------------------------------------------
2123  * MurmurHashAligned2, by Austin Appleby
2124  * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
2125  * on certain platforms.
2126  * Performance will be lower than MurmurHash2 */
2127 
2128 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
2129 
/* Hashes the |len| bytes at |key| with |seed| (MurmurHashAligned2 variant).
 *
 * If |key| is not 4-byte aligned and at least 4 bytes are available, the
 * leading bytes up to the next aligned address are gathered one at a time and
 * every later 32-bit load is made from an aligned address; the words fed to
 * MIX are stitched together from two neighbouring loads with shifts.
 * Otherwise the plain word-at-a-time loop in the else branch is used (it only
 * runs its 4-byte loads when the pointer is already aligned, since len < 4
 * skips the loop entirely). */
upb_murmur_hash2(const void * key,size_t len,uint32_t seed)2130 uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
2131   const uint32_t m = 0x5bd1e995;
2132   const int32_t r = 24;
2133   const uint8_t * data = (const uint8_t *)key;
2134   uint32_t h = (uint32_t)(seed ^ len);
/* Low two address bits: 0 means |data| is already 4-byte aligned. */
2135   uint8_t align = (uintptr_t)data & 3;
2136 
2137   if(align && (len >= 4)) {
2138     /* Pre-load the temp registers */
2139     uint32_t t = 0, d = 0;
2140     int32_t sl;
2141     int32_t sr;
2142 
/* Gather the (4 - align) bytes that precede the next aligned address.  The
 * cases fall through on purpose. */
2143     switch(align) {
2144       case 1: t |= data[2] << 16;
2145       case 2: t |= data[1] << 8;
2146       case 3: t |= data[0];
2147     }
2148 
2149     t <<= (8 * align);
2150 
/* From here on |data| is 4-byte aligned. */
2151     data += 4-align;
2152     len -= 4-align;
2153 
/* Shift amounts used to combine the carried-over bytes in |t| with the next
 * aligned word in |d|. */
2154     sl = 8 * (4-align);
2155     sr = 8 * align;
2156 
2157     /* Mix */
2158 
2159     while(len >= 4) {
2160       uint32_t k;
2161 
2162       d = *(uint32_t *)data;
2163       t = (t >> sr) | (d << sl);
2164 
2165       k = t;
2166 
2167       MIX(h,k,m);
2168 
2169       t = d;
2170 
2171       data += 4;
2172       len -= 4;
2173     }
2174 
2175     /* Handle leftover data in temp registers */
2176 
2177     d = 0;
2178 
/* Enough bytes remain to complete one more 4-byte word together with the
 * bytes still held in |t|. */
2179     if(len >= align) {
2180       uint32_t k;
2181 
/* Deliberate fallthrough: reads exactly |align| bytes. */
2182       switch(align) {
2183         case 3: d |= data[2] << 16;
2184         case 2: d |= data[1] << 8;
2185         case 1: d |= data[0];
2186       }
2187 
2188       k = (t >> sr) | (d << sl);
2189       MIX(h,k,m);
2190 
2191       data += align;
2192       len -= align;
2193 
2194       /* ----------
2195        * Handle tail bytes */
2196 
/* Deliberate fallthrough; nothing is mixed when len == 0. */
2197       switch(len) {
2198         case 3: h ^= data[2] << 16;
2199         case 2: h ^= data[1] << 8;
2200         case 1: h ^= data[0]; h *= m;
2201       };
2202     } else {
/* Fewer than |align| bytes remain: fold them, together with the bytes held in
 * |t|, into the hash in a single step.  Deliberate fallthrough; case 0 always
 * runs. */
2203       switch(len) {
2204         case 3: d |= data[2] << 16;
2205         case 2: d |= data[1] << 8;
2206         case 1: d |= data[0];
2207         case 0: h ^= (t >> sr) | (d << sl); h *= m;
2208       }
2209     }
2210 
/* Final mix. */
2211     h ^= h >> 13;
2212     h *= m;
2213     h ^= h >> 15;
2214 
2215     return h;
2216   } else {
2217     while(len >= 4) {
2218       uint32_t k = *(uint32_t *)data;
2219 
2220       MIX(h,k,m);
2221 
2222       data += 4;
2223       len -= 4;
2224     }
2225 
2226     /* ----------
2227      * Handle tail bytes */
2228 
/* Deliberate fallthrough; nothing is mixed when len == 0. */
2229     switch(len) {
2230       case 3: h ^= data[2] << 16;
2231       case 2: h ^= data[1] << 8;
2232       case 1: h ^= data[0]; h *= m;
2233     };
2234 
/* Final mix. */
2235     h ^= h >> 13;
2236     h *= m;
2237     h ^= h >> 15;
2238 
2239     return h;
2240   }
2241 }
2242 #undef MIX
2243 
2244 #endif /* UPB_UNALIGNED_READS_OK */
2245 
2246 
2247 #include <errno.h>
2248 #include <stdarg.h>
2249 #include <stddef.h>
2250 #include <stdint.h>
2251 #include <stdio.h>
2252 #include <stdlib.h>
2253 #include <string.h>
2254 
2255 
2256 /* upb_status *****************************************************************/
2257 
/* Resets |status| to "ok" with an empty message.  A NULL |status| is
 * ignored. */
void upb_status_clear(upb_status *status) {
  if (status == NULL) return;
  status->msg[0] = '\0';
  status->ok = true;
}
2263 
upb_ok(const upb_status * status)2264 bool upb_ok(const upb_status *status) { return status->ok; }
2265 
upb_status_errmsg(const upb_status * status)2266 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
2267 
/* Marks |status| as failed and copies |msg| into it, truncated to
 * UPB_STATUS_MAX_MESSAGE - 1 characters and always NUL-terminated.  A NULL
 * |status| is ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  if (status == NULL) return;
  strncpy(status->msg, msg, UPB_STATUS_MAX_MESSAGE - 1);
  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
  status->ok = false;
}
2274 
/* printf-style variant of upb_status_seterrmsg(): packs the variadic
 * arguments into a va_list and forwards to upb_status_vseterrf(). */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
2281 
/* Marks |status| as failed and formats |fmt| / |args| into its message buffer
 * with _upb_vsnprintf(), then forces NUL-termination of the last byte.  A
 * NULL |status| is ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status == NULL) return;
  status->ok = false;
  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
}
2288 
2289 /* upb_alloc ******************************************************************/
2290 
/* Allocation callback backed by the C heap.  A |size| of 0 frees |ptr| and
 * returns NULL; any other size is forwarded to realloc().  |alloc| and
 * |oldsize| are not used. */
static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
                                  size_t size) {
  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);
  if (size != 0) {
    return realloc(ptr, size);
  }
  free(ptr);
  return NULL;
}
2302 
/* Process-wide allocator object whose callback is upb_global_allocfunc(),
 * i.e. allocations made through it go to realloc()/free(). */
2303 upb_alloc upb_alloc_global = {&upb_global_allocfunc};
2304 
2305 /* upb_arena ******************************************************************/
2306 
2307 /* Be conservative and choose 16 in case anyone is using SSE. */
2308 
/* Arena state.  upb_arena_init() places this struct at the tail of the first
 * block. */
2309 struct upb_arena {
/* head.alloc.func is the arena's allocation callback; head.ptr and head.end
 * are set by upb_arena_addblock() to the start of the usable area and the end
 * of the current block. */
2310   _upb_arena_head head;
/* First usable byte of the current block (just past its aligned mem_block
 * header). */
2311   char *start;
2312 
2313   /* Allocator to allocate arena blocks.  We are responsible for freeing these
2314    * when we are destroyed. */
2315   upb_alloc *block_alloc;
2316 
/* bytes_allocated: bytes consumed in blocks that are no longer current
 * (accumulated each time a new block is added).
 * next_block_size: minimum payload size used for the next block allocation.
 * max_block_size: cap applied when next_block_size is doubled. */
2317   size_t bytes_allocated;
2318   size_t next_block_size;
2319   size_t max_block_size;
2320 
2321   /* Linked list of blocks, newest first.  Points to a mem_block (declared below). */
2322   void *block_head;
2323 
2324   /* Cleanup entries, newest first.  Points to a cleanup_ent (declared below). */
2325   void *cleanup_head;
2326 };
2327 
/* Header stored at the start of every arena block.  Blocks form a singly
 * linked list through |next|. */
2328 typedef struct mem_block {
2329   struct mem_block *next;
/* True if upb_arena_free() must release this block through the arena's
 * block_alloc; false for caller-supplied memory. */
2330   bool owned;
2331   /* Data follows. */
2332 } mem_block;
2333 
/* One registered cleanup callback.  Entries form a singly linked list through
 * |next|; upb_arena_free() calls cleanup(ud) for each entry. */
2334 typedef struct cleanup_ent {
2335   struct cleanup_ent *next;
2336   upb_cleanup_func *cleanup;
2337   void *ud;
2338 } cleanup_ent;
2339 
/* Makes the |size|-byte region at |ptr| the arena's current block.  The
 * region's first bytes become its mem_block header; the usable area starts at
 * the next aligned offset and ends at ptr + size.  If a block was already
 * current, the bytes used in it are added to bytes_allocated first.  |owned|
 * records whether the arena must free the block. */
static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
                               bool owned) {
  mem_block *blk = ptr;
  char *base = (char*)blk;

  if (a->block_head != NULL) {
    a->bytes_allocated += a->head.ptr - a->start;
  }

  blk->owned = owned;
  blk->next = a->block_head;
  a->block_head = blk;

  a->start = base + _upb_arena_alignup(sizeof(mem_block));
  a->head.ptr = a->start;
  a->head.end = base + size;

  /* TODO(haberman): ASAN poison. */
}
2358 
upb_arena_allocblock(upb_arena * a,size_t size)2359 static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
2360   size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
2361   mem_block *block = upb_malloc(a->block_alloc, block_size);
2362 
2363   if (!block) {
2364     return NULL;
2365   }
2366 
2367   upb_arena_addblock(a, block, block_size, true);
2368   a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
2369 
2370   return block;
2371 }
2372 
/* Allocation path that first adds a fresh block able to hold |size| bytes and
 * then retries upb_arena_malloc().  Returns NULL if the block cannot be
 * allocated. */
void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
  if (upb_arena_allocblock(a, size) == NULL) {
    return NULL;  /* Out of memory. */
  }
  return upb_arena_malloc(a, size);
}
2378 
/* upb_alloc callback implemented on top of the arena.
 *
 *   size == 0  : a "free" request; nothing is released, NULL is returned.
 *   otherwise  : a fresh |size|-byte region is taken from the arena and, when
 *                |oldsize| > 0, the existing contents of |ptr| are copied
 *                into it.
 *
 * Only min(oldsize, size) bytes are copied, so a shrinking realloc never
 * writes past the end of the new, smaller region.  Returns NULL if the arena
 * cannot satisfy the request. */
static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
                               size_t size) {
  upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
  void *ret;

  if (size == 0) {
    return NULL;  /* We are an arena, don't need individual frees. */
  }

  ret = upb_arena_malloc(a, size);
  if (!ret) return NULL;

  /* TODO(haberman): special-case if this is a realloc of the last alloc? */

  if (oldsize > 0) {
    /* Preserve existing data, truncated to the new size when shrinking. */
    memcpy(ret, ptr, UPB_MIN(oldsize, size));
  }

  /* TODO(haberman): ASAN unpoison. */
  return ret;
}
2400 
2401 /* Public Arena API ***********************************************************/
2402 
2403 #define upb_alignof(type) offsetof (struct { char c; type member; }, member)
2404 
upb_arena_init(void * mem,size_t n,upb_alloc * alloc)2405 upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) {
2406   const size_t first_block_overhead = sizeof(upb_arena) + sizeof(mem_block);
2407   upb_arena *a;
2408   bool owned = false;
2409 
2410   /* Round block size down to alignof(*a) since we will allocate the arena
2411    * itself at the end. */
2412   n &= ~(upb_alignof(upb_arena) - 1);
2413 
2414   if (n < first_block_overhead) {
2415     /* We need to malloc the initial block. */
2416     n = first_block_overhead + 256;
2417     owned = true;
2418     if (!alloc || !(mem = upb_malloc(alloc, n))) {
2419       return NULL;
2420     }
2421   }
2422 
2423   a = (void*)((char*)mem + n - sizeof(*a));
2424   n -= sizeof(*a);
2425 
2426   a->head.alloc.func = &upb_arena_doalloc;
2427   a->head.ptr = NULL;
2428   a->head.end = NULL;
2429   a->start = NULL;
2430   a->block_alloc = &upb_alloc_global;
2431   a->bytes_allocated = 0;
2432   a->next_block_size = 256;
2433   a->max_block_size = 16384;
2434   a->cleanup_head = NULL;
2435   a->block_head = NULL;
2436   a->block_alloc = alloc;
2437 
2438   upb_arena_addblock(a, mem, n, owned);
2439 
2440   return a;
2441 }
2442 
2443 #undef upb_alignof
2444 
/* Destroys the arena: runs every registered cleanup callback, then frees
 * every block marked as owned through block_alloc.  The block list is walked
 * starting from the head that was current when this function was entered. */
void upb_arena_free(upb_arena *a) {
  cleanup_ent *ent;
  mem_block *block = a->block_head;

  for (ent = a->cleanup_head; ent != NULL; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* Blocks go last: the cleanup entries themselves live in block memory. */
  while (block != NULL) {
    /* Read the link before the block is released. */
    mem_block *following = block->next;

    if (block->owned) {
      upb_free(a->block_alloc, block);
    }

    block = following;
  }
}
2467 
/* Registers |func| to be called with |ud| when the arena is freed.  The entry
 * is allocated from the arena itself and pushed on the front of the cleanup
 * list.  Returns false if that allocation fails. */
bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
  cleanup_ent *entry = upb_malloc(&a->head.alloc, sizeof(cleanup_ent));
  if (entry == NULL) {
    return false;  /* Out of memory. */
  }

  entry->next = a->cleanup_head;
  entry->ud = ud;
  entry->cleanup = func;
  a->cleanup_head = entry;

  return true;
}
2481 
upb_arena_bytesallocated(const upb_arena * a)2482 size_t upb_arena_bytesallocated(const upb_arena *a) {
2483   return a->bytes_allocated + (a->head.ptr - a->start);
2484 }
2485 /* This file was generated by upbc (the upb compiler) from the input
2486  * file:
2487  *
2488  *     google/protobuf/descriptor.proto
2489  *
2490  * Do not edit -- your changes will be discarded when the file is
2491  * regenerated. */
2492 
2493 #include <stddef.h>
2494 
2495 
/* Generated layout tables for google.protobuf.FileDescriptorSet: the
 * sub-message layouts it refers to, its one field entry, and the layout
 * object tying them together.  UPB_SIZE(a, b) evaluates to a when
 * UINTPTR_MAX == 0xffffffff and to b otherwise.
 * NOTE(review): the meaning of the initializer columns comes from
 * upb_msglayout_field / upb_msglayout, which are not defined in this part of
 * the file; the first field column looks like the proto field number and the
 * count in msginit matches the field array length -- confirm. */
2496 static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
2497   &google_protobuf_FileDescriptorProto_msginit,
2498 };
2499 
2500 static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
2501   {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
2502 };
2503 
2504 const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
2505   &google_protobuf_FileDescriptorSet_submsgs[0],
2506   &google_protobuf_FileDescriptorSet__fields[0],
2507   UPB_SIZE(4, 8), 1, false,
2508 };
2509 
/* Generated layout tables for google.protobuf.FileDescriptorProto: six
 * sub-message layouts, twelve field entries, and the layout object.
 * UPB_SIZE(a, b) evaluates to a when UINTPTR_MAX == 0xffffffff and to b
 * otherwise.
 * NOTE(review): column meanings come from upb_msglayout_field, which is not
 * defined in this part of the file -- confirm before relying on them. */
2510 static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
2511   &google_protobuf_DescriptorProto_msginit,
2512   &google_protobuf_EnumDescriptorProto_msginit,
2513   &google_protobuf_FieldDescriptorProto_msginit,
2514   &google_protobuf_FileOptions_msginit,
2515   &google_protobuf_ServiceDescriptorProto_msginit,
2516   &google_protobuf_SourceCodeInfo_msginit,
2517 };
2518 
2519 static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
2520   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
2521   {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
2522   {3, UPB_SIZE(36, 72), 0, 0, 9, 3},
2523   {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
2524   {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
2525   {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
2526   {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
2527   {8, UPB_SIZE(28, 56), 4, 3, 11, 1},
2528   {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
2529   {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
2530   {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
2531   {12, UPB_SIZE(20, 40), 3, 0, 9, 1},
2532 };
2533 
2534 const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
2535   &google_protobuf_FileDescriptorProto_submsgs[0],
2536   &google_protobuf_FileDescriptorProto__fields[0],
2537   UPB_SIZE(64, 128), 12, false,
2538 };
2539 
/* Generated layout tables for google.protobuf.DescriptorProto: sub-message
 * layouts, ten field entries, and the layout object.  UPB_SIZE(a, b)
 * evaluates to a when UINTPTR_MAX == 0xffffffff and to b otherwise.
 * The submsgs array is declared with 8 elements but lists only 7
 * initializers, so its last element is a null pointer.
 * NOTE(review): column meanings come from upb_msglayout_field, which is not
 * defined in this part of the file -- confirm before relying on them. */
2540 static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
2541   &google_protobuf_DescriptorProto_msginit,
2542   &google_protobuf_DescriptorProto_ExtensionRange_msginit,
2543   &google_protobuf_DescriptorProto_ReservedRange_msginit,
2544   &google_protobuf_EnumDescriptorProto_msginit,
2545   &google_protobuf_FieldDescriptorProto_msginit,
2546   &google_protobuf_MessageOptions_msginit,
2547   &google_protobuf_OneofDescriptorProto_msginit,
2548 };
2549 
2550 static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
2551   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
2552   {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
2553   {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
2554   {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
2555   {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
2556   {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
2557   {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
2558   {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
2559   {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
2560   {10, UPB_SIZE(44, 88), 0, 0, 9, 3},
2561 };
2562 
2563 const upb_msglayout google_protobuf_DescriptorProto_msginit = {
2564   &google_protobuf_DescriptorProto_submsgs[0],
2565   &google_protobuf_DescriptorProto__fields[0],
2566   UPB_SIZE(48, 96), 10, false,
2567 };
2568 
/* Generated layout tables for google.protobuf.DescriptorProto.ExtensionRange:
 * one sub-message layout, three field entries, and the layout object.
 * UPB_SIZE(a, b) evaluates to a when UINTPTR_MAX == 0xffffffff and to b
 * otherwise.
 * NOTE(review): column meanings come from upb_msglayout_field, which is not
 * defined in this part of the file -- confirm before relying on them. */
2569 static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
2570   &google_protobuf_ExtensionRangeOptions_msginit,
2571 };
2572 
2573 static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
2574   {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
2575   {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
2576   {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
2577 };
2578 
2579 const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
2580   &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
2581   &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
2582   UPB_SIZE(16, 24), 3, false,
2583 };
2584 
/* Layout tables for google_protobuf_DescriptorProto_ReservedRange: two field
 * entries and a upb_msglayout with no submessage list (NULL). */
static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
  NULL,
  &google_protobuf_DescriptorProto_ReservedRange__fields[0],
  UPB_SIZE(12, 12), 2, false,
};
2595 
/* Layout tables for google_protobuf_ExtensionRangeOptions: a single field
 * entry (number 999) whose submessage layout is
 * google_protobuf_UninterpretedOption_msginit. */
static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
  {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
  &google_protobuf_ExtensionRangeOptions_submsgs[0],
  &google_protobuf_ExtensionRangeOptions__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2609 
/* Layout tables for google_protobuf_FieldDescriptorProto: one submessage
 * layout, eleven field entries, and the upb_msglayout that references both.
 * The first column of the field entries is not contiguous (it jumps from 10
 * to 17). */
static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
  &google_protobuf_FieldOptions_msginit,
};

static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
  {1, UPB_SIZE(36, 40), 6, 0, 9, 1},
  {2, UPB_SIZE(44, 56), 7, 0, 9, 1},
  {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
  {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
  {6, UPB_SIZE(52, 72), 8, 0, 9, 1},
  {7, UPB_SIZE(60, 88), 9, 0, 9, 1},
  {8, UPB_SIZE(76, 120), 11, 0, 11, 1},
  {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
  {10, UPB_SIZE(68, 104), 10, 0, 9, 1},
  {17, UPB_SIZE(32, 32), 5, 0, 8, 1},
};

const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
  &google_protobuf_FieldDescriptorProto_submsgs[0],
  &google_protobuf_FieldDescriptorProto__fields[0],
  UPB_SIZE(80, 128), 11, false,
};
2633 
/* Layout tables for google_protobuf_OneofDescriptorProto: one submessage
 * layout, two field entries, and the upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
  &google_protobuf_OneofOptions_msginit,
};

static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
};

const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
  &google_protobuf_OneofDescriptorProto_submsgs[0],
  &google_protobuf_OneofDescriptorProto__fields[0],
  UPB_SIZE(16, 32), 2, false,
};
2648 
/* Layout tables for google_protobuf_EnumDescriptorProto: three submessage
 * layouts, five field entries, and the upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
  &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
  &google_protobuf_EnumOptions_msginit,
  &google_protobuf_EnumValueDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
  {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
  {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
  {5, UPB_SIZE(24, 48), 0, 0, 9, 3},
};

const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
  &google_protobuf_EnumDescriptorProto_submsgs[0],
  &google_protobuf_EnumDescriptorProto__fields[0],
  UPB_SIZE(32, 64), 5, false,
};
2668 
/* Layout tables for google_protobuf_EnumDescriptorProto_EnumReservedRange:
 * two field entries and a upb_msglayout with no submessage list (NULL). */
static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
  NULL,
  &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
  UPB_SIZE(12, 12), 2, false,
};
2679 
/* Layout tables for google_protobuf_EnumValueDescriptorProto: one submessage
 * layout, three field entries, and the upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
  &google_protobuf_EnumValueOptions_msginit,
};

static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
  {1, UPB_SIZE(8, 8), 2, 0, 9, 1},
  {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
};

const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
  &google_protobuf_EnumValueDescriptorProto_submsgs[0],
  &google_protobuf_EnumValueDescriptorProto__fields[0],
  UPB_SIZE(24, 32), 3, false,
};
2695 
/* Layout tables for google_protobuf_ServiceDescriptorProto: two submessage
 * layouts, three field entries, and the upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
  &google_protobuf_MethodDescriptorProto_msginit,
  &google_protobuf_ServiceOptions_msginit,
};

static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
  {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
};

const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
  &google_protobuf_ServiceDescriptorProto_submsgs[0],
  &google_protobuf_ServiceDescriptorProto__fields[0],
  UPB_SIZE(24, 48), 3, false,
};
2712 
/* Layout tables for google_protobuf_MethodDescriptorProto: one submessage
 * layout, six field entries, and the upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
  &google_protobuf_MethodOptions_msginit,
};

static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
  {1, UPB_SIZE(4, 8), 3, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 4, 0, 9, 1},
  {3, UPB_SIZE(20, 40), 5, 0, 9, 1},
  {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
  {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
};

const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
  &google_protobuf_MethodDescriptorProto_submsgs[0],
  &google_protobuf_MethodDescriptorProto__fields[0],
  UPB_SIZE(32, 64), 6, false,
};
2731 
/* Layout tables for google_protobuf_FileOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), twenty-one field entries
 * listed in increasing order of their first column, and the upb_msglayout
 * that references both. */
static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
  {1, UPB_SIZE(28, 32), 11, 0, 9, 1},
  {8, UPB_SIZE(36, 48), 12, 0, 9, 1},
  {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
  {11, UPB_SIZE(44, 64), 13, 0, 9, 1},
  {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
  {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
  {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
  {20, UPB_SIZE(20, 20), 6, 0, 8, 1},
  {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
  {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
  {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
  {36, UPB_SIZE(52, 80), 14, 0, 9, 1},
  {37, UPB_SIZE(60, 96), 15, 0, 9, 1},
  {39, UPB_SIZE(68, 112), 16, 0, 9, 1},
  {40, UPB_SIZE(76, 128), 17, 0, 9, 1},
  {41, UPB_SIZE(84, 144), 18, 0, 9, 1},
  {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
  {44, UPB_SIZE(92, 160), 19, 0, 9, 1},
  {45, UPB_SIZE(100, 176), 20, 0, 9, 1},
  {999, UPB_SIZE(108, 192), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FileOptions_msginit = {
  &google_protobuf_FileOptions_submsgs[0],
  &google_protobuf_FileOptions__fields[0],
  UPB_SIZE(112, 208), 21, false,
};
2765 
/* Layout tables for google_protobuf_MessageOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), five field entries, and the
 * upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
  {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
  {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
  {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
  {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_MessageOptions_msginit = {
  &google_protobuf_MessageOptions_submsgs[0],
  &google_protobuf_MessageOptions__fields[0],
  UPB_SIZE(12, 16), 5, false,
};
2783 
/* Layout tables for google_protobuf_FieldOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), seven field entries, and the
 * upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
  {1, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {2, UPB_SIZE(24, 24), 3, 0, 8, 1},
  {3, UPB_SIZE(25, 25), 4, 0, 8, 1},
  {5, UPB_SIZE(26, 26), 5, 0, 8, 1},
  {6, UPB_SIZE(16, 16), 2, 0, 14, 1},
  {10, UPB_SIZE(27, 27), 6, 0, 8, 1},
  {999, UPB_SIZE(28, 32), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FieldOptions_msginit = {
  &google_protobuf_FieldOptions_submsgs[0],
  &google_protobuf_FieldOptions__fields[0],
  UPB_SIZE(32, 40), 7, false,
};
2803 
/* Layout tables for google_protobuf_OneofOptions: a single field entry
 * (number 999) whose submessage layout is
 * google_protobuf_UninterpretedOption_msginit. */
static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
  {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_OneofOptions_msginit = {
  &google_protobuf_OneofOptions_submsgs[0],
  &google_protobuf_OneofOptions__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2817 
/* Layout tables for google_protobuf_EnumOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), three field entries, and the
 * upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
  {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_EnumOptions_msginit = {
  &google_protobuf_EnumOptions_submsgs[0],
  &google_protobuf_EnumOptions__fields[0],
  UPB_SIZE(8, 16), 3, false,
};
2833 
/* Layout tables for google_protobuf_EnumValueOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), two field entries, and the
 * upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
  {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
  &google_protobuf_EnumValueOptions_submsgs[0],
  &google_protobuf_EnumValueOptions__fields[0],
  UPB_SIZE(8, 16), 2, false,
};
2848 
/* Layout tables for google_protobuf_ServiceOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), two field entries, and the
 * upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
  {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_ServiceOptions_msginit = {
  &google_protobuf_ServiceOptions_submsgs[0],
  &google_protobuf_ServiceOptions__fields[0],
  UPB_SIZE(8, 16), 2, false,
};
2863 
/* Layout tables for google_protobuf_MethodOptions: one submessage layout
 * (google_protobuf_UninterpretedOption_msginit), three field entries, and the
 * upb_msglayout that references both. */
static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
  {33, UPB_SIZE(16, 16), 2, 0, 8, 1},
  {34, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {999, UPB_SIZE(20, 24), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_MethodOptions_msginit = {
  &google_protobuf_MethodOptions_submsgs[0],
  &google_protobuf_MethodOptions__fields[0],
  UPB_SIZE(24, 32), 3, false,
};
2879 
/* Layout tables for google_protobuf_UninterpretedOption: one submessage
 * layout (the NamePart layout), seven field entries, and the upb_msglayout
 * that references both. */
static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
  &google_protobuf_UninterpretedOption_NamePart_msginit,
};

static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
  {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
  {3, UPB_SIZE(32, 32), 4, 0, 9, 1},
  {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
  {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
  {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
  {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
  {8, UPB_SIZE(48, 64), 6, 0, 9, 1},
};

const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
  &google_protobuf_UninterpretedOption_submsgs[0],
  &google_protobuf_UninterpretedOption__fields[0],
  UPB_SIZE(64, 96), 7, false,
};
2899 
/* Layout tables for google_protobuf_UninterpretedOption_NamePart: two field
 * entries and a upb_msglayout with no submessage list (NULL).  These are the
 * only entries in this part of the file whose last column is 2. */
static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
  {1, UPB_SIZE(4, 8), 2, 0, 9, 2},
  {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
};

const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
  NULL,
  &google_protobuf_UninterpretedOption_NamePart__fields[0],
  UPB_SIZE(16, 32), 2, false,
};
2910 
/* Layout tables for google_protobuf_SourceCodeInfo: a single field entry
 * whose submessage layout is google_protobuf_SourceCodeInfo_Location_msginit. */
static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
  &google_protobuf_SourceCodeInfo_Location_msginit,
};

static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
  &google_protobuf_SourceCodeInfo_submsgs[0],
  &google_protobuf_SourceCodeInfo__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2924 
/* Layout tables for google_protobuf_SourceCodeInfo_Location: five field
 * entries and a upb_msglayout with no submessage list (NULL).  The first two
 * entries use _UPB_LABEL_PACKED in their last column instead of a literal. */
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
  {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
  {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
  {3, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {4, UPB_SIZE(12, 24), 2, 0, 9, 1},
  {6, UPB_SIZE(28, 56), 0, 0, 9, 3},
};

const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
  NULL,
  &google_protobuf_SourceCodeInfo_Location__fields[0],
  UPB_SIZE(32, 64), 5, false,
};
2938 
/* Layout tables for google_protobuf_GeneratedCodeInfo: a single field entry
 * whose submessage layout is
 * google_protobuf_GeneratedCodeInfo_Annotation_msginit. */
static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
  &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
};

static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
  &google_protobuf_GeneratedCodeInfo_submsgs[0],
  &google_protobuf_GeneratedCodeInfo__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2952 
/* Layout tables for google_protobuf_GeneratedCodeInfo_Annotation: four field
 * entries and a upb_msglayout with no submessage list (NULL).  The first
 * entry uses _UPB_LABEL_PACKED in its last column. */
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
  {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
  {2, UPB_SIZE(12, 16), 3, 0, 9, 1},
  {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
  NULL,
  &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
  UPB_SIZE(24, 48), 4, false,
};
2965 
2966 
2967 
2968 
2969 #include <ctype.h>
2970 #include <errno.h>
2971 #include <stdlib.h>
2972 #include <string.h>
2973 
2974 
/* Length-prefixed string record.  newstr() allocates sizeof(str_t) + len
 * bytes for one of these, so the one-element |str| array is really the start
 * of a buffer holding len bytes plus a terminating NUL. */
typedef struct {
  size_t len;   /* Byte count of the string, not counting the terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
2979 
newstr(upb_alloc * alloc,const char * data,size_t len)2980 static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) {
2981   str_t *ret = upb_malloc(alloc, sizeof(*ret) + len);
2982   if (!ret) return NULL;
2983   ret->len = len;
2984   memcpy(ret->str, data, len);
2985   ret->str[len] = '\0';
2986   return ret;
2987 }
2988 
/* Definition of a single field.  Most members are exposed read-only through
 * the upb_fielddef_*() accessors. */
struct upb_fielddef {
  const upb_filedef *file;     /* Returned by upb_fielddef_file(). */
  const upb_msgdef *msgdef;    /* Returned by upb_fielddef_containingtype(). */
  const char *full_name;       /* upb_fielddef_name() is the part after the
                                * last '.'. */
  const char *json_name;       /* Returned by upb_fielddef_jsonname(). */
  /* Default value; the upb_fielddef_default*() accessors pick the member that
   * corresponds to the requested type (e.g. |sint| for int32/int64). */
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t *str;
  } defaultval;
  const upb_oneofdef *oneof;   /* Containing oneof, or NULL; may be synthetic
                                * (see upb_fielddef_realcontainingoneof()). */
  /* NOTE(review): presumably |unresolved| holds the descriptor until the
   * msgdef/enumdef is linked -- confirm against the code that fills this in. */
  union {
    const upb_msgdef *msgdef;
    const upb_enumdef *enumdef;
    const google_protobuf_FieldDescriptorProto *unresolved;
  } sub;
  uint32_t number_;
  uint16_t index_;             /* Position in sorted order; assigned by
                                * assign_msg_indices(). */
  uint16_t layout_index;
  uint32_t selector_base;  /* Used to index into a upb::Handlers table. */
  bool is_extension_;
  bool lazy_;
  bool packed_;
  bool proto3_optional_;
  upb_descriptortype_t type_;  /* Mapped to upb_fieldtype_t by
                                * upb_fielddef_type(). */
  upb_label_t label_;
};
3019 
/* Definition of a message type. */
struct upb_msgdef {
  const upb_msglayout *layout;
  const upb_filedef *file;
  const char *full_name;
  /* Both counters below are computed by assign_msg_indices(). */
  uint32_t selector_count;
  uint32_t submsg_field_count;

  /* Tables for looking up fields by number and name. */
  upb_inttable itof;
  upb_strtable ntof;

  const upb_fielddef *fields;
  const upb_oneofdef *oneofs;
  int field_count;
  int oneof_count;
  /* Number of leading non-synthetic oneofs; computed by check_oneofs(). */
  int real_oneof_count;

  /* Is this a map-entry message? */
  bool map_entry;
  /* Derived from full_name by assign_msg_wellknowntype(). */
  upb_wellknowntype_t well_known_type;

  /* TODO(haberman): proper extension ranges (there can be multiple). */
};
3043 
/* Definition of an enum type. */
struct upb_enumdef {
  const upb_filedef *file;
  const char *full_name;
  upb_strtable ntoi;   /* Value name -> number; queried by upb_enumdef_ntoi()
                        * and walked by the upb_enum_iter functions. */
  upb_inttable iton;   /* Number -> value name; queried by
                        * upb_enumdef_iton(). */
  int32_t defaultval;  /* upb_enumdef_default() asserts this is in |iton|. */
};
3051 
/* Definition of a oneof within a message. */
struct upb_oneofdef {
  const upb_msgdef *parent;
  const char *full_name;
  uint32_t index;      /* Position in the parent's oneof array; assigned by
                        * check_oneofs(). */
  /* NOTE(review): by analogy with upb_msgdef these look like name -> field
   * and number -> field tables -- confirm against the code that fills them. */
  upb_strtable ntof;
  upb_inttable itof;
};
3059 
/* Definition of a single file: its identifying strings, syntax, and the
 * arrays of dependencies, messages, enums and extensions.  Each array below
 * has a same-named *_count member further down. */
struct upb_filedef {
  const char *name;
  const char *package;
  const char *phpprefix;
  const char *phpnamespace;
  upb_syntax_t syntax;

  const upb_filedef **deps;
  const upb_msgdef *msgs;
  const upb_enumdef *enums;
  const upb_fielddef *exts;

  int dep_count;
  int msg_count;
  int enum_count;
  int ext_count;
};
3077 
/* Symbol table: two string-keyed tables plus the arena associated with them.
 * Values in |syms| are tagged pointers (see pack_def()/unpack_def()). */
struct upb_symtab {
  upb_arena *arena;
  upb_strtable syms;  /* full_name -> packed def ptr */
  upb_strtable files;  /* file_name -> upb_filedef* */
};
3083 
/* Inside a symtab we store tagged pointers to specific def types.
 *
 * The tag is OR-ed into the low two bits of the pointer by pack_def() and
 * checked/masked off with 3 by unpack_def().  The values 1 and 2 are each
 * used twice because the symtab table and the message table are separate
 * tables (see the per-group comments below). */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
3096 
/* Extracts the pointer stored in |v| when its low two tag bits equal |type|.
 * Returns the pointer with the tag bits cleared, or NULL on a tag mismatch. */
static const void *unpack_def(upb_value v, upb_deftype_t type) {
  uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  if ((tagged & 3) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~3);
}
3101 
pack_def(const void * ptr,upb_deftype_t type)3102 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
3103   uintptr_t num = (uintptr_t)ptr | type;
3104   return upb_value_constptr((const void*)num);
3105 }
3106 
/* The <ctype.h> classifiers (isalpha() and friends) depend on the current
 * locale, which we don't want, so we classify characters by hand.
 *
 * Returns true when |c| lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return c <= high;
}
3111 
/* Returns true for ASCII letters (either case) and for '_'. */
static bool upb_isletter(char c) {
  if (upb_isbetween(c, 'A', 'Z')) return true;
  if (upb_isbetween(c, 'a', 'z')) return true;
  return c == '_';
}
3115 
/* Returns true for anything upb_isletter() accepts and for ASCII digits. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
3119 
/* Validates that |name| is an identifier.
 *
 * Each component must start with a letter or '_' and continue with letters,
 * digits or '_'.  When |full| is true, components may be separated by '.';
 * when it is false, any '.' is rejected.  Empty names and empty components
 * (leading, trailing or doubled '.') are rejected.
 *
 * Returns true if |name| is valid.  Otherwise returns false and records a
 * message describing the problem in |s|.
 *
 * |name| is a length-delimited view that is not guaranteed to be
 * NUL-terminated, so the messages print it with an explicit precision
 * ("%.*s") rather than "%s".
 * NOTE(review): this relies on upb_status_seterrf() accepting standard printf
 * precision specifiers -- confirm against its implementation. */
static bool upb_isident(upb_strview name, bool full, upb_status *s) {
  const char *str = name.data;
  size_t len = name.size;
  /* Never hand a possibly-NULL pointer to the formatter for an empty view. */
  const char *shown = len ? str : "";
  int shown_len = (int)len;
  bool start = true;
  size_t i;
  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%.*s)", shown_len,
                           shown);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s,
            "invalid name: path components must start with a letter (%.*s)",
            shown_len, shown);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(s,
                           "invalid name: non-alphanumeric character (%.*s)",
                           shown_len, shown);
        return false;
      }
    }
  }
  if (start) {
    /* Still expecting the first character of a component: the name is empty
     * or ends with '.'.  Report it instead of failing silently. */
    upb_status_seterrf(s, "invalid name: empty part (%.*s)", shown_len, shown);
    return false;
  }
  return true;
}
3151 
/* Returns the portion of |fullname| that follows its last '.', or |fullname|
 * itself when it contains no '.'.  A NULL input yields NULL.  The result
 * points into |fullname|; nothing is allocated. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) {
    return NULL;
  }

  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
3165 
3166 /* All submessage fields are lower than all other fields.
3167  * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)3168 uint32_t field_rank(const upb_fielddef *f) {
3169   uint32_t ret = upb_fielddef_number(f);
3170   const uint32_t high_bit = 1 << 30;
3171   UPB_ASSERT(ret < high_bit);
3172   if (!upb_fielddef_issubmsg(f))
3173     ret |= high_bit;
3174   return ret;
3175 }
3176 
cmp_fields(const void * p1,const void * p2)3177 int cmp_fields(const void *p1, const void *p2) {
3178   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
3179   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
3180   return field_rank(f1) - field_rank(f2);
3181 }
3182 
/* A few implementation details of handlers.  We put these here to avoid
 * a def -> handlers dependency. */

/* Selector count of a message with no fields; assign_msg_indices() numbers
 * per-field selectors starting after these (and after one slot per submessage
 * field). */
#define UPB_STATIC_SELECTOR_COUNT 3  /* Warning: also in upb/handlers.h. */
3187 
/* Offset added to a field's first selector to get its selector base: 2 for
 * fields where upb_fielddef_isseq() is true, 0 otherwise. */
static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
3191 
/* Number of selectors reserved for field |f|.  Every field gets one; the
 * extras are:
 *   +2 for sequences              (STARTSEQ/ENDSEQ)
 *   +2 for strings                ([STRING]/STARTSTR/ENDSTR)
 *   +3 for lazy submessage fields (STARTSTR/ENDSTR/STRING)
 * A non-lazy submessage adds nothing here: ENDSUBMSG is the base selector and
 * STARTSUBMSG lives at the beginning of the table. */
static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2;
  }
  if (upb_fielddef_isstring(f)) {
    count += 2;
  }
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3;
  }

  return count;
}
3206 
/* Records the fixed "out of memory" error message on |status|. */
static void upb_status_setoom(upb_status *status) {
  upb_status_seterrmsg(status, "out of memory");
}
3210 
/* Assigns index_ and selector_base to every field of |m| and fills in
 * m->selector_count and m->submsg_field_count.
 *
 * Fields are sorted with cmp_fields() first.  upb internally relies on
 * UPB_TYPE_MESSAGE fields having the lowest indexes, but we do not publicly
 * guarantee this.
 *
 * Returns false, with an out-of-memory error in |s|, if the temporary sort
 * array cannot be allocated; returns true otherwise. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  upb_msg_field_iter iter;
  upb_fielddef **sorted;
  uint32_t next_selector;
  int idx;
  int count = upb_msgdef_numfields(m);

  if (count == 0) {
    /* No fields: only the static selectors exist. */
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return true;
  }

  sorted = upb_gmalloc(count * sizeof(*sorted));
  if (!sorted) {
    upb_status_setoom(s);
    return false;
  }

  /* Collect every field, counting the submessage fields along the way. */
  m->submsg_field_count = 0;
  idx = 0;
  upb_msg_field_begin(&iter, m);
  while (!upb_msg_field_done(&iter)) {
    upb_fielddef *f = upb_msg_iter_field(&iter);
    UPB_ASSERT(f->msgdef == m);
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    sorted[idx] = f;
    upb_msg_field_next(&iter);
    idx++;
  }

  qsort(sorted, count, sizeof(*sorted), cmp_fields);

  /* Per-field selectors start after the static ones and after one slot per
   * submessage field. */
  next_selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (idx = 0; idx < count; idx++) {
    upb_fielddef *f = sorted[idx];
    f->index_ = idx;
    f->selector_base = next_selector + upb_handlers_selectorbaseoffset(f);
    next_selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = next_selector;

  upb_gfree(sorted);
  return true;
}
3258 
/* Numbers the oneofs of |m| and checks that no real oneof follows a synthetic
 * one.
 *
 * On success, every oneof's |index| equals its array position and
 * m->real_oneof_count is the number of oneofs before the first synthetic one
 * (all of them if none is synthetic).  On failure, returns false with an
 * error naming the offending oneof in |s|. */
static bool check_oneofs(upb_msgdef *m, upb_status *s) {
  upb_oneofdef *oneofs = (upb_oneofdef*)m->oneofs;
  int first_synthetic = -1;
  int i;

  for (i = 0; i < m->oneof_count; i++) {
    upb_oneofdef *o = &oneofs[i];
    o->index = i;

    if (upb_oneofdef_issynthetic(o)) {
      /* Remember only the first synthetic oneof. */
      if (first_synthetic == -1) {
        first_synthetic = i;
      }
      continue;
    }

    if (first_synthetic != -1) {
      upb_status_seterrf(
          s, "Synthetic oneofs must be after all other oneofs: %s",
          upb_oneofdef_name(o));
      return false;
    }
  }

  m->real_oneof_count =
      (first_synthetic == -1) ? m->oneof_count : first_synthetic;
  return true;
}
3289 
/* Sets m->well_known_type from the message's full name.  Names that are not
 * in the table below, and a NULL name, map to UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *full_name;
    upb_wellknowntype_t type;
  } known_types[] = {
    {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
    {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
    {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
    {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
    {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
    {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
    {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
    {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
    {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
    {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
    {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
    {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
    {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
    {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
    {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
    {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT},
  };
  const char *name = upb_msgdef_fullname(m);
  upb_wellknowntype_t type = UPB_WELLKNOWN_UNSPECIFIED;
  size_t i;

  if (name != NULL) {
    for (i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++) {
      if (strcmp(name, known_types[i].full_name) == 0) {
        type = known_types[i].type;
        break;
      }
    }
  }

  m->well_known_type = type;
}
3332 
3333 
3334 /* upb_enumdef ****************************************************************/
3335 
upb_enumdef_fullname(const upb_enumdef * e)3336 const char *upb_enumdef_fullname(const upb_enumdef *e) {
3337   return e->full_name;
3338 }
3339 
upb_enumdef_name(const upb_enumdef * e)3340 const char *upb_enumdef_name(const upb_enumdef *e) {
3341   return shortdefname(e->full_name);
3342 }
3343 
upb_enumdef_file(const upb_enumdef * e)3344 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
3345   return e->file;
3346 }
3347 
upb_enumdef_default(const upb_enumdef * e)3348 int32_t upb_enumdef_default(const upb_enumdef *e) {
3349   UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
3350   return e->defaultval;
3351 }
3352 
upb_enumdef_numvals(const upb_enumdef * e)3353 int upb_enumdef_numvals(const upb_enumdef *e) {
3354   return (int)upb_strtable_count(&e->ntoi);
3355 }
3356 
/* Positions |i| at the first value of |e|.  Iteration walks the
 * name -> number table rather than number -> name, so that values sharing a
 * number are all visited. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  upb_strtable_begin(i, &e->ntoi);
}
3361 
/* Advances |iter| to the next enum value. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once |iter| has moved past the last enum value. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
3364 
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)3365 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
3366                       size_t len, int32_t *num) {
3367   upb_value v;
3368   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
3369     return false;
3370   }
3371   if (num) *num = upb_value_getint32(v);
3372   return true;
3373 }
3374 
upb_enumdef_iton(const upb_enumdef * def,int32_t num)3375 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
3376   upb_value v;
3377   return upb_inttable_lookup32(&def->iton, num, &v) ?
3378       upb_value_getcstr(v) : NULL;
3379 }
3380 
/* Returns the name (table key) of the value |iter| currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  return upb_strtable_iter_key(iter).data;
}
3384 
/* Returns the number (table value) of the enum value |iter| currently points
 * at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  return upb_value_getint32(upb_strtable_iter_value(iter));
}
3388 
3389 
3390 /* upb_fielddef ***************************************************************/
3391 
upb_fielddef_fullname(const upb_fielddef * f)3392 const char *upb_fielddef_fullname(const upb_fielddef *f) {
3393   return f->full_name;
3394 }
3395 
upb_fielddef_type(const upb_fielddef * f)3396 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
3397   switch (f->type_) {
3398     case UPB_DESCRIPTOR_TYPE_DOUBLE:
3399       return UPB_TYPE_DOUBLE;
3400     case UPB_DESCRIPTOR_TYPE_FLOAT:
3401       return UPB_TYPE_FLOAT;
3402     case UPB_DESCRIPTOR_TYPE_INT64:
3403     case UPB_DESCRIPTOR_TYPE_SINT64:
3404     case UPB_DESCRIPTOR_TYPE_SFIXED64:
3405       return UPB_TYPE_INT64;
3406     case UPB_DESCRIPTOR_TYPE_INT32:
3407     case UPB_DESCRIPTOR_TYPE_SFIXED32:
3408     case UPB_DESCRIPTOR_TYPE_SINT32:
3409       return UPB_TYPE_INT32;
3410     case UPB_DESCRIPTOR_TYPE_UINT64:
3411     case UPB_DESCRIPTOR_TYPE_FIXED64:
3412       return UPB_TYPE_UINT64;
3413     case UPB_DESCRIPTOR_TYPE_UINT32:
3414     case UPB_DESCRIPTOR_TYPE_FIXED32:
3415       return UPB_TYPE_UINT32;
3416     case UPB_DESCRIPTOR_TYPE_ENUM:
3417       return UPB_TYPE_ENUM;
3418     case UPB_DESCRIPTOR_TYPE_BOOL:
3419       return UPB_TYPE_BOOL;
3420     case UPB_DESCRIPTOR_TYPE_STRING:
3421       return UPB_TYPE_STRING;
3422     case UPB_DESCRIPTOR_TYPE_BYTES:
3423       return UPB_TYPE_BYTES;
3424     case UPB_DESCRIPTOR_TYPE_GROUP:
3425     case UPB_DESCRIPTOR_TYPE_MESSAGE:
3426       return UPB_TYPE_MESSAGE;
3427   }
3428   UPB_UNREACHABLE();
3429 }
3430 
upb_fielddef_descriptortype(const upb_fielddef * f)3431 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
3432   return f->type_;
3433 }
3434 
upb_fielddef_index(const upb_fielddef * f)3435 uint32_t upb_fielddef_index(const upb_fielddef *f) {
3436   return f->index_;
3437 }
3438 
upb_fielddef_label(const upb_fielddef * f)3439 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
3440   return f->label_;
3441 }
3442 
upb_fielddef_number(const upb_fielddef * f)3443 uint32_t upb_fielddef_number(const upb_fielddef *f) {
3444   return f->number_;
3445 }
3446 
upb_fielddef_isextension(const upb_fielddef * f)3447 bool upb_fielddef_isextension(const upb_fielddef *f) {
3448   return f->is_extension_;
3449 }
3450 
upb_fielddef_lazy(const upb_fielddef * f)3451 bool upb_fielddef_lazy(const upb_fielddef *f) {
3452   return f->lazy_;
3453 }
3454 
upb_fielddef_packed(const upb_fielddef * f)3455 bool upb_fielddef_packed(const upb_fielddef *f) {
3456   return f->packed_;
3457 }
3458 
upb_fielddef_name(const upb_fielddef * f)3459 const char *upb_fielddef_name(const upb_fielddef *f) {
3460   return shortdefname(f->full_name);
3461 }
3462 
upb_fielddef_jsonname(const upb_fielddef * f)3463 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
3464   return f->json_name;
3465 }
3466 
upb_fielddef_selectorbase(const upb_fielddef * f)3467 uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
3468   return f->selector_base;
3469 }
3470 
upb_fielddef_file(const upb_fielddef * f)3471 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
3472   return f->file;
3473 }
3474 
upb_fielddef_containingtype(const upb_fielddef * f)3475 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
3476   return f->msgdef;
3477 }
3478 
upb_fielddef_containingoneof(const upb_fielddef * f)3479 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
3480   return f->oneof;
3481 }
3482 
upb_fielddef_realcontainingoneof(const upb_fielddef * f)3483 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
3484   if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
3485   return f->oneof;
3486 }
3487 
/* Placeholder hook called by the typed default-value getters.  It currently
 * performs no check: both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(f);
  UPB_UNUSED(ctype);
}
3492 
upb_fielddef_defaultint64(const upb_fielddef * f)3493 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
3494   chkdefaulttype(f, UPB_TYPE_INT64);
3495   return f->defaultval.sint;
3496 }
3497 
upb_fielddef_defaultint32(const upb_fielddef * f)3498 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
3499   chkdefaulttype(f, UPB_TYPE_INT32);
3500   return (int32_t)f->defaultval.sint;
3501 }
3502 
upb_fielddef_defaultuint64(const upb_fielddef * f)3503 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
3504   chkdefaulttype(f, UPB_TYPE_UINT64);
3505   return f->defaultval.uint;
3506 }
3507 
upb_fielddef_defaultuint32(const upb_fielddef * f)3508 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
3509   chkdefaulttype(f, UPB_TYPE_UINT32);
3510   return (uint32_t)f->defaultval.uint;
3511 }
3512 
upb_fielddef_defaultbool(const upb_fielddef * f)3513 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
3514   chkdefaulttype(f, UPB_TYPE_BOOL);
3515   return f->defaultval.boolean;
3516 }
3517 
upb_fielddef_defaultfloat(const upb_fielddef * f)3518 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
3519   chkdefaulttype(f, UPB_TYPE_FLOAT);
3520   return f->defaultval.flt;
3521 }
3522 
upb_fielddef_defaultdouble(const upb_fielddef * f)3523 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
3524   chkdefaulttype(f, UPB_TYPE_DOUBLE);
3525   return f->defaultval.dbl;
3526 }
3527 
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)3528 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
3529   str_t *str = f->defaultval.str;
3530   UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
3531          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
3532          upb_fielddef_type(f) == UPB_TYPE_ENUM);
3533   if (str) {
3534     if (len) *len = str->len;
3535     return str->str;
3536   } else {
3537     if (len) *len = 0;
3538     return NULL;
3539   }
3540 }
3541 
upb_fielddef_msgsubdef(const upb_fielddef * f)3542 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
3543   UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
3544   return f->sub.msgdef;
3545 }
3546 
upb_fielddef_enumsubdef(const upb_fielddef * f)3547 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
3548   UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_ENUM);
3549   return f->sub.enumdef;
3550 }
3551 
upb_fielddef_layout(const upb_fielddef * f)3552 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
3553   return &f->msgdef->layout->fields[f->layout_index];
3554 }
3555 
upb_fielddef_issubmsg(const upb_fielddef * f)3556 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
3557   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
3558 }
3559 
upb_fielddef_isstring(const upb_fielddef * f)3560 bool upb_fielddef_isstring(const upb_fielddef *f) {
3561   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
3562          upb_fielddef_type(f) == UPB_TYPE_BYTES;
3563 }
3564 
upb_fielddef_isseq(const upb_fielddef * f)3565 bool upb_fielddef_isseq(const upb_fielddef *f) {
3566   return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
3567 }
3568 
/* True iff the field is neither a string/bytes field nor a submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
3572 
/* True iff the field is repeated, is a submessage, and that submessage's
 * msgdef is flagged as a map entry. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
3577 
upb_fielddef_hassubdef(const upb_fielddef * f)3578 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
3579   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
3580 }
3581 
upb_fielddef_haspresence(const upb_fielddef * f)3582 bool upb_fielddef_haspresence(const upb_fielddef *f) {
3583   if (upb_fielddef_isseq(f)) return false;
3584   return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
3585          f->file->syntax == UPB_SYNTAX_PROTO2;
3586 }
3587 
/* True iff low <= x <= high (both bounds inclusive). */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
3591 
/* True iff |label| lies in the inclusive range [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True iff |type| lies in the inclusive range [1, 11]. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True iff |fmt| lies in the inclusive range [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
3595 
/* True iff |type| lies in the inclusive range [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const bool in_range = between(type, 1, 18);
  return in_range;
}
3599 
3600 /* upb_msgdef *****************************************************************/
3601 
upb_msgdef_fullname(const upb_msgdef * m)3602 const char *upb_msgdef_fullname(const upb_msgdef *m) {
3603   return m->full_name;
3604 }
3605 
upb_msgdef_file(const upb_msgdef * m)3606 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
3607   return m->file;
3608 }
3609 
upb_msgdef_name(const upb_msgdef * m)3610 const char *upb_msgdef_name(const upb_msgdef *m) {
3611   return shortdefname(m->full_name);
3612 }
3613 
upb_msgdef_syntax(const upb_msgdef * m)3614 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
3615   return m->file->syntax;
3616 }
3617 
upb_msgdef_selectorcount(const upb_msgdef * m)3618 size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
3619   return m->selector_count;
3620 }
3621 
upb_msgdef_submsgfieldcount(const upb_msgdef * m)3622 uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
3623   return m->submsg_field_count;
3624 }
3625 
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)3626 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
3627   upb_value val;
3628   return upb_inttable_lookup32(&m->itof, i, &val) ?
3629       upb_value_getconstptr(val) : NULL;
3630 }
3631 
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)3632 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
3633                                     size_t len) {
3634   upb_value val;
3635 
3636   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3637     return NULL;
3638   }
3639 
3640   return unpack_def(val, UPB_DEFTYPE_FIELD);
3641 }
3642 
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)3643 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
3644                                     size_t len) {
3645   upb_value val;
3646 
3647   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3648     return NULL;
3649   }
3650 
3651   return unpack_def(val, UPB_DEFTYPE_ONEOF);
3652 }
3653 
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)3654 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
3655                            const upb_fielddef **f, const upb_oneofdef **o) {
3656   upb_value val;
3657 
3658   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3659     return false;
3660   }
3661 
3662   *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
3663   *f = unpack_def(val, UPB_DEFTYPE_FIELD);
3664   return *o || *f;  /* False if this was a JSON name. */
3665 }
3666 
upb_msgdef_lookupjsonname(const upb_msgdef * m,const char * name,size_t len)3667 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
3668                                               const char *name, size_t len) {
3669   upb_value val;
3670   const upb_fielddef* f;
3671 
3672   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3673     return NULL;
3674   }
3675 
3676   f = unpack_def(val, UPB_DEFTYPE_FIELD);
3677   if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
3678 
3679   return f;
3680 }
3681 
upb_msgdef_numfields(const upb_msgdef * m)3682 int upb_msgdef_numfields(const upb_msgdef *m) {
3683   return m->field_count;
3684 }
3685 
upb_msgdef_numoneofs(const upb_msgdef * m)3686 int upb_msgdef_numoneofs(const upb_msgdef *m) {
3687   return m->oneof_count;
3688 }
3689 
upb_msgdef_numrealoneofs(const upb_msgdef * m)3690 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
3691   return m->real_oneof_count;
3692 }
3693 
upb_msgdef_layout(const upb_msgdef * m)3694 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
3695   return m->layout;
3696 }
3697 
_upb_msgdef_field(const upb_msgdef * m,int i)3698 const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) {
3699   if (i >= m->field_count) return NULL;
3700   return &m->fields[i];
3701 }
3702 
upb_msgdef_mapentry(const upb_msgdef * m)3703 bool upb_msgdef_mapentry(const upb_msgdef *m) {
3704   return m->map_entry;
3705 }
3706 
upb_msgdef_wellknowntype(const upb_msgdef * m)3707 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
3708   return m->well_known_type;
3709 }
3710 
upb_msgdef_isnumberwrapper(const upb_msgdef * m)3711 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
3712   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
3713   return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
3714          type <= UPB_WELLKNOWN_UINT32VALUE;
3715 }
3716 
/* Starts a field iteration by beginning an int-table iteration over the
 * message's itof table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m)
{
  upb_inttable_begin(iter, &m->itof);
}
3720 
/* Advances the field iterator (delegates to upb_inttable_next()). */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
3722 
/* Reports whether the field iterator is done (per upb_inttable_done()). */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
3726 
upb_msg_iter_field(const upb_msg_field_iter * iter)3727 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
3728   return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
3729 }
3730 
/* Marks the field iterator as done (delegates to
 * upb_inttable_iter_setdone()). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter)
{
  upb_inttable_iter_setdone(iter);
}
3734 
/* Compares two field iterators using upb_inttable_iter_isequal(). */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
                                const upb_msg_field_iter * iter2) {
  const bool same = upb_inttable_iter_isequal(iter1, iter2);
  return same;
}
3739 
/* Starts a oneof iteration over the message's ntof string table, leaving the
 * iterator on the first entry that unpacks as a oneof (or at the end when
 * there is none). */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  /* The table also holds non-oneof entries; step over any leading ones. */
  for (upb_strtable_begin(iter, &m->ntof);
       !upb_strtable_done(iter) &&
       !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
       upb_strtable_next(iter)) {
  }
}
3748 
/* Advances the iterator to the next table entry that unpacks as a oneof, or
 * to the end of the table when no such entry remains. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  /* Always move at least one step, then keep going past non-oneof entries. */
  upb_strtable_next(iter);
  while (!upb_strtable_done(iter) &&
         !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
    upb_strtable_next(iter);
  }
}
3756 
/* Reports whether the oneof iterator is done (per upb_strtable_done()). */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  const bool finished = upb_strtable_done(iter);
  return finished;
}
3760 
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)3761 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
3762   return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
3763 }
3764 
/* Marks the oneof iterator as done (delegates to
 * upb_strtable_iter_setdone()). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter)
{
  upb_strtable_iter_setdone(iter);
}
3768 
/* Compares two oneof iterators using upb_strtable_iter_isequal(). */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  const bool same = upb_strtable_iter_isequal(iter1, iter2);
  return same;
}
3773 
3774 /* upb_oneofdef ***************************************************************/
3775 
upb_oneofdef_name(const upb_oneofdef * o)3776 const char *upb_oneofdef_name(const upb_oneofdef *o) {
3777   return shortdefname(o->full_name);
3778 }
3779 
upb_oneofdef_containingtype(const upb_oneofdef * o)3780 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
3781   return o->parent;
3782 }
3783 
upb_oneofdef_numfields(const upb_oneofdef * o)3784 int upb_oneofdef_numfields(const upb_oneofdef *o) {
3785   return (int)upb_strtable_count(&o->ntof);
3786 }
3787 
upb_oneofdef_index(const upb_oneofdef * o)3788 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
3789   return o->index;
3790 }
3791 
upb_oneofdef_issynthetic(const upb_oneofdef * o)3792 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
3793   upb_inttable_iter iter;
3794   const upb_fielddef *f;
3795   upb_inttable_begin(&iter, &o->itof);
3796   if (upb_oneofdef_numfields(o) != 1) return false;
3797   f = upb_value_getptr(upb_inttable_iter_value(&iter));
3798   UPB_ASSERT(f);
3799   return f->proto3_optional_;
3800 }
3801 
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)3802 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
3803                                       const char *name, size_t length) {
3804   upb_value val;
3805   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
3806       upb_value_getptr(val) : NULL;
3807 }
3808 
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)3809 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
3810   upb_value val;
3811   return upb_inttable_lookup32(&o->itof, num, &val) ?
3812       upb_value_getptr(val) : NULL;
3813 }
3814 
/* Starts iterating the oneof's fields by beginning an int-table iteration
 * over its itof table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o)
{
  upb_inttable_begin(iter, &o->itof);
}
3818 
/* Advances the oneof field iterator (delegates to upb_inttable_next()). */
void upb_oneof_next(upb_oneof_iter *iter)
{
  upb_inttable_next(iter);
}
3822 
/* Reports whether the oneof field iterator is done (per
 * upb_inttable_done()). */
bool upb_oneof_done(upb_oneof_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
3826 
upb_oneof_iter_field(const upb_oneof_iter * iter)3827 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
3828   return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
3829 }
3830 
/* Marks the oneof field iterator as done (delegates to
 * upb_inttable_iter_setdone()). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter)
{
  upb_inttable_iter_setdone(iter);
}
3834 
3835 /* Dynamic Layout Generation. *************************************************/
3836 
/* True iff |val| has at most one bit set.  Note that this also yields true
 * for 0, which is not a power of two. */
static bool is_power_of_two(size_t val) {
  const size_t without_lowest_bit = val & (val - 1);
  return without_lowest_bit == 0;
}
3840 
/* Rounds |val| up to the next multiple of |align|, which must be a power of
 * two (asserted via is_power_of_two()). */
static size_t align_up(size_t val, size_t align) {
  size_t low_bits_mask;
  UPB_ASSERT(is_power_of_two(align));
  low_bits_mask = align - 1;
  return (val + low_bits_mask) & ~low_bits_mask;
}
3846 
/* Integer division of |n| by |d|, rounding the quotient up. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + d - 1;
  return biased / d;
}
3850 
/* Returns the number of bytes used to store one value of field type |type|:
 * 1 for bool, 4 for the 32-bit/float/enum types, 8 for the 64-bit/double
 * types, a pointer for messages, and a upb_strview for strings and bytes. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
3872 
upb_msg_fielddefsize(const upb_fielddef * f)3873 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
3874   if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
3875     upb_map_entry ent;
3876     UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
3877     return sizeof(ent.k);
3878   } else if (upb_fielddef_isseq(f)) {
3879     return sizeof(void*);
3880   } else {
3881     return upb_msgval_sizeof(upb_fielddef_type(f));
3882   }
3883 }
3884 
/* Reserves |size| bytes at the end of layout |l|.
 *
 * The current size is first aligned up to |size| (so |size| doubles as the
 * alignment), the aligned value is returned as the offset of the new slot,
 * and l->size is then advanced past the slot. */
static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  uint32_t offset;

  l->size = align_up(l->size, size);
  /* Read back from l->size so the offset matches exactly what was stored. */
  offset = l->size;
  l->size += size;

  return offset;
}
3893 
3894 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
3895  * It computes a dynamic layout for all of the fields in |m|. */
make_layout(const upb_symtab * symtab,const upb_msgdef * m)3896 static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
3897   upb_msglayout *l = (upb_msglayout*)m->layout;
3898   upb_msg_field_iter it;
3899   upb_msg_oneof_iter oit;
3900   size_t hasbit;
3901   size_t submsg_count = m->submsg_field_count;
3902   const upb_msglayout **submsgs;
3903   upb_msglayout_field *fields;
3904   upb_alloc *alloc = upb_arena_alloc(symtab->arena);
3905 
3906   memset(l, 0, sizeof(*l));
3907 
3908   fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields));
3909   submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs));
3910 
3911   if ((!fields && upb_msgdef_numfields(m)) ||
3912       (!submsgs && submsg_count)) {
3913     /* OOM. */
3914     return false;
3915   }
3916 
3917   l->field_count = upb_msgdef_numfields(m);
3918   l->fields = fields;
3919   l->submsgs = submsgs;
3920 
3921   if (upb_msgdef_mapentry(m)) {
3922     /* TODO(haberman): refactor this method so this special case is more
3923      * elegant. */
3924     const upb_fielddef *key = upb_msgdef_itof(m, 1);
3925     const upb_fielddef *val = upb_msgdef_itof(m, 2);
3926     fields[0].number = 1;
3927     fields[1].number = 2;
3928     fields[0].label = UPB_LABEL_OPTIONAL;
3929     fields[1].label = UPB_LABEL_OPTIONAL;
3930     fields[0].presence = 0;
3931     fields[1].presence = 0;
3932     fields[0].descriptortype = upb_fielddef_descriptortype(key);
3933     fields[1].descriptortype = upb_fielddef_descriptortype(val);
3934     fields[0].offset = 0;
3935     fields[1].offset = sizeof(upb_strview);
3936     fields[1].submsg_index = 0;
3937 
3938     if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
3939       submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
3940     }
3941 
3942     l->field_count = 2;
3943     l->size = 2 * sizeof(upb_strview);align_up(l->size, 8);
3944     return true;
3945   }
3946 
3947   /* Allocate data offsets in three stages:
3948    *
3949    * 1. hasbits.
3950    * 2. regular fields.
3951    * 3. oneof fields.
3952    *
3953    * OPT: There is a lot of room for optimization here to minimize the size.
3954    */
3955 
3956   /* Allocate hasbits and set basic field attributes. */
3957   submsg_count = 0;
3958   for (upb_msg_field_begin(&it, m), hasbit = 0;
3959        !upb_msg_field_done(&it);
3960        upb_msg_field_next(&it)) {
3961     upb_fielddef* f = upb_msg_iter_field(&it);
3962     upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
3963 
3964     field->number = upb_fielddef_number(f);
3965     field->descriptortype = upb_fielddef_descriptortype(f);
3966     field->label = upb_fielddef_label(f);
3967 
3968     if (upb_fielddef_ismap(f)) {
3969       field->label = _UPB_LABEL_MAP;
3970     } else if (upb_fielddef_packed(f)) {
3971       field->label = _UPB_LABEL_PACKED;
3972     }
3973 
3974     /* TODO: we probably should sort the fields by field number to match the
3975      * output of upbc, and to improve search speed for the table parser. */
3976     f->layout_index = f->index_;
3977 
3978     if (upb_fielddef_issubmsg(f)) {
3979       const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
3980       field->submsg_index = submsg_count++;
3981       submsgs[field->submsg_index] = subm->layout;
3982     }
3983 
3984     if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
3985       /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
3986        * table. This wastes one hasbit, but we don't worry about it for now. */
3987       field->presence = ++hasbit;
3988     } else {
3989       field->presence = 0;
3990     }
3991   }
3992 
3993   /* Account for space used by hasbits. */
3994   l->size = div_round_up(hasbit, 8);
3995 
3996   /* Allocate non-oneof fields. */
3997   for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
3998        upb_msg_field_next(&it)) {
3999     const upb_fielddef* f = upb_msg_iter_field(&it);
4000     size_t field_size = upb_msg_fielddefsize(f);
4001     size_t index = upb_fielddef_index(f);
4002 
4003     if (upb_fielddef_realcontainingoneof(f)) {
4004       /* Oneofs are handled separately below. */
4005       continue;
4006     }
4007 
4008     fields[index].offset = upb_msglayout_place(l, field_size);
4009   }
4010 
4011   /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
4012    * and space for the actual data. */
4013   for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
4014        upb_msg_oneof_next(&oit)) {
4015     const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
4016     upb_oneof_iter fit;
4017 
4018     if (upb_oneofdef_issynthetic(o)) continue;
4019 
4020     size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
4021     size_t field_size = 0;
4022     uint32_t case_offset;
4023     uint32_t data_offset;
4024 
4025     /* Calculate field size: the max of all field sizes. */
4026     for (upb_oneof_begin(&fit, o);
4027          !upb_oneof_done(&fit);
4028          upb_oneof_next(&fit)) {
4029       const upb_fielddef* f = upb_oneof_iter_field(&fit);
4030       field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
4031     }
4032 
4033     /* Align and allocate case offset. */
4034     case_offset = upb_msglayout_place(l, case_size);
4035     data_offset = upb_msglayout_place(l, field_size);
4036 
4037     for (upb_oneof_begin(&fit, o);
4038          !upb_oneof_done(&fit);
4039          upb_oneof_next(&fit)) {
4040       const upb_fielddef* f = upb_oneof_iter_field(&fit);
4041       fields[upb_fielddef_index(f)].offset = data_offset;
4042       fields[upb_fielddef_index(f)].presence = ~case_offset;
4043     }
4044   }
4045 
4046   /* Size of the entire structure should be a multiple of its greatest
4047    * alignment.  TODO: track overall alignment for real? */
4048   l->size = align_up(l->size, 8);
4049 
4050   return true;
4051 }
4052 
4053 /* Code to build defs from descriptor protos. *********************************/
4054 
4055 /* There is a question of how much validation to do here.  It will be difficult
4056  * to perfectly match the amount of validation performed by proto2.  But since
4057  * this code is used to directly build defs from Ruby (for example) we do need
4058  * to validate important constraints like uniqueness of names and numbers. */
4059 
/* CHK(x): returns false from the enclosing function when |x| is false.
 * CHK_OOM(x): same, but first records an out-of-memory error on ctx->status,
 * so a variable named |ctx| must be in scope wherever it is expanded.
 * Both expand to a bare `if` statement (not wrapped in do/while). */
#define CHK(x) if (!(x)) { return false; }
#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; }
4062 
/* Context handed (as |ctx|) to the def-building helpers in this section,
 * e.g. strviewdup(), makefullname(), symtab_add() and create_oneofdef(). */
typedef struct {
  const upb_symtab *symtab;
  upb_filedef *file;              /* File we are building. */
  upb_alloc *alloc;               /* Allocate defs here. */
  upb_alloc *tmp;                 /* Alloc for addtab and any other tmp data. */
  upb_strtable *addtab;           /* full_name -> packed def ptr for new defs */
  const upb_msglayout **layouts;  /* NULL if we should build layouts. */
  upb_status *status;             /* Record errors here. */
} symtab_addctx;
4072 
/* Duplicates the bytes of |view| with upb_strdup2(), allocating from
 * ctx->alloc, and returns the result. */
static char* strviewdup(const symtab_addctx *ctx, upb_strview view) {
  char *copy = upb_strdup2(view.data, view.size, ctx->alloc);
  return copy;
}
4076 
/* True iff the |n| bytes at |a| are exactly the NUL-terminated string |b|
 * (same length, same bytes).  |a| need not be NUL-terminated. */
static bool streql2(const char *a, size_t n, const char *b) {
  if (strlen(b) != n) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
4080 
/* True iff the bytes of |view| are exactly the NUL-terminated string |b|. */
static bool streql_view(upb_strview view, const char *b) {
  const bool same = streql2(view.data, view.size, b);
  return same;
}
4084 
/* Builds a NUL-terminated full name in memory allocated from ctx->alloc.
 *
 * With a non-NULL |prefix| the result is prefix + '.' + name; otherwise it is
 * a copy of |name| alone.
 *
 * Returns NULL on allocation failure, after recording an out-of-memory error
 * on ctx->status (in both branches). */
static const char *makefullname(const symtab_addctx *ctx, const char *prefix,
                                upb_strview name) {
  if (prefix) {
    /* ret = prefix + '.' + name; */
    size_t n = strlen(prefix);
    /* +2: one byte for the '.' separator, one for the terminating NUL. */
    char *ret = upb_malloc(ctx->alloc, n + name.size + 2);
    if (!ret) {
      upb_status_setoom(ctx->status);
      return NULL;
    }
    memcpy(ret, prefix, n);
    ret[n] = '.';
    memcpy(&ret[n + 1], name.data, name.size);
    ret[n + 1 + name.size] = '\0';
    return ret;
  } else {
    char *ret = strviewdup(ctx, name);
    if (!ret) {
      upb_status_setoom(ctx->status);
    }
    return ret;
  }
}
4101 
/* Writes the JSON form of field name |name| into |buf|.
 *
 * Every underscore is dropped and the character following it is passed
 * through toupper().  At most |len| bytes are written, and when |len| > 0 the
 * output is always NUL-terminated (truncating if necessary).  |buf| may be
 * NULL when |len| is 0, which makes this a pure size query.
 *
 * Returns the number of bytes the complete output needs, including the
 * terminating NUL, regardless of |len|.  For a NULL |name| an empty string is
 * written (if there is room) and 0 is returned. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

  /* Counts one output byte; stores it only while it fits, and stores a NUL in
   * the final slot once the buffer is exactly full. */
#define WRITE(byte)              \
  do {                           \
    ++dst;                       \
    if (dst < len) {             \
      buf[dst - 1] = (byte);     \
    } else if (dst == len) {     \
      buf[dst - 1] = '\0';       \
    }                            \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char (or EOF);
       * passing a negative plain char is undefined behavior. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
4139 
/* Returns a copy of |name| converted by getjsonname(), allocated from
 * |alloc|.
 *
 * Returns NULL if the allocation fails; the NULL buffer is no longer passed
 * on to getjsonname(), which would write through it. */
static char* makejsonname(const char* name, upb_alloc *alloc) {
  /* First pass only measures: NULL buffer with zero length. */
  size_t size = getjsonname(name, NULL, 0);
  char* json_name = upb_malloc(alloc, size);
  if (!json_name) {
    return NULL;
  }
  getjsonname(name, json_name, size);
  return json_name;
}
4146 
/* Registers |name| -> |v| in ctx->addtab.
 *
 * Fails with a "duplicate symbol" error on ctx->status if |name| is already
 * present in ctx->addtab or in the symtab's syms table, and with an
 * out-of-memory error if the insertion itself fails.  Returns true on
 * success. */
static bool symtab_add(const symtab_addctx *ctx, const char *name,
                       upb_value v) {
  upb_value existing;
  bool taken = upb_strtable_lookup(ctx->addtab, name, &existing);

  if (!taken) {
    taken = upb_strtable_lookup(&ctx->symtab->syms, name, &existing);
  }

  if (taken) {
    upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name);
    return false;
  }

  if (!upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)) {
    upb_status_setoom(ctx->status);
    return false;
  }

  return true;
}
4159 
4160 /* Given a symbol and the base symbol inside which it is defined, find the
4161  * symbol's definition in t. */
resolvename(const upb_strtable * t,const upb_fielddef * f,const char * base,upb_strview sym,upb_deftype_t type,upb_status * status,const void ** def)4162 static bool resolvename(const upb_strtable *t, const upb_fielddef *f,
4163                         const char *base, upb_strview sym,
4164                         upb_deftype_t type, upb_status *status,
4165                         const void **def) {
4166   if(sym.size == 0) return NULL;
4167   if(sym.data[0] == '.') {
4168     /* Symbols starting with '.' are absolute, so we do a single lookup.
4169      * Slice to omit the leading '.' */
4170     upb_value v;
4171     if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
4172       return false;
4173     }
4174 
4175     *def = unpack_def(v, type);
4176 
4177     if (!*def) {
4178       upb_status_seterrf(status,
4179                          "type mismatch when resolving field %s, name %s",
4180                          f->full_name, sym.data);
4181       return false;
4182     }
4183 
4184     return true;
4185   } else {
4186     /* Remove components from base until we find an entry or run out.
4187      * TODO: This branch is totally broken, but currently not used. */
4188     (void)base;
4189     UPB_ASSERT(false);
4190     return false;
4191   }
4192 }
4193 
/* Resolves |sym| to a def of kind |type|, searching ctx->addtab first and
 * the symtab's syms table second.
 *
 * Returns the def on success.  On failure returns NULL; if neither lookup
 * recorded an error of its own, a "couldn't resolve name" error is set on
 * ctx->status.
 *
 * |sym| is a upb_strview that carries an explicit size, so it is formatted
 * with an explicit length rather than as a C string. */
const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f,
                           const char *base, upb_strview sym,
                           upb_deftype_t type) {
  const void *ret = NULL;
  if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) &&
      !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) {
    if (upb_ok(ctx->status)) {
      upb_status_seterrf(ctx->status, "couldn't resolve name '%.*s'",
                         (int)sym.size, sym.data);
    }
    return NULL;
  }
  return ret;
}
4207 
/* Initializes the next free slot of m->oneofs from |oneof_proto|.
 *
 * The oneof is given a full name built from the message's full name,
 * registered in the add-table under that name and in the message's ntof
 * table under its short name, and its own itof/ntof tables are initialized.
 *
 * Returns false with an error on ctx->status if an allocation fails or the
 * full name is already registered.  In the latter case the "duplicate
 * symbol" error set by symtab_add() is kept rather than being replaced by an
 * out-of-memory error. */
static bool create_oneofdef(
    const symtab_addctx *ctx, upb_msgdef *m,
    const google_protobuf_OneofDescriptorProto *oneof_proto) {
  upb_oneofdef *o;
  upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
  upb_value v;

  o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  /* makefullname() returns NULL when it cannot allocate; stop before the
   * name is used as a table key. */
  CHK_OOM(o->full_name);

  v = pack_def(o, UPB_DEFTYPE_ONEOF);
  /* symtab_add() records its own error (duplicate symbol or OOM), so use
   * plain CHK to avoid overwriting it. */
  CHK(symtab_add(ctx, o->full_name, v));
  CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));

  CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  return true;
}
4228 
/* Returns the value of `ch` as a digit in `base` (8, 10 or 16), or -1 if `ch`
 * is not a digit of that base. */
static int parse_default_digit(char ch, int base) {
  int digit;

  if (ch >= '0' && ch <= '9') {
    digit = ch - '0';
  } else if (ch >= 'a' && ch <= 'f') {
    digit = ch - 'a' + 10;
  } else if (ch >= 'A' && ch <= 'F') {
    digit = ch - 'A' + 10;
  } else {
    return -1;
  }

  return digit < base ? digit : -1;
}

/* C89-compatible stand-in for strtoll()/strtoull() with base 0.
 *
 * Parses the whole NUL-terminated string `str`: optional leading whitespace,
 * an optional '+'/'-' sign, an optional "0x"/"0X" (hex) or "0" (octal) prefix,
 * then at least one digit.  On success stores the sign in `*negative` and the
 * absolute value in `*magnitude`.  Returns false for empty input, trailing
 * garbage, or a magnitude that does not fit in 64 bits. */
static bool parse_default_integer(const char *str, bool *negative,
                                  uint64_t *magnitude) {
  uint64_t acc = 0;
  uint64_t base = 10;
  size_t ndigits = 0;

  while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\v' ||
         *str == '\f' || *str == '\r') {
    str++;
  }

  *negative = (*str == '-');
  if (*str == '-' || *str == '+') {
    str++;
  }

  if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
    base = 16;
    str += 2;
  } else if (str[0] == '0') {
    /* The leading zero itself is consumed as an ordinary digit below. */
    base = 8;
  }

  for (; *str; str++, ndigits++) {
    int digit = parse_default_digit(*str, (int)base);
    if (digit < 0) {
      return false;
    }
    /* Reject before acc * base + digit could wrap. */
    if (acc > (UINT64_MAX - (uint64_t)digit) / base) {
      return false;
    }
    acc = acc * base + (uint64_t)digit;
  }

  if (ndigits == 0) {
    return false;
  }

  *magnitude = acc;
  return true;
}

/* Parses a signed literal that must lie in [-(max + 1), max]. */
static bool parse_default_signed(const char *str, uint64_t max, int64_t *out) {
  bool negative;
  uint64_t magnitude;

  if (!parse_default_integer(str, &negative, &magnitude)) {
    return false;
  }

  if (negative) {
    if (magnitude > max + 1) {
      return false;
    }
    /* Negate without ever forming a value above INT64_MAX. */
    *out = magnitude == 0 ? 0 : -(int64_t)(magnitude - 1) - 1;
  } else {
    if (magnitude > max) {
      return false;
    }
    *out = (int64_t)magnitude;
  }
  return true;
}

/* Parses an unsigned literal that must lie in [0, max].  A '-' sign is
 * rejected rather than wrapped around as strtoul() would do. */
static bool parse_default_unsigned(const char *str, uint64_t max,
                                   uint64_t *out) {
  bool negative;
  uint64_t magnitude;

  if (!parse_default_integer(str, &negative, &magnitude) || negative ||
      magnitude > max) {
    return false;
  }
  *out = magnitude;
  return true;
}

/* Parses the textual default value `str` (`len` bytes, not necessarily
 * NUL-terminated) according to the type of `f` and stores it in
 * f->defaultval.
 *
 * Integers are parsed with the helpers above, so 64-bit values work even where
 * `long` is 32 bits, and empty strings and negative unsigned values are
 * rejected.  Floating-point values still go through strtod().  Enum defaults
 * are looked up by label in f->sub.enumdef, which must already be resolved.
 *
 * Returns false if the text cannot be parsed, if the field is a message field
 * (which cannot have a default), or if storing a string default fails. */
static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len,
                          upb_fielddef *f) {
  char *end;
  char nullz[64];
  errno = 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      /* The number parsers below expect NUL-terminated strings. */
      if (len >= sizeof(nullz) - 1) {
        return false;
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32: {
      int64_t val;
      CHK(parse_default_signed(str, INT32_MAX, &val));
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_ENUM: {
      const upb_enumdef *e = f->sub.enumdef;
      int32_t val;
      CHK(upb_enumdef_ntoi(e, str, len, &val));
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_INT64: {
      int64_t val;
      CHK(parse_default_signed(str, INT64_MAX, &val));
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_UINT32: {
      uint64_t val;
      CHK(parse_default_unsigned(str, UINT32_MAX, &val));
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_UINT64: {
      uint64_t val;
      CHK(parse_default_unsigned(str, UINT64_MAX, &val));
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(str, &end);
      /* end == str means no characters were converted (e.g. empty input). */
      CHK(end != str && errno != ERANGE && !*end);
      f->defaultval.dbl = val;
      break;
    }
    case UPB_TYPE_FLOAT: {
      /* XXX: Need to write our own strtof, since it's not available in c89. */
      float val = strtod(str, &end);
      CHK(end != str && errno != ERANGE && !*end);
      f->defaultval.flt = val;
      break;
    }
    case UPB_TYPE_BOOL: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        return false;
      }
      break;
    }
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx->alloc, str, len);
      CHK_OOM(f->defaultval.str);
      break;
    case UPB_TYPE_BYTES:
      /* XXX: need to interpret the C-escaped value. */
      f->defaultval.str = newstr(ctx->alloc, str, len);
      CHK_OOM(f->defaultval.str);
      break;
    case UPB_TYPE_MESSAGE:
      /* Should not have a default value. */
      return false;
  }
  return true;
}
4324 
/* Stores the implicit (zero / false / empty-string) default for `f`, used when
 * the field descriptor carries no explicit default value.  Message fields are
 * left untouched. */
static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_MESSAGE:
      /* Message fields have no default. */
      break;

    case UPB_TYPE_BOOL:
      f->defaultval.boolean = false;
      break;

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      /* Empty string. */
      f->defaultval.str = newstr(ctx->alloc, NULL, 0);
      break;

    case UPB_TYPE_FLOAT:
    case UPB_TYPE_DOUBLE:
      /* Both floating-point types are zeroed through the double member. */
      f->defaultval.dbl = 0;
      break;

    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;

    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;
  }
}
4351 
/* Builds one upb_fielddef from `field_proto`.
 *
 * If `m` is non-NULL the field is a direct member of that message: it takes
 * the next slot of m->fields and is entered into m->ntof (by name and, when
 * different, by JSON name) and m->itof (by number).  If `m` is NULL the field
 * is an extension: it takes the next slot of ctx->file->exts and is registered
 * under its full name via symtab_add().
 *
 * `prefix` is the scope prepended to the field name to form its full name.
 * The sub-definition (and, for extensions, the extended message) is NOT
 * resolved here; the proto is stashed in f->sub.unresolved for
 * resolve_fielddef().
 *
 * Returns false, with an error recorded in ctx->status, on invalid input or
 * allocation failure. */
static bool create_fielddef(
    const symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
    const google_protobuf_FieldDescriptorProto *field_proto) {
  upb_alloc *alloc = ctx->alloc;
  upb_fielddef *f;
  const google_protobuf_FieldOptions *options;
  upb_strview name;
  const char *full_name;
  const char *json_name;
  const char *shortname;
  uint32_t field_number;

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    upb_status_seterrmsg(ctx->status, "field has no name");
    return false;
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  CHK(upb_isident(name, false, ctx->status));
  full_name = makefullname(ctx, prefix, name);
  /* Must be checked before full_name is handed to string functions. */
  CHK_OOM(full_name);
  shortname = shortdefname(full_name);

  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
  } else {
    json_name = makejsonname(shortname, ctx->alloc);
  }
  /* json_name is used as a C string (lookup, strlen, strcmp) below. */
  CHK_OOM(json_name);

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
    upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number);
    return false;
  }

  if (m) {
    /* direct message field. */
    upb_value v, field_v, json_v;
    size_t json_size;

    f = (upb_fielddef*)&m->fields[m->field_count++];
    f->msgdef = m;
    f->is_extension_ = false;

    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname);
      return false;
    }

    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name);
      return false;
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field number (%u)",
                         field_number);
      return false;
    }

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    CHK_OOM(
        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));

    /* Only add a separate JSON-name entry when it differs from the field name;
     * otherwise the entry inserted above already covers it. */
    if (strcmp(shortname, json_name) != 0) {
      CHK_OOM(
          upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc));
    }

    if (ctx->layouts) {
      /* A layout was supplied for this message: find this field's index in
       * it by field number. */
      const upb_msglayout_field *fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      int i;
      for (i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
    f->is_extension_ = true;
    CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)));
  }

  f->full_name = full_name;
  f->json_name = json_name;
  f->file = ctx->file;
  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->oneof = NULL; /* Overwritten below if the field belongs to a oneof. */
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)",
                       f->full_name);
    return false;
  }

  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    int oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_oneofdef *oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
      upb_status_seterrf(ctx->status,
                         "fields in oneof must have OPTIONAL label (%s)",
                         f->full_name);
      return false;
    }

    if (!m) {
      upb_status_seterrf(ctx->status,
                         "oneof_index provided for extension field (%s)",
                         f->full_name);
      return false;
    }

    /* The index comes straight from the descriptor and is signed, so both
     * bounds must be checked before it is used to index m->oneofs. */
    if (oneof_index < 0 || oneof_index >= m->oneof_count) {
      upb_status_seterrf(ctx->status, "oneof_index out of range (%s)",
                         f->full_name);
      return false;
    }

    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
    f->oneof = oneof;

    CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
    CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  }

  if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) {
    options = google_protobuf_FieldDescriptorProto_options(field_proto);
    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
    f->packed_ = google_protobuf_FieldOptions_packed(options);
  } else {
    f->lazy_ = false;
    f->packed_ = false;
  }

  return true;
}
4514 
create_enumdef(const symtab_addctx * ctx,const char * prefix,const google_protobuf_EnumDescriptorProto * enum_proto)4515 static bool create_enumdef(
4516     const symtab_addctx *ctx, const char *prefix,
4517     const google_protobuf_EnumDescriptorProto *enum_proto) {
4518   upb_enumdef *e;
4519   const google_protobuf_EnumValueDescriptorProto *const *values;
4520   upb_strview name;
4521   size_t i, n;
4522 
4523   name = google_protobuf_EnumDescriptorProto_name(enum_proto);
4524   CHK(upb_isident(name, false, ctx->status));
4525 
4526   e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
4527   e->full_name = makefullname(ctx, prefix, name);
4528   CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
4529 
4530   CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
4531   CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
4532 
4533   e->file = ctx->file;
4534   e->defaultval = 0;
4535 
4536   values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
4537 
4538   if (n == 0) {
4539     upb_status_seterrf(ctx->status,
4540                        "enums must contain at least one value (%s)",
4541                        e->full_name);
4542     return false;
4543   }
4544 
4545   for (i = 0; i < n; i++) {
4546     const google_protobuf_EnumValueDescriptorProto *value = values[i];
4547     upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
4548     char *name2 = strviewdup(ctx, name);
4549     int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
4550     upb_value v = upb_value_int32(num);
4551 
4552     if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
4553       upb_status_seterrf(ctx->status,
4554                          "for proto3, the first enum value must be zero (%s)",
4555                          e->full_name);
4556       return false;
4557     }
4558 
4559     if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
4560       upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2);
4561       return false;
4562     }
4563 
4564     CHK_OOM(name2)
4565     CHK_OOM(
4566         upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
4567 
4568     if (!upb_inttable_lookup(&e->iton, num, NULL)) {
4569       upb_value v = upb_value_cstr(name2);
4570       CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
4571     }
4572   }
4573 
4574   upb_inttable_compact2(&e->iton, ctx->alloc);
4575 
4576   return true;
4577 }
4578 
/* Builds one upb_msgdef from `msg_proto` in the next slot of ctx->file->msgs,
 * registers it under `prefix` + name, then creates its oneofs and fields and,
 * recursively, its nested enums and messages (scoped under this message's
 * full name).
 *
 * When ctx->layouts is set, the next layout from that array is consumed for
 * this message; otherwise storage for a layout is allocated here and is
 * populated later.
 *
 * Returns false, with an error in ctx->status, on invalid input or allocation
 * failure. */
static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
                          const google_protobuf_DescriptorProto *msg_proto) {
  upb_msgdef *m;
  const google_protobuf_MessageOptions *options;
  const google_protobuf_OneofDescriptorProto *const *oneofs;
  const google_protobuf_FieldDescriptorProto *const *fields;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_DescriptorProto *const *msgs;
  size_t i, n;
  upb_strview name;

  name = google_protobuf_DescriptorProto_name(msg_proto);
  CHK(upb_isident(name, false, ctx->status));

  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
  m->full_name = makefullname(ctx, prefix, name);
  CHK_OOM(m->full_name);
  CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));

  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  m->file = ctx->file;
  m->map_entry = false;

  options = google_protobuf_DescriptorProto_options(msg_proto);

  if (options) {
    m->map_entry = google_protobuf_MessageOptions_map_entry(options);
  }

  if (ctx->layouts) {
    m->layout = *ctx->layouts;
    ctx->layouts++;
  } else {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
    CHK_OOM(m->layout);
  }

  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
  m->oneof_count = 0;
  m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
  /* A NULL result is only acceptable when no slots were requested. */
  CHK_OOM(n == 0 || m->oneofs);
  for (i = 0; i < n; i++) {
    CHK(create_oneofdef(ctx, m, oneofs[i]));
  }

  fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
  m->field_count = 0;
  m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
  CHK_OOM(n == 0 || m->fields);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
  }

  CHK(assign_msg_indices(m, ctx->status));
  CHK(check_oneofs(m, ctx->status));
  assign_msg_wellknowntype(m);
  upb_inttable_compact2(&m->itof, ctx->alloc);

  /* This message is built.  Now build nested messages and enums. */

  enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, m->full_name, enums[i]));
  }

  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, m->full_name, msgs[i]));
  }

  return true;
}
4650 
/* Running totals of the declarations found in a file descriptor.  Filled in by
 * count_types_in_file() / count_types_in_msg() and used by build_filedef() to
 * size the per-file def arrays before any defs are created. */
typedef struct {
  int msg_count;   /* Messages, including nested ones. */
  int enum_count;  /* Enums, at file scope and nested in messages. */
  int ext_count;   /* Extensions, at file scope and nested in messages. */
} decl_counts;
4656 
/* Adds `msg_proto` itself, all messages nested inside it (recursively), and
 * the enums and extensions declared at each of those levels to `counts`. */
static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
                               decl_counts *counts) {
  const google_protobuf_DescriptorProto *const *nested;
  size_t nested_n;
  size_t enum_n;
  size_t ext_n;
  size_t idx = 0;

  /* This message. */
  counts->msg_count++;

  /* Everything below it. */
  nested = google_protobuf_DescriptorProto_nested_type(msg_proto, &nested_n);
  while (idx < nested_n) {
    count_types_in_msg(nested[idx], counts);
    idx++;
  }

  /* Only the element counts are needed for enums and extensions. */
  google_protobuf_DescriptorProto_enum_type(msg_proto, &enum_n);
  counts->enum_count += enum_n;

  google_protobuf_DescriptorProto_extension(msg_proto, &ext_n);
  counts->ext_count += ext_n;
}
4675 
/* Adds every message (recursively), enum and extension declared in
 * `file_proto` to `counts`.  The caller is expected to have zeroed `counts`. */
static void count_types_in_file(
    const google_protobuf_FileDescriptorProto *file_proto,
    decl_counts *counts) {
  const google_protobuf_DescriptorProto *const *top_level;
  size_t msg_n;
  size_t enum_n;
  size_t ext_n;
  size_t idx = 0;

  top_level = google_protobuf_FileDescriptorProto_message_type(file_proto,
                                                               &msg_n);
  while (idx < msg_n) {
    count_types_in_msg(top_level[idx], counts);
    idx++;
  }

  /* File-scope enums and extensions: only the counts matter here. */
  google_protobuf_FileDescriptorProto_enum_type(file_proto, &enum_n);
  counts->enum_count += enum_n;

  google_protobuf_FileDescriptorProto_extension(file_proto, &ext_n);
  counts->ext_count += ext_n;
}
4693 
/* Second-pass fixup for a field created by create_fielddef().
 *
 * Resolves, relative to scope `prefix`:
 *   - for extensions, the extended message (f->msgdef);
 *   - for message fields, f->sub.msgdef; for enum fields, f->sub.enumdef.
 * Then establishes the default value: the explicit one from the descriptor
 * when present (rejected for proto3 files and for message fields), otherwise
 * the implicit one from set_default_default().
 *
 * Returns false, with an error in ctx->status, if anything cannot be
 * resolved or parsed. */
static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix,
                             upb_fielddef *f) {
  /* Read the stashed proto up front, before any f->sub member is assigned. */
  const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
  upb_strview name;
  upb_strview defaultval;

  if (f->is_extension_) {
    if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
      upb_status_seterrf(ctx->status,
                         "extension for field '%s' had no extendee",
                         f->full_name);
      return false;
    }

    name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
    f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
    CHK(f->msgdef);
  }

  /* Message and enum fields must name their type. */
  if (upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    if (!google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
      upb_status_seterrf(ctx->status, "field '%s' is missing type name",
                         f->full_name);
      return false;
    }
  }

  name = google_protobuf_FieldDescriptorProto_type_name(field_proto);

  if (upb_fielddef_issubmsg(f)) {
    f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
    CHK(f->sub.msgdef);
  } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM);
    CHK(f->sub.enumdef);
  }

  /* Default values are handled only now because enum defaults are given as a
   * label, which needs the enum resolved above. */
  if (!google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
    set_default_default(ctx, f);
    return true;
  }

  defaultval = google_protobuf_FieldDescriptorProto_default_value(field_proto);

  if (f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status,
                       "proto3 fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (upb_fielddef_issubmsg(f)) {
    upb_status_seterrf(ctx->status,
                       "message fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (parse_default(ctx, defaultval.data, defaultval.size, f)) {
    return true;
  }

  upb_status_seterrf(ctx->status,
                     "couldn't parse default '" UPB_STRVIEW_FORMAT
                     "' for field (%s)",
                     UPB_STRVIEW_ARGS(defaultval), f->full_name);
  return false;
}
4762 
/* Populates `file` from `file_proto`.
 *
 * Steps, in order:
 *   1. count all declarations and allocate the msgs/enums/exts arrays;
 *   2. copy name, package, syntax and the PHP file options;
 *   3. verify that every dependency is already loaded in ctx->symtab and
 *      record it in file->deps / file->dep_count;
 *   4. create all message, enum and file-level extension defs;
 *   5. resolve cross-references and defaults for every field;
 *   6. build message layouts, unless they were supplied via ctx->layouts.
 *
 * Returns false, with an error in ctx->status, on invalid input or allocation
 * failure. */
static bool build_filedef(
    symtab_addctx *ctx, upb_filedef *file,
    const google_protobuf_FileDescriptorProto *file_proto) {
  upb_alloc *alloc = ctx->alloc;
  const google_protobuf_FileOptions *file_options_proto;
  const google_protobuf_DescriptorProto *const *msgs;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_FieldDescriptorProto *const *exts;
  const upb_strview* strs;
  size_t i, n;
  decl_counts counts = {0};

  count_types_in_file(file_proto, &counts);

  file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count);
  file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count);
  file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count);

  CHK_OOM(counts.msg_count == 0 || file->msgs);
  CHK_OOM(counts.enum_count == 0 || file->enums);
  CHK_OOM(counts.ext_count == 0 || file->exts);

  /* We increment these as defs are added. */
  file->msg_count = 0;
  file->enum_count = 0;
  file->ext_count = 0;

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    upb_status_seterrmsg(ctx->status, "File has no name");
    return false;
  }

  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  /* The name is later used as a C string key for the file table. */
  CHK_OOM(file->name);
  file->phpprefix = NULL;
  file->phpnamespace = NULL;

  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
    upb_strview package =
        google_protobuf_FileDescriptorProto_package(file_proto);
    CHK(upb_isident(package, true, ctx->status));
    file->package = strviewdup(ctx, package);
    /* A NULL package means "no package", so a failed copy must not be
     * mistaken for that. */
    CHK_OOM(file->package);
  } else {
    file->package = NULL;
  }

  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_strview syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = UPB_SYNTAX_PROTO2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = UPB_SYNTAX_PROTO3;
    } else {
      upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
                         UPB_STRVIEW_ARGS(syntax));
      return false;
    }
  } else {
    /* No syntax statement means proto2. */
    file->syntax = UPB_SYNTAX_PROTO2;
  }

  /* Read options. */
  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  if (file_options_proto) {
    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
      file->phpprefix = strviewdup(
          ctx,
          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
      CHK_OOM(file->phpprefix);
    }
    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
      file->phpnamespace = strviewdup(
          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
      CHK_OOM(file->phpnamespace);
    }
  }

  /* Verify dependencies. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ;
  CHK_OOM(n == 0 || file->deps);
  /* upb_filedef_depcount() and upb_filedef_dep() rely on this. */
  file->dep_count = (int)n;

  for (i = 0; i < n; i++) {
    upb_strview dep_name = strs[i];
    upb_value v;
    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
                              dep_name.size, &v)) {
      upb_status_seterrf(ctx->status,
                         "Depends on file '" UPB_STRVIEW_FORMAT
                         "', but it has not been loaded",
                         UPB_STRVIEW_ARGS(dep_name));
      return false;
    }
    file->deps[i] = upb_value_getconstptr(v);
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, file->package, msgs[i]));
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, file->package, enums[i]));
  }

  /* Create extensions.  file->exts was already sized from counts.ext_count
   * above, which includes these file-level extensions, so no second
   * allocation is needed. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, file->package, NULL, exts[i]));
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < file->ext_count; i++) {
    CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]));
  }

  for (i = 0; i < file->msg_count; i++) {
    const upb_msgdef *m = &file->msgs[i];
    int j;
    for (j = 0; j < m->field_count; j++) {
      CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]));
    }
  }

  if (!ctx->layouts) {
    for (i = 0; i < file->msg_count; i++) {
      const upb_msgdef *m = &file->msgs[i];
      make_layout(ctx->symtab, m);
    }
  }

  return true;
}
4901 
/* Publishes a successfully built file into symtab `s`: the file itself is
 * entered into s->files under its name, and every symbol collected in
 * ctx->addtab is copied into s->syms.  Insertions allocate from s->arena.
 * Returns false if an insertion fails. */
static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx,
                                 upb_status *status) {
  upb_alloc *alloc = upb_arena_alloc(s->arena);
  const upb_filedef *file = ctx->file;
  upb_strtable_iter iter;

  CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name),
                               upb_value_constptr(file), alloc));

  upb_strtable_begin(&iter, ctx->addtab);
  while (!upb_strtable_done(&iter)) {
    upb_strview sym_name = upb_strtable_iter_key(&iter);
    upb_value sym_def = upb_strtable_iter_value(&iter);
    CHK_OOM(upb_strtable_insert3(&s->syms, sym_name.data, sym_name.size,
                                 sym_def, alloc));
    upb_strtable_next(&iter);
  }

  return true;
}
4920 
4921 /* upb_filedef ****************************************************************/
4922 
/* Returns the file's name, as stored in f->name. */
const char *upb_filedef_name(const upb_filedef *f) {
  return f->name;
}
4926 
/* Returns the file's package, or NULL if the file declared no package. */
const char *upb_filedef_package(const upb_filedef *f) {
  return f->package;
}
4930 
/* Returns the file's php_class_prefix option, or NULL if it was not set. */
const char *upb_filedef_phpprefix(const upb_filedef *f) {
  return f->phpprefix;
}
4934 
/* Returns the file's php_namespace option, or NULL if it was not set. */
const char *upb_filedef_phpnamespace(const upb_filedef *f) {
  return f->phpnamespace;
}
4938 
/* Returns the file's syntax (proto2 or proto3), as stored in f->syntax. */
upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
  return f->syntax;
}
4942 
/* Returns the number of entries in the file's message array (f->msg_count). */
int upb_filedef_msgcount(const upb_filedef *f) {
  return f->msg_count;
}
4946 
/* Returns the file's dependency count (f->dep_count). */
int upb_filedef_depcount(const upb_filedef *f) {
  return f->dep_count;
}
4950 
/* Returns the number of entries in the file's enum array (f->enum_count). */
int upb_filedef_enumcount(const upb_filedef *f) {
  return f->enum_count;
}
4954 
upb_filedef_dep(const upb_filedef * f,int i)4955 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
4956   return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
4957 }
4958 
upb_filedef_msg(const upb_filedef * f,int i)4959 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
4960   return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
4961 }
4962 
upb_filedef_enum(const upb_filedef * f,int i)4963 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
4964   return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
4965 }
4966 
/* Destroys a symtab created by upb_symtab_new(): frees its arena, then the
 * symtab struct itself.  `s` must not be NULL. */
void upb_symtab_free(upb_symtab *s) {
  upb_arena_free(s->arena);
  upb_gfree(s);
}
4971 
upb_symtab_new(void)4972 upb_symtab *upb_symtab_new(void) {
4973   upb_symtab *s = upb_gmalloc(sizeof(*s));
4974   upb_alloc *alloc;
4975 
4976   if (!s) {
4977     return NULL;
4978   }
4979 
4980   s->arena = upb_arena_new();
4981   alloc = upb_arena_alloc(s->arena);
4982 
4983   if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
4984       !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
4985     upb_arena_free(s->arena);
4986     upb_gfree(s);
4987     s = NULL;
4988   }
4989   return s;
4990 }
4991 
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)4992 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
4993   upb_value v;
4994   return upb_strtable_lookup(&s->syms, sym, &v) ?
4995       unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
4996 }
4997 
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)4998 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
4999                                         size_t len) {
5000   upb_value v;
5001   return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
5002       unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
5003 }
5004 
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)5005 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
5006   upb_value v;
5007   return upb_strtable_lookup(&s->syms, sym, &v) ?
5008       unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
5009 }
5010 
upb_symtab_lookupfile(const upb_symtab * s,const char * name)5011 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
5012   upb_value v;
5013   return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
5014                                                   : NULL;
5015 }
5016 
/* Returns the number of entries in the symtab's file table. */
int upb_symtab_filecount(const upb_symtab *s) {
  return (int)upb_strtable_count(&s->files);
}
5020 
_upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_msglayout ** layouts,upb_status * status)5021 static const upb_filedef *_upb_symtab_addfile(
5022     upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
5023     const upb_msglayout **layouts, upb_status *status) {
5024   upb_arena *tmparena = upb_arena_new();
5025   upb_strtable addtab;
5026   upb_alloc *alloc = upb_arena_alloc(s->arena);
5027   upb_filedef *file = upb_malloc(alloc, sizeof(*file));
5028   bool ok;
5029   symtab_addctx ctx;
5030 
5031   ctx.file = file;
5032   ctx.symtab = s;
5033   ctx.alloc = alloc;
5034   ctx.tmp = upb_arena_alloc(tmparena);
5035   ctx.addtab = &addtab;
5036   ctx.layouts = layouts;
5037   ctx.status = status;
5038 
5039   ok = file &&
5040       upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
5041       build_filedef(&ctx, file, file_proto) &&
5042       upb_symtab_addtotabs(s, &ctx, status);
5043 
5044   upb_arena_free(tmparena);
5045   return ok ? file : NULL;
5046 }
5047 
/* Adds the file described by `file_proto` to symtab `s`, computing message
 * layouts rather than taking them from the caller (layouts == NULL).  Returns
 * the new filedef, or NULL on failure. */
const upb_filedef *upb_symtab_addfile(
    upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
    upb_status *status) {
  return _upb_symtab_addfile(s, file_proto, NULL, status);
}
5053 
5054 /* Include here since we want most of this file to be stdio-free. */
5055 #include <stdio.h>
5056 
/* Loads the compiled-in descriptor `init`, after recursively loading all of
 * its dependencies, into symtab `s`.
 *
 * A file whose name is already present in s->files is treated as loaded and
 * the call returns true immediately.  Otherwise the serialized descriptor is
 * parsed into a scratch arena, added together with the layouts supplied by
 * `init`, and the scratch arena is freed.
 *
 * Returns true on success.  On failure the error is printed to stderr and
 * false is returned. */
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  upb_def_init **deps = init->deps;
  google_protobuf_FileDescriptorProto *file;
  upb_arena *arena;
  upb_status status;

  upb_status_clear(&status);

  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
    return true;
  }

  arena = upb_arena_new();
  if (!arena) {
    upb_status_seterrmsg(&status, "out of memory");
    goto err;
  }

  /* `deps` is a NULL-terminated array. */
  for (; *deps; deps++) {
    if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
  }

  file = google_protobuf_FileDescriptorProto_parse(
      init->descriptor.data, init->descriptor.size, arena);

  if (!file) {
    upb_status_seterrf(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;

  upb_arena_free(arena);
  return true;

err:
  fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
          upb_status_errmsg(&status));
  /* The arena may not exist if its creation is what failed. */
  if (arena) {
    upb_arena_free(arena);
  }
  return false;
}
5100 
5101 #undef CHK
5102 #undef CHK_OOM
5103 
5104 
5105 #include <string.h>
5106 
5107 
5108 static char field_size[] = {
5109   0,/* 0 */
5110   8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */
5111   4, /* UPB_DESCRIPTOR_TYPE_FLOAT */
5112   8, /* UPB_DESCRIPTOR_TYPE_INT64 */
5113   8, /* UPB_DESCRIPTOR_TYPE_UINT64 */
5114   4, /* UPB_DESCRIPTOR_TYPE_INT32 */
5115   8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */
5116   4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */
5117   1, /* UPB_DESCRIPTOR_TYPE_BOOL */
5118   sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */
5119   sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */
5120   sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */
5121   sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */
5122   4, /* UPB_DESCRIPTOR_TYPE_UINT32 */
5123   4, /* UPB_DESCRIPTOR_TYPE_ENUM */
5124   4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */
5125   8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */
5126   4, /* UPB_DESCRIPTOR_TYPE_SINT32 */
5127   8, /* UPB_DESCRIPTOR_TYPE_SINT64 */
5128 };
5129 
/* Key/value size passed to _upb_map_new(), indexed by upb_fieldtype_t.
 * Strings/bytes are special-cased in maps and therefore use size 0 here.  The
 * table is only ever read, so it is const. */
static const char _upb_fieldtype_to_mapsize[12] = {
  0,
  1,  /* UPB_TYPE_BOOL */
  4,  /* UPB_TYPE_FLOAT */
  4,  /* UPB_TYPE_INT32 */
  4,  /* UPB_TYPE_UINT32 */
  4,  /* UPB_TYPE_ENUM */
  sizeof(void*),  /* UPB_TYPE_MESSAGE */
  8,  /* UPB_TYPE_DOUBLE */
  8,  /* UPB_TYPE_INT64 */
  8,  /* UPB_TYPE_UINT64 */
  0,  /* UPB_TYPE_STRING */
  0,  /* UPB_TYPE_BYTES */
};
5145 
5146 /** upb_msg *******************************************************************/
5147 
upb_msg_new(const upb_msgdef * m,upb_arena * a)5148 upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) {
5149   return _upb_msg_new(upb_msgdef_layout(m), a);
5150 }
5151 
in_oneof(const upb_msglayout_field * field)5152 static bool in_oneof(const upb_msglayout_field *field) {
5153   return field->presence < 0;
5154 }
5155 
oneofcase(const upb_msg * msg,const upb_msglayout_field * field)5156 static uint32_t *oneofcase(const upb_msg *msg,
5157                            const upb_msglayout_field *field) {
5158   UPB_ASSERT(in_oneof(field));
5159   return UPB_PTR_AT(msg, -field->presence, uint32_t);
5160 }
5161 
_upb_msg_getraw(const upb_msg * msg,const upb_fielddef * f)5162 static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) {
5163   const upb_msglayout_field *field = upb_fielddef_layout(f);
5164   const char *mem = UPB_PTR_AT(msg, field->offset, char);
5165   upb_msgval val = {0};
5166   int size = upb_fielddef_isseq(f) ? sizeof(void *)
5167                                    : field_size[field->descriptortype];
5168   memcpy(&val, mem, size);
5169   return val;
5170 }
5171 
upb_msg_has(const upb_msg * msg,const upb_fielddef * f)5172 bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) {
5173   const upb_msglayout_field *field = upb_fielddef_layout(f);
5174   if (in_oneof(field)) {
5175     return *oneofcase(msg, field) == field->number;
5176   } else if (field->presence > 0) {
5177     uint32_t hasbit = field->presence;
5178     return *UPB_PTR_AT(msg, hasbit / 8, uint8_t) & (1 << (hasbit % 8));
5179   } else {
5180     UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
5181                field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
5182     return _upb_msg_getraw(msg, f).msg_val != NULL;
5183   }
5184 }
5185 
upb_msg_hasoneof(const upb_msg * msg,const upb_oneofdef * o)5186 bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) {
5187   upb_oneof_iter i;
5188   const upb_fielddef *f;
5189   const upb_msglayout_field *field;
5190 
5191   upb_oneof_begin(&i, o);
5192   if (upb_oneof_done(&i)) return false;
5193   f = upb_oneof_iter_field(&i);
5194   field = upb_fielddef_layout(f);
5195   return *oneofcase(msg, field) != 0;
5196 }
5197 
upb_msg_get(const upb_msg * msg,const upb_fielddef * f)5198 upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) {
5199   if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) {
5200     return _upb_msg_getraw(msg, f);
5201   } else {
5202     /* TODO(haberman): change upb_fielddef to not require this switch(). */
5203     upb_msgval val = {0};
5204     switch (upb_fielddef_type(f)) {
5205       case UPB_TYPE_INT32:
5206       case UPB_TYPE_ENUM:
5207         val.int32_val = upb_fielddef_defaultint32(f);
5208         break;
5209       case UPB_TYPE_INT64:
5210         val.int64_val = upb_fielddef_defaultint64(f);
5211         break;
5212       case UPB_TYPE_UINT32:
5213         val.uint32_val = upb_fielddef_defaultuint32(f);
5214         break;
5215       case UPB_TYPE_UINT64:
5216         val.uint64_val = upb_fielddef_defaultuint64(f);
5217         break;
5218       case UPB_TYPE_FLOAT:
5219         val.float_val = upb_fielddef_defaultfloat(f);
5220         break;
5221       case UPB_TYPE_DOUBLE:
5222         val.double_val = upb_fielddef_defaultdouble(f);
5223         break;
5224       case UPB_TYPE_BOOL:
5225         val.double_val = upb_fielddef_defaultbool(f);
5226         break;
5227       case UPB_TYPE_STRING:
5228       case UPB_TYPE_BYTES:
5229         val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size);
5230         break;
5231       case UPB_TYPE_MESSAGE:
5232         val.msg_val = NULL;
5233         break;
5234     }
5235     return val;
5236   }
5237 }
5238 
upb_msg_mutable(upb_msg * msg,const upb_fielddef * f,upb_arena * a)5239 upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f,
5240                               upb_arena *a) {
5241   const upb_msglayout_field *field = upb_fielddef_layout(f);
5242   upb_mutmsgval ret;
5243   char *mem = UPB_PTR_AT(msg, field->offset, char);
5244   bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number;
5245 
5246   memcpy(&ret, mem, sizeof(void*));
5247 
5248   if (a && (!ret.msg || wrong_oneof)) {
5249     if (upb_fielddef_ismap(f)) {
5250       const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
5251       const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY);
5252       const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE);
5253       ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value));
5254     } else if (upb_fielddef_isseq(f)) {
5255       ret.array = upb_array_new(a, upb_fielddef_type(f));
5256     } else {
5257       UPB_ASSERT(upb_fielddef_issubmsg(f));
5258       ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a);
5259     }
5260 
5261     memcpy(mem, &ret, sizeof(void*));
5262 
5263     if (wrong_oneof) {
5264       *oneofcase(msg, field) = field->number;
5265     }
5266   }
5267   return ret;
5268 }
5269 
/* Stores |val| into field |f| of |msg|.
 *
 * Repeated fields store a pointer's worth of bytes; all others store
 * field_size[descriptortype] bytes.  For oneof members the oneof case is set
 * to this field's number.  |a| is not used by this implementation. */
void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val,
                 upb_arena *a) {
  const upb_msglayout_field *field = upb_fielddef_layout(f);
  int size;

  if (upb_fielddef_isseq(f)) {
    size = sizeof(void *);
  } else {
    size = field_size[field->descriptortype];
  }

  memcpy(UPB_PTR_AT(msg, field->offset, char), &val, size);

  if (!in_oneof(field)) {
    return;
  }
  *oneofcase(msg, field) = field->number;
}
5281 
/* Advances to the next set/non-empty field of |msg|.
 *
 * |*iter| holds the position of the previous call; the search resumes at the
 * following field.  On success the field and its raw value are written to
 * |*out_f| / |*out_val|, |*iter| is updated and true is returned.  When no
 * further field qualifies, |*iter| is still updated and false is returned.
 * |ext_pool| is not used by this implementation.
 *
 * A field is skipped when it tracks presence and is absent, or when it does
 * not track presence and its value is zero/NULL (for strings only the size is
 * considered) or is an empty array or map. */
bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m,
                  const upb_symtab *ext_pool, const upb_fielddef **out_f,
                  upb_msgval *out_val, size_t *iter) {
  const upb_msgval zero = {0};
  size_t idx = *iter;
  const upb_fielddef *f;

  for (;;) {
    upb_msgval val;

    f = _upb_msgdef_field(m, (int)++idx);
    if (f == NULL) break;

    val = _upb_msg_getraw(msg, f);

    if (upb_fielddef_haspresence(f)) {
      if (!upb_msg_has(msg, f)) continue;
    } else {
      upb_msgval test = val;

      if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) {
        /* The data pointer may be non-NULL for an empty string; only the size
         * decides emptiness. */
        test.str_val.data = NULL;
      }

      /* All-zero value: unset scalar or NULL pointer. */
      if (memcmp(&test, &zero, sizeof(test)) == 0) continue;

      /* Allocated but empty container. */
      if (upb_fielddef_ismap(f)) {
        if (upb_map_size(test.map_val) == 0) continue;
      } else if (upb_fielddef_isseq(f)) {
        if (upb_array_size(test.array_val) == 0) continue;
      }
    }

    *out_val = val;
    *out_f = f;
    *iter = idx;
    return true;
  }

  *iter = idx;
  return false;
}
5319 
5320 /** upb_array *****************************************************************/
5321 
upb_array_new(upb_arena * a,upb_fieldtype_t type)5322 upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) {
5323   return _upb_array_new(a, type);
5324 }
5325 
upb_array_size(const upb_array * arr)5326 size_t upb_array_size(const upb_array *arr) {
5327   return arr->len;
5328 }
5329 
upb_array_get(const upb_array * arr,size_t i)5330 upb_msgval upb_array_get(const upb_array *arr, size_t i) {
5331   upb_msgval ret;
5332   const char* data = _upb_array_constptr(arr);
5333   int lg2 = arr->data & 7;
5334   UPB_ASSERT(i < arr->len);
5335   memcpy(&ret, data + (i << lg2), 1 << lg2);
5336   return ret;
5337 }
5338 
/* Overwrites element |i| of |arr| with |val|; |i| must be below arr->len
 * (asserted).  The element size is 1 << (arr->data & 7) bytes. */
void upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
  char *base = _upb_array_ptr(arr);
  int lg2 = arr->data & 7;

  UPB_ASSERT(i < arr->len);
  memcpy(base + (i << lg2), &val, 1 << lg2);
}
5345 
/* Appends |val| to |arr|, growing the storage through |arena| first.
 * Returns false, leaving the length unchanged, if the storage could not be
 * grown. */
bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) {
  const size_t needed = arr->len + 1;

  if (_upb_array_realloc(arr, needed, arena)) {
    arr->len++;
    upb_array_set(arr, arr->len - 1, val);
    return true;
  }
  return false;
}
5354 
5355 /* Resizes the array to the given size, reallocating if necessary, and returns a
5356  * pointer to the new array elements. */
upb_array_resize(upb_array * arr,size_t size,upb_arena * arena)5357 bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) {
5358   return _upb_array_realloc(arr, size, arena);
5359 }
5360 
5361 /** upb_map *******************************************************************/
5362 
upb_map_new(upb_arena * a,upb_fieldtype_t key_type,upb_fieldtype_t value_type)5363 upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type,
5364                      upb_fieldtype_t value_type) {
5365   return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type],
5366                       _upb_fieldtype_to_mapsize[value_type]);
5367 }
5368 
/* Returns the number of entries in |map|, as reported by _upb_map_size(). */
size_t upb_map_size(const upb_map *map) {
  const size_t entries = _upb_map_size(map);
  return entries;
}
5372 
/* Looks up |key| in |map| via _upb_map_get(), using the key/value sizes
 * recorded in the map.  Returns that call's result; |val| is passed through
 * as the output location. */
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  const bool found =
      _upb_map_get(map, &key, map->key_size, val, map->val_size);
  return found;
}
5376 
/* Stores |val| under |key| in |map| via _upb_map_set(), using the key/value
 * sizes recorded in the map and |arena| for any allocation.  Returns that
 * call's result. */
bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
                 upb_arena *arena) {
  const bool stored =
      _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena);
  return stored;
}
5381 
/* Removes |key| from |map| via _upb_map_delete() and returns that call's
 * result. */
bool upb_map_delete(upb_map *map, upb_msgval key) {
  const bool removed = _upb_map_delete(map, &key, map->key_size);
  return removed;
}
5385 
/* Advances the map iterator position |*iter|; forwards to _upb_map_next() and
 * returns its result. */
bool upb_mapiter_next(const upb_map *map, size_t *iter) {
  const bool more = _upb_map_next(map, iter);
  return more;
}
5389 
5390 /* Returns the key and value for this entry of the map. */
upb_mapiter_key(const upb_map * map,size_t iter)5391 upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) {
5392   upb_strtable_iter i;
5393   upb_msgval ret;
5394   i.t = &map->table;
5395   i.index = iter;
5396   _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
5397   return ret;
5398 }
5399 
upb_mapiter_value(const upb_map * map,size_t iter)5400 upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) {
5401   upb_strtable_iter i;
5402   upb_msgval ret;
5403   i.t = &map->table;
5404   i.index = iter;
5405   _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
5406   return ret;
5407 }
5408 
5409 /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */
5410 /*
5411 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
5412 ** UPB_ASSERT() or return false.
5413 */
5414 
5415 
5416 #include <string.h>
5417 
5418 
5419 
/* Handlers registered for one message type.  Instances are allocated by
 * upb_handlers_new() with extra trailing space so that |table| holds one entry
 * per selector of |msg|. */
struct upb_handlers {
  upb_handlercache *cache;       /* Cache that created this object. */
  const upb_msgdef *msg;         /* Message type these handlers are for. */
  const upb_handlers **sub;      /* Sub-handlers per submessage field, or NULL
                                  * if the message has no such fields. */
  const void *top_closure_type;  /* Closure type established for the top-level
                                  * frame, or NULL if none yet. */
  upb_handlers_tabent table[1];  /* Dynamically-sized field handler array. */
};
5427 
/* Allocates |size| zero-filled bytes from |arena|.  Returns NULL if the
 * allocation fails. */
static void *upb_calloc(upb_arena *arena, size_t size) {
  upb_alloc *alloc = upb_arena_alloc(arena);
  void *mem = upb_malloc(alloc, size);

  if (mem == NULL) {
    return NULL;
  }
  memset(mem, 0, size);
  return mem;
}
5435 
/* Dummy object that exists only so that its address can serve as the unique
 * pointer value for UPB_NO_CLOSURE; its contents carry no meaning. */
char _upb_noclosure;
5439 
/* Given a selector for a STARTSUBMSG handler, resolves to the slot holding the
 * subhandlers for this submessage field.  Expands to an lvalue.  Arguments are
 * parenthesized so that arbitrary expressions (e.g. `&obj` or `a + b`) expand
 * correctly. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH((h), upb_fielddef_index(f))
5446 
/* Returns the selector for handler |type| on field |f|.
 *
 * Both preconditions are asserted rather than reported: |f| must belong to the
 * message type of |h|, and |type| must be applicable to |f|.  If the lookup
 * fails with assertions disabled, the returned value is whatever |sel| holds
 * (it is not initialized here). */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel;
  const bool found = upb_handlers_getselector(f, type, &sel);

  UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f));
  UPB_ASSERT(found);

  return sel;
}
5459 
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)5460 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
5461                              upb_handlertype_t type) {
5462   int32_t sel = trygetsel(h, f, type);
5463   UPB_ASSERT(sel >= 0);
5464   return sel;
5465 }
5466 
/* Returns the address of the return_closure_type slot in the table entry for
 * handler |type| of field |f|, so callers can both read and update it. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  const upb_selector_t sel = handlers_getsel(h, f, type);
  return &h->table[sel].attr.return_closure_type;
}
5471 
/* Registers |func| (with attributes |attr|, or the defaults when |attr| is
 * NULL) in table slot |sel| of |h|.  The slot must be empty (asserted).
 *
 * Returns false without registering anything if the attributes' closure type
 * or return closure type conflicts with a type already established for the
 * same context; returns true once the handler is stored.  |f| may be NULL for
 * handlers that are not tied to a field. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  const upb_handlerattr *attr) {
  upb_handlerattr set_attr = UPB_HANDLERATTR_INIT;
  const void *closure_type;
  const void **context_closure_type;

  UPB_ASSERT(!h->table[sel].func);

  if (attr != NULL) {
    set_attr = *attr;
  }

  closure_type = set_attr.closure_type;

  /* Find where the closure type for this handler's context is recorded:
   * string data handlers use the STARTSTR return type, handlers inside a
   * sequence use the STARTSEQ return type, everything else uses the
   * top-level closure type. */
  if (type == UPB_HANDLER_STRING) {
    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    context_closure_type = &h->top_closure_type;
  }

  /* A declared closure type must agree with the established one, and
   * establishes it if none was recorded yet. */
  if (closure_type != NULL) {
    if (*context_closure_type != NULL &&
        *context_closure_type != closure_type) {
      return false;
    }
    *context_closure_type = closure_type;
  }

  /* For STARTSEQ/STARTSTR, reconcile the declared return closure type with
   * any expectation already recorded in this slot. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *declared = set_attr.return_closure_type;
    const void *expected = h->table[sel].attr.return_closure_type;

    if (expected != NULL) {
      if (declared == NULL) {
        set_attr.return_closure_type = expected;
      } else if (declared != expected) {
        return false;
      }
    }
  }

  h->table[sel].func = (upb_func*)func;
  h->table[sel].attr = set_attr;
  return true;
}
5525 
5526 /* Returns the effective closure type for this handler (which will propagate
5527  * from outer frames if this frame has no START* handler).  Not implemented for
5528  * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
5529  * the effective closure type is unspecified (either no handler was registered
5530  * to specify it or the handler that was registered did not specify the closure
5531  * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)5532 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
5533                                    upb_handlertype_t type) {
5534   const void *ret;
5535   upb_selector_t sel;
5536 
5537   UPB_ASSERT(type != UPB_HANDLER_STRING);
5538   ret = h->top_closure_type;
5539 
5540   if (upb_fielddef_isseq(f) &&
5541       type != UPB_HANDLER_STARTSEQ &&
5542       type != UPB_HANDLER_ENDSEQ &&
5543       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
5544     ret = h->table[sel].attr.return_closure_type;
5545   }
5546 
5547   if (type == UPB_HANDLER_STRING &&
5548       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
5549     ret = h->table[sel].attr.return_closure_type;
5550   }
5551 
5552   /* The effective type of the submessage; not used yet.
5553    * if (type == SUBMESSAGE &&
5554    *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
5555    *   ret = h->table[sel].attr.return_closure_type;
5556    * } */
5557 
5558   return ret;
5559 }
5560 
5561 /* Checks whether the START* handler specified by f & type is missing even
5562  * though it is required to convert the established type of an outer frame
5563  * ("closure_type") into the established type of an inner frame (represented in
5564  * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)5565 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
5566                 upb_status *status) {
5567   const void *closure_type;
5568   const upb_handlerattr *attr;
5569   const void *return_closure_type;
5570 
5571   upb_selector_t sel = handlers_getsel(h, f, type);
5572   if (h->table[sel].func) return true;
5573   closure_type = effective_closure_type(h, f, type);
5574   attr = &h->table[sel].attr;
5575   return_closure_type = attr->return_closure_type;
5576   if (closure_type && return_closure_type &&
5577       closure_type != return_closure_type) {
5578     return false;
5579   }
5580   return true;
5581 }
5582 
upb_handlers_new(const upb_msgdef * md,upb_handlercache * cache,upb_arena * arena)5583 static upb_handlers *upb_handlers_new(const upb_msgdef *md,
5584                                       upb_handlercache *cache,
5585                                       upb_arena *arena) {
5586   int extra;
5587   upb_handlers *h;
5588 
5589   extra =
5590       (int)(sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1));
5591   h = upb_calloc(arena, sizeof(*h) + extra);
5592   if (!h) return NULL;
5593 
5594   h->cache = cache;
5595   h->msg = md;
5596 
5597   if (upb_msgdef_submsgfieldcount(md) > 0) {
5598     size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub);
5599     h->sub = upb_calloc(arena, bytes);
5600     if (!h->sub) return NULL;
5601   } else {
5602     h->sub = 0;
5603   }
5604 
5605   /* calloc() above initialized all handlers to NULL. */
5606   return h;
5607 }
5608 
5609 /* Public interface ***********************************************************/
5610 
5611 #define SETTER(name, handlerctype, handlertype)                       \
5612   bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \
5613                               handlerctype func,                      \
5614                               const upb_handlerattr *attr) {          \
5615     int32_t sel = trygetsel(h, f, handlertype);                       \
5616     return doset(h, sel, f, handlertype, (upb_func *)func, attr);     \
5617   }
5618 
SETTER(int32,upb_int32_handlerfunc *,UPB_HANDLER_INT32)5619 SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
5620 SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
5621 SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
5622 SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
5623 SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
5624 SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
5625 SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
5626 SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
5627 SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
5628 SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
5629 SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
5630 SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
5631 SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
5632 SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)
5633 
5634 #undef SETTER
5635 
5636 bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
5637                              const upb_handlerattr *attr) {
5638   return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
5639                (upb_func *)func, attr);
5640 }
5641 
/* Registers |func| in the UPB_STARTMSG_SELECTOR slot of |h| via doset() (no
 * field is involved, so NULL is passed for it).  Returns doset()'s result. */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              const upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
5647 
/* Registers |func| in the UPB_ENDMSG_SELECTOR slot of |h| via doset() (no
 * field is involved, so NULL is passed for it).  Returns doset()'s result. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            const upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
5653 
/* Sets |sub| as the handlers for submessage field |f| of |h|.
 *
 * |sub| must be non-NULL and |f| must be a submessage field (both asserted).
 * Returns false if sub-handlers were already set for |f| (they cannot be
 * replaced) or if |sub| handles a different message type than |f| refers to;
 * returns true after storing |sub|. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  UPB_ASSERT(sub);
  UPB_ASSERT(upb_fielddef_issubmsg(f));

  if (SUBH_F(h, f) ||
      upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  return true;
}
5665 
/* Returns the sub-handlers stored for submessage field |f| of |h| (NULL if
 * none were set).  |f| must be a submessage field (asserted). */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;
  UPB_ASSERT(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
5671 
upb_handlers_gethandler(const upb_handlers * h,upb_selector_t s,const void ** handler_data)5672 upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s,
5673                                   const void **handler_data) {
5674   upb_func *ret = (upb_func *)h->table[s].func;
5675   if (ret && handler_data) {
5676     *handler_data = h->table[s].attr.handler_data;
5677   }
5678   return ret;
5679 }
5680 
/* Copies the attributes of the handler registered for |sel| into |*attr|.
 * Returns false, leaving |*attr| untouched, if no handler is registered. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel, NULL)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
5688 
/* Returns the sub-handlers for the submessage field whose STARTSUBMSG
 * selector is |sel|.  Such selectors are the field index offset by
 * UPB_STATIC_SELECTOR_COUNT, so subtracting that constant yields the index
 * into h->sub. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  return h->sub[sel - UPB_STATIC_SELECTOR_COUNT];
}
5694 
upb_handlers_msgdef(const upb_handlers * h)5695 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
5696 
/* Registers cleanup function |func| for pointer |p| with the cache that owns
 * |h|; forwards to upb_handlercache_addcleanup() and returns its result. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  upb_handlercache *owner = h->cache;
  return upb_handlercache_addcleanup(owner, p, func);
}
5700 
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)5701 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
5702   switch (upb_fielddef_type(f)) {
5703     case UPB_TYPE_INT32:
5704     case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
5705     case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
5706     case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
5707     case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
5708     case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
5709     case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
5710     case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
5711     default: UPB_ASSERT(false); return -1;  /* Invalid input. */
5712   }
5713 }
5714 
/* Computes the selector for handler |type| on field |f| and stores it in |*s|.
 *
 * Returns false (leaving |*s| untouched) when |type| does not apply to |f|.
 * Selectors are laid out relative to the field's selector base:
 *   base - 2: STARTSEQ       base - 1: ENDSEQ
 *   base    : primitive value, STRING (string fields), or ENDSUBMSG
 *   base + 1: STARTSTR       base + 2: ENDSTR
 *   base + 3: STRING for lazy (non-string) fields
 * STARTSUBMSG is the exception: it is the field index plus
 * UPB_STATIC_SELECTOR_COUNT. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  const uint32_t base = upb_fielddef_selectorbase(f);

  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      if (upb_fielddef_isprimitive(f) &&
          upb_handlers_getprimitivehandlertype(f) == type) {
        *s = base;
        break;
      }
      return false;

    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = base;
        break;
      }
      if (!upb_fielddef_lazy(f)) return false;
      *s = base + 3;
      break;

    case UPB_HANDLER_STARTSTR:
      if (!(upb_fielddef_isstring(f) || upb_fielddef_lazy(f))) return false;
      *s = base + 1;
      break;

    case UPB_HANDLER_ENDSTR:
      if (!(upb_fielddef_isstring(f) || upb_fielddef_lazy(f))) return false;
      *s = base + 2;
      break;

    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = base - 2;
      break;

    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = base - 1;
      break;

    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* STARTSUBMSG selectors sit at the beginning of the table so that the
       * selector can double as an index into the "sub" array of subhandlers.
       * The two indexes are the same, except that in the handler table the
       * static selectors come first. */
      *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT;
      break;

    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = base;
      break;
  }

  UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f)));
  return true;
}
5778 
5779 /* upb_handlercache ***********************************************************/
5780 
/* Cache of upb_handlers objects, created lazily per message type. */
struct upb_handlercache {
  upb_arena *arena;  /* Owns the handlers objects and registered cleanups. */
  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
  upb_handlers_callback *callback;  /* Invoked as callback(closure, h) for each
                                     * newly created handlers object. */
  const void *closure;              /* First argument passed to |callback|. */
};
5787 
upb_handlercache_get(upb_handlercache * c,const upb_msgdef * md)5788 const upb_handlers *upb_handlercache_get(upb_handlercache *c,
5789                                          const upb_msgdef *md) {
5790   upb_msg_field_iter i;
5791   upb_value v;
5792   upb_handlers *h;
5793 
5794   if (upb_inttable_lookupptr(&c->tab, md, &v)) {
5795     return upb_value_getptr(v);
5796   }
5797 
5798   h = upb_handlers_new(md, c, c->arena);
5799   v = upb_value_ptr(h);
5800 
5801   if (!h) return NULL;
5802   if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL;
5803 
5804   c->callback(c->closure, h);
5805 
5806   /* For each submessage field, get or create a handlers object and set it as
5807    * the subhandlers. */
5808   for(upb_msg_field_begin(&i, md);
5809       !upb_msg_field_done(&i);
5810       upb_msg_field_next(&i)) {
5811     upb_fielddef *f = upb_msg_iter_field(&i);
5812 
5813     if (upb_fielddef_issubmsg(f)) {
5814       const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
5815       const upb_handlers *sub_mh = upb_handlercache_get(c, subdef);
5816 
5817       if (!sub_mh) return NULL;
5818 
5819       upb_handlers_setsubhandlers(h, f, sub_mh);
5820     }
5821   }
5822 
5823   return h;
5824 }
5825 
5826 
upb_handlercache_new(upb_handlers_callback * callback,const void * closure)5827 upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback,
5828                                        const void *closure) {
5829   upb_handlercache *cache = upb_gmalloc(sizeof(*cache));
5830 
5831   if (!cache) return NULL;
5832 
5833   cache->arena = upb_arena_new();
5834 
5835   cache->callback = callback;
5836   cache->closure = closure;
5837 
5838   if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom;
5839 
5840   return cache;
5841 
5842 oom:
5843   upb_gfree(cache);
5844   return NULL;
5845 }
5846 
/* Destroys |cache|: releases the lookup table, then the arena, and finally
 * the cache structure itself. */
void upb_handlercache_free(upb_handlercache *cache) {
  upb_inttable_uninit(&cache->tab);
  upb_arena_free(cache->arena);
  upb_gfree(cache);
}
5852 
/* Registers cleanup function |func| for pointer |p| on the cache's arena;
 * forwards to upb_arena_addcleanup() and returns its result. */
bool upb_handlercache_addcleanup(upb_handlercache *c, void *p,
                                 upb_handlerfree *func) {
  upb_arena *arena = c->arena;
  return upb_arena_addcleanup(arena, p, func);
}
5857 
5858 /* upb_byteshandler ***********************************************************/
5859 
/* Installs |func| (with bound data |d|) in the UPB_STARTSTR_SELECTOR slot of
 * the bytes handler.  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  /* The two stores are independent; data first, then the callback. */
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d;
  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;

  return true;
}
5866 
/* Installs |func| (with bound data |d|) in the UPB_STRING_SELECTOR slot of
 * the bytes handler.  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  /* The two stores are independent; data first, then the callback. */
  h->table[UPB_STRING_SELECTOR].attr.handler_data = d;
  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;

  return true;
}
5873 
/* Installs |func| (with bound data |d|) in the UPB_ENDSTR_SELECTOR slot of
 * the bytes handler.  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  /* The two stores are independent; data first, then the callback. */
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d;
  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;

  return true;
}
5880 
5881 /** Handlers for upb_msg ******************************************************/
5882 
/* Per-field data bound to the generic scalar writers below. */
typedef struct {
  size_t offset;   /* Byte offset in the message where the value is stored. */
  int32_t hasbit;  /* Presence bit index; only set when greater than zero. */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 *
 * MSG_WRITER(type, ctype) defines upb_msg_set<type>(c, hd, val), which treats
 * |c| as the message's raw bytes and |hd| as a upb_msg_handlerdata.  It sets
 * the presence bit (when hasbit > 0), stores |val| at the field's offset, and
 * always returns true. */
#define MSG_WRITER(type, ctype)                                     \
  bool upb_msg_set##type(void *c, const void *hd, ctype val) {      \
    const upb_msg_handlerdata *info = hd;                           \
    uint8_t *bytes = c;                                             \
    const int32_t bit = info->hasbit;                               \
    if (bit > 0) {                                                  \
      bytes[bit / 8] |= 1 << (bit % 8);                             \
    }                                                               \
    *(ctype *)(bytes + info->offset) = val;                         \
    return true;                                                    \
  }

MSG_WRITER(double, double)
MSG_WRITER(float,  float)
MSG_WRITER(int32,  int32_t)
MSG_WRITER(int64,  int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool,   bool)
5906 
5907 bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
5908                               size_t offset, int32_t hasbit) {
5909   upb_handlerattr attr = UPB_HANDLERATTR_INIT;
5910   bool ok;
5911 
5912   upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
5913   if (!d) return false;
5914   d->offset = offset;
5915   d->hasbit = hasbit;
5916 
5917   attr.handler_data = d;
5918   attr.alwaysok = true;
5919   upb_handlers_addcleanup(h, d, upb_gfree);
5920 
5921 #define TYPE(u, l) \
5922   case UPB_TYPE_##u: \
5923     ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
5924 
5925   ok = false;
5926 
5927   switch (upb_fielddef_type(f)) {
5928     TYPE(INT64,  int64);
5929     TYPE(INT32,  int32);
5930     TYPE(ENUM,   int32);
5931     TYPE(UINT64, uint64);
5932     TYPE(UINT32, uint32);
5933     TYPE(DOUBLE, double);
5934     TYPE(FLOAT,  float);
5935     TYPE(BOOL,   bool);
5936     default: UPB_ASSERT(false); break;
5937   }
5938 #undef TYPE
5939 
5940   return ok;
5941 }
5942 
/* Inverse of upb_msg_setscalarhandler(): if the handler registered on |h| for
 * selector |s| is one of the generic upb_msg_set<type> writers, reports the
 * matching field type plus the offset/hasbit stored in its handler data and
 * returns true.  Otherwise returns false and leaves the outputs untouched. */
bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
                                  upb_selector_t s,
                                  upb_fieldtype_t *type,
                                  size_t *offset,
                                  int32_t *hasbit) {
  const void *bound_data;
  const upb_msg_handlerdata *info;
  upb_fieldtype_t detected;
  upb_func *handler = upb_handlers_gethandler(h, s, &bound_data);

  /* Identify the writer by comparing function pointers. */
  if ((upb_int64_handlerfunc*)handler == upb_msg_setint64) {
    detected = UPB_TYPE_INT64;
  } else if ((upb_int32_handlerfunc*)handler == upb_msg_setint32) {
    detected = UPB_TYPE_INT32;
  } else if ((upb_uint64_handlerfunc*)handler == upb_msg_setuint64) {
    detected = UPB_TYPE_UINT64;
  } else if ((upb_uint32_handlerfunc*)handler == upb_msg_setuint32) {
    detected = UPB_TYPE_UINT32;
  } else if ((upb_double_handlerfunc*)handler == upb_msg_setdouble) {
    detected = UPB_TYPE_DOUBLE;
  } else if ((upb_float_handlerfunc*)handler == upb_msg_setfloat) {
    detected = UPB_TYPE_FLOAT;
  } else if ((upb_bool_handlerfunc*)handler == upb_msg_setbool) {
    detected = UPB_TYPE_BOOL;
  } else {
    return false;
  }

  info = bound_data;
  *type = detected;
  *offset = info->offset;
  *hasbit = info->hasbit;
  return true;
}
5975 
5976 
/* Pushes the whole buffer [buf, buf + len) through |sink| as one
 * start / putbuf / end sequence.
 *
 * The putbuf step is skipped for an empty buffer.  Returns false as soon as
 * start fails or putbuf reports fewer than |len| bytes (end is not called in
 * either case); otherwise returns the result of upb_bytessink_end(). */
bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) {
  upb_bufhandle handle = UPB_BUFHANDLE_INIT;
  void *subc;

  handle.buf = buf;

  if (!upb_bytessink_start(sink, len, &subc)) {
    return false;
  }

  if (len != 0 &&
      upb_bytessink_putbuf(sink, subc, buf, len, &handle) < len) {
    return false;
  }

  return upb_bytessink_end(sink);
}
5991 
5992 
5993 #ifdef UPB_MSVC_VSNPRINTF
5994 /* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and
5995  * vsnprintf. To support them, missing functions are manually implemented
5996  * using the existing secure functions. */
msvc_vsnprintf(char * s,size_t n,const char * format,va_list arg)5997 int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) {
5998   if (!s) {
5999     return _vscprintf(format, arg);
6000   }
6001   int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg);
6002   if (ret < 0) {
6003 	ret = _vscprintf(format, arg);
6004   }
6005   return ret;
6006 }
6007 
/* snprintf() replacement for old MSVC: forwards its variadic arguments to
 * msvc_vsnprintf() and returns that function's result.
 *
 * Declarations are kept ahead of va_start() because the compilers this shim
 * targets do not accept declarations after statements in C. */
int msvc_snprintf(char* s, size_t n, const char* format, ...) {
  va_list arg;
  int ret;

  va_start(arg, format);
  ret = msvc_vsnprintf(s, n, format, arg);
  va_end(arg);
  return ret;
}
6015 #endif
6016 /*
6017 ** protobuf decoder bytecode compiler
6018 **
6019 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6020 ** according to that specific schema and destination handlers.
6021 **
6022 ** Bytecode definition is in decoder.int.h.
6023 */
6024 
6025 #include <stdarg.h>
6026 
6027 #ifdef UPB_DUMP_BYTECODE
6028 #include <stdio.h>
6029 #endif
6030 
6031 
/* Number of slots in the compiler's fwd_labels[] / back_labels[] arrays;
 * label() and labelref() assert that label numbers stay below this. */
#define MAXLABEL 5
/* Value stored in a label slot that currently holds no bytecode offset. */
#define EMPTYLABEL -1
6034 
6035 /* upb_pbdecodermethod ********************************************************/
6036 
/* Releases one decoder method: its dispatch table, then the struct itself. */
static void freemethod(upb_pbdecodermethod *method) {
  /* The table is embedded in the struct, so it must be torn down first. */
  upb_inttable_uninit(&(method->dispatch));

  upb_gfree(method);
}
6041 
newmethod(const upb_handlers * dest_handlers,mgroup * group)6042 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6043                                       mgroup *group) {
6044   upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
6045   upb_byteshandler_init(&ret->input_handler_);
6046 
6047   ret->group = group;
6048   ret->dest_handlers_ = dest_handlers;
6049   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6050 
6051   return ret;
6052 }
6053 
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)6054 const upb_handlers *upb_pbdecodermethod_desthandlers(
6055     const upb_pbdecodermethod *m) {
6056   return m->dest_handlers_;
6057 }
6058 
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)6059 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6060     const upb_pbdecodermethod *m) {
6061   return &m->input_handler_;
6062 }
6063 
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)6064 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6065   return m->is_native_;
6066 }
6067 
6068 
6069 /* mgroup *********************************************************************/
6070 
/* Destroys a method group: every method stored in its table, the table
 * itself, the bytecode buffer, and finally the group struct. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    void *method = upb_value_getptr(upb_inttable_iter_value(&it));
    freemethod(method);
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
6083 
newgroup(void)6084 mgroup *newgroup(void) {
6085   mgroup *g = upb_gmalloc(sizeof(*g));
6086   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6087   g->bytecode = NULL;
6088   g->bytecode_end = NULL;
6089   return g;
6090 }
6091 
6092 
6093 /* bytecode compiler **********************************************************/
6094 
/* Data used only at compilation time. */
typedef struct {
  /* Method group whose bytecode buffer is being written; put32() grows
   * group->bytecode and pcofs() measures offsets from its start. */
  mgroup *group;

  /* Next bytecode word to be written by put32(). */
  uint32_t *pc;
  /* Local label state, indexed by label number (EMPTYLABEL when unset).
   * fwd_labels[n]:  bytecode offset of the most recent instruction holding an
   *                 unresolved forward reference to label n (head of the
   *                 patch list walked by label()).
   * back_labels[n]: bytecode offset at which label n was last defined. */
  int fwd_labels[MAXLABEL];
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
6106 
newcompiler(mgroup * group,bool lazy)6107 static compiler *newcompiler(mgroup *group, bool lazy) {
6108   compiler *ret = upb_gmalloc(sizeof(*ret));
6109   int i;
6110 
6111   ret->group = group;
6112   ret->lazy = lazy;
6113   for (i = 0; i < MAXLABEL; i++) {
6114     ret->fwd_labels[i] = EMPTYLABEL;
6115     ret->back_labels[i] = EMPTYLABEL;
6116   }
6117   return ret;
6118 }
6119 
/* Releases compiler state from newcompiler().  Only the struct is freed; the
 * group it points at is left alone. */
static void freecompiler(compiler *c) {
  upb_gfree(c);
}
6123 
/* Number of 32-bit bytecode words occupied by one pointer operand. */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
6125 
6126 /* How many words an instruction is. */
instruction_len(uint32_t instr)6127 static int instruction_len(uint32_t instr) {
6128   switch (getop(instr)) {
6129     case OP_SETDISPATCH: return 1 + ptr_words;
6130     case OP_TAGN: return 3;
6131     case OP_SETBIGGROUPNUM: return 2;
6132     default: return 1;
6133   }
6134 }
6135 
op_has_longofs(int32_t instruction)6136 bool op_has_longofs(int32_t instruction) {
6137   switch (getop(instruction)) {
6138     case OP_CALL:
6139     case OP_BRANCH:
6140     case OP_CHECKDELIM:
6141       return true;
6142     /* The "tag" instructions only have 8 bytes available for the jump target,
6143      * but that is ok because these opcodes only require short jumps. */
6144     case OP_TAG1:
6145     case OP_TAG2:
6146     case OP_TAGN:
6147       return false;
6148     default:
6149       UPB_ASSERT(false);
6150       return false;
6151   }
6152 }
6153 
/* Extracts the signed jump offset from an instruction: the upper 24 bits for
 * long-offset opcodes, otherwise the sign-extended second byte. */
static int32_t getofs(uint32_t instruction) {
  return op_has_longofs(instruction) ? ((int32_t)instruction >> 8)
                                     : (int8_t)(instruction >> 8);
}
6161 
/* Writes jump offset |ofs| into *instruction.
 *
 * Long-offset opcodes are rebuilt as opcode | ofs << 8.  Short-offset opcodes
 * only have their second byte replaced, so the remaining bits survive.
 * Asserts that the offset reads back unchanged. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  const uint32_t old = *instruction;
  uint32_t patched;

  if (op_has_longofs(old)) {
    patched = getop(old) | (uint32_t)ofs << 8;
  } else {
    patched = (old & ~0xff00) | ((ofs & 0xff) << 8);
  }

  *instruction = patched;
  UPB_ASSERT(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6170 
/* Current PC expressed as a word offset from the start of the group's
 * bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return (uint32_t)(c->pc - base);
}
6174 
6175 /* Defines a local label at the current PC location.  All previous forward
6176  * references are updated to point to this location.  The location is noted
6177  * for any future backward references. */
label(compiler * c,unsigned int label)6178 static void label(compiler *c, unsigned int label) {
6179   int val;
6180   uint32_t *codep;
6181 
6182   UPB_ASSERT(label < MAXLABEL);
6183   val = c->fwd_labels[label];
6184   codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6185   while (codep) {
6186     int ofs = getofs(*codep);
6187     setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep)));
6188     codep = ofs ? codep + ofs : NULL;
6189   }
6190   c->fwd_labels[label] = EMPTYLABEL;
6191   c->back_labels[label] = pcofs(c);
6192 }
6193 
6194 /* Creates a reference to a numbered label; either a forward reference
6195  * (positive arg) or backward reference (negative arg).  For forward references
6196  * the value returned now is actually a "next" pointer into a linked list of all
6197  * instructions that use this label and will be patched later when the label is
6198  * defined with label().
6199  *
6200  * The returned value is the offset that should be written into the instruction.
6201  */
labelref(compiler * c,int label)6202 static int32_t labelref(compiler *c, int label) {
6203   UPB_ASSERT(label < MAXLABEL);
6204   if (label == LABEL_DISPATCH) {
6205     /* No resolving required. */
6206     return 0;
6207   } else if (label < 0) {
6208     /* Backward local label.  Relative to the next instruction. */
6209     uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode);
6210     return c->back_labels[-label] - from;
6211   } else {
6212     /* Forward local label: prepend to (possibly-empty) linked list. */
6213     int *lptr = &c->fwd_labels[label];
6214     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6215     *lptr = pcofs(c);
6216     return ret;
6217   }
6218 }
6219 
/* Appends one 32-bit word to the bytecode at the current PC, growing the
 * group's buffer first when the PC has reached its end.  The buffer doubles
 * each time, with a minimum of 64 words. */
static void put32(compiler *c, uint32_t v) {
  mgroup *grp = c->group;

  if (c->pc == grp->bytecode_end) {
    /* Remember the PC as an offset; the buffer may move when resized. */
    int saved_ofs = pcofs(c);
    size_t cur_words = grp->bytecode_end - grp->bytecode;
    size_t grown_words = UPB_MAX(cur_words * 2, 64);

    /* TODO(haberman): handle OOM. */
    grp->bytecode = upb_grealloc(grp->bytecode, cur_words * sizeof(uint32_t),
                                 grown_words * sizeof(uint32_t));
    grp->bytecode_end = grp->bytecode + grown_words;
    c->pc = grp->bytecode + saved_ofs;
  }

  *c->pc = v;
  c->pc++;
}
6234 
/* Appends the instruction for opcode |op| at the current PC.
 *
 * The variadic arguments depend on |op| and are fetched with va_arg(), so
 * callers must pass exactly these types:
 *   OP_SETDISPATCH                      void*  (emitted as extra word(s))
 *   OP_STARTMSG, OP_ENDMSG, OP_PUSHLENDELIM, OP_POP, OP_SETDELIM, OP_HALT,
 *   OP_RET, OP_DISPATCH                 (none)
 *   OP_PARSE_*, OP_STARTSEQ, OP_ENDSEQ, OP_STARTSUBMSG, OP_ENDSUBMSG,
 *   OP_STARTSTR, OP_STRING, OP_ENDSTR, OP_PUSHTAGDELIM
 *                                       upb_selector_t (stored above bit 8)
 *   OP_SETBIGGROUPNUM                   int    (emitted as a second word)
 *   OP_CALL                             upb_pbdecodermethod*
 *   OP_CHECKDELIM, OP_BRANCH            int label
 *   OP_TAG1, OP_TAG2, OP_TAGN           int label, uint64_t tag
 *
 * An opcode not listed in the switch emits nothing. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      /* The pointer operand follows the opcode word: low 32 bits first, then
       * the high 32 bits when pointers are wider than one word. */
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, (uint32_t)ptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      /* The call offset is relative to the word after this instruction. */
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      /* The encoded tag (at most 16 bits) lives in the instruction's upper
       * half; the short jump offset goes in the second byte. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = (uint32_t)(op | (tag << 16));
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      /* The instruction's upper half holds upb_value_size(tag); the tag
       * itself follows as two words, low half first. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, (uint32_t)tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(ap);
}
6322 
6323 #if defined(UPB_DUMP_BYTECODE)
6324 
upb_pbdecoder_getopname(unsigned int op)6325 const char *upb_pbdecoder_getopname(unsigned int op) {
6326 #define QUOTE(x) #x
6327 #define EXPAND_AND_QUOTE(x) QUOTE(x)
6328 #define OPNAME(x) OP_##x
6329 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
6330 #define T(x) OP(PARSE_##x)
6331   /* Keep in sync with list in decoder.int.h. */
6332   switch ((opcode)op) {
6333     T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
6334     T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
6335     OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
6336     OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
6337     OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
6338     OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
6339     OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
6340   }
6341   return "<unknown op>";
6342 #undef OP
6343 #undef T
6344 }
6345 
6346 #endif
6347 
6348 #ifdef UPB_DUMP_BYTECODE
6349 
dumpbc(uint32_t * p,uint32_t * end,FILE * f)6350 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6351 
6352   uint32_t *begin = p;
6353 
6354   while (p < end) {
6355     fprintf(f, "%p  %8tx", p, p - begin);
6356     uint32_t instr = *p++;
6357     uint8_t op = getop(instr);
6358     fprintf(f, " %s", upb_pbdecoder_getopname(op));
6359     switch ((opcode)op) {
6360       case OP_SETDISPATCH: {
6361         const upb_inttable *dispatch;
6362         memcpy(&dispatch, p, sizeof(void*));
6363         p += ptr_words;
6364         const upb_pbdecodermethod *method =
6365             (void *)((char *)dispatch -
6366                      offsetof(upb_pbdecodermethod, dispatch));
6367         fprintf(f, " %s", upb_msgdef_fullname(
6368                               upb_handlers_msgdef(method->dest_handlers_)));
6369         break;
6370       }
6371       case OP_DISPATCH:
6372       case OP_STARTMSG:
6373       case OP_ENDMSG:
6374       case OP_PUSHLENDELIM:
6375       case OP_POP:
6376       case OP_SETDELIM:
6377       case OP_HALT:
6378       case OP_RET:
6379         break;
6380       case OP_PARSE_DOUBLE:
6381       case OP_PARSE_FLOAT:
6382       case OP_PARSE_INT64:
6383       case OP_PARSE_UINT64:
6384       case OP_PARSE_INT32:
6385       case OP_PARSE_FIXED64:
6386       case OP_PARSE_FIXED32:
6387       case OP_PARSE_BOOL:
6388       case OP_PARSE_UINT32:
6389       case OP_PARSE_SFIXED32:
6390       case OP_PARSE_SFIXED64:
6391       case OP_PARSE_SINT32:
6392       case OP_PARSE_SINT64:
6393       case OP_STARTSEQ:
6394       case OP_ENDSEQ:
6395       case OP_STARTSUBMSG:
6396       case OP_ENDSUBMSG:
6397       case OP_STARTSTR:
6398       case OP_STRING:
6399       case OP_ENDSTR:
6400       case OP_PUSHTAGDELIM:
6401         fprintf(f, " %d", instr >> 8);
6402         break;
6403       case OP_SETBIGGROUPNUM:
6404         fprintf(f, " %d", *p++);
6405         break;
6406       case OP_CHECKDELIM:
6407       case OP_CALL:
6408       case OP_BRANCH:
6409         fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6410         break;
6411       case OP_TAG1:
6412       case OP_TAG2: {
6413         fprintf(f, " tag:0x%x", instr >> 16);
6414         if (getofs(instr)) {
6415           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6416         }
6417         break;
6418       }
6419       case OP_TAGN: {
6420         uint64_t tag = *p++;
6421         tag |= (uint64_t)*p++ << 32;
6422         fprintf(f, " tag:0x%llx", (long long)tag);
6423         fprintf(f, " n:%d", instr >> 16);
6424         if (getofs(instr)) {
6425           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6426         }
6427         break;
6428       }
6429     }
6430     fputs("\n", f);
6431   }
6432 }
6433 
6434 #endif
6435 
/* Builds the tag for field |f| with |wire_type| (field number << 3 | wire
 * type) and returns it as encoded by upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  const uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  const uint64_t varint = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(varint <= 0xffffffffff);
  return varint;
}
6443 
/* Emits the tag-check instruction for field |f| / |wire_type| that jumps to
 * label |dest|.  The opcode is chosen by the encoded tag's size: OP_TAG1 for
 * one byte, OP_TAG2 for two, OP_TAGN for anything longer. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  const uint64_t tag = get_encoded_tag(f, wire_type);
  const int tag_bytes = (int)upb_value_size(tag);
  int op;

  if (tag_bytes == 1) {
    op = OP_TAG1;
  } else if (tag_bytes == 2) {
    op = OP_TAG2;
  } else {
    op = OP_TAGN;
  }

  putop(c, op, dest, tag);
}
6459 
getsel(const upb_fielddef * f,upb_handlertype_t type)6460 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6461   upb_selector_t selector;
6462   bool ok = upb_handlers_getselector(f, type, &selector);
6463   UPB_ASSERT(ok);
6464   return selector;
6465 }
6466 
6467 /* Takes an existing, primary dispatch table entry and repacks it with a
6468  * different alternate wire type.  Called when we are inserting a secondary
6469  * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)6470 static uint64_t repack(uint64_t dispatch, int new_wt2) {
6471   uint64_t ofs;
6472   uint8_t wt1;
6473   uint8_t old_wt2;
6474   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6475   UPB_ASSERT(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
6476   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6477 }
6478 
6479 /* Marks the current bytecode position as the dispatch target for this message,
6480  * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)6481 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6482                            const upb_fielddef *f, int wire_type) {
6483   /* Offset is relative to msg base. */
6484   uint64_t ofs = pcofs(c) - method->code_base.ofs;
6485   uint32_t fn = upb_fielddef_number(f);
6486   upb_inttable *d = &method->dispatch;
6487   upb_value v;
6488   if (upb_inttable_remove(d, fn, &v)) {
6489     /* TODO: prioritize based on packed setting in .proto file. */
6490     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6491     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6492     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6493   } else {
6494     uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
6495     upb_inttable_insert(d, fn, upb_value_uint64(val));
6496   }
6497 }
6498 
/* Emits the instruction that pushes a delimiter for submessage field |f|:
 * OP_PUSHLENDELIM for MESSAGE-typed fields, otherwise OP_PUSHTAGDELIM carrying
 * the field number. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < (1 << 24)) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  /* The operand only has room for 24 bits, so a larger field number is sent
   * in a separate OP_SETBIGGROUPNUM word. */
  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
6512 
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)6513 static upb_pbdecodermethod *find_submethod(const compiler *c,
6514                                            const upb_pbdecodermethod *method,
6515                                            const upb_fielddef *f) {
6516   const upb_handlers *sub =
6517       upb_handlers_getsubhandlers(method->dest_handlers_, f);
6518   upb_value v;
6519   return upb_inttable_lookupptr(&c->group->methods, sub, &v)
6520              ? upb_value_getptr(v)
6521              : NULL;
6522 }
6523 
/* Emits |op| with selector |sel|, but only when |h| has a handler registered
 * for that selector; otherwise emits nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;
  }
  putop(c, op, sel);
}
6530 
6531 /* Puts an opcode to call a callback, but only if a callback actually exists for
6532  * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)6533 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
6534                      const upb_fielddef *f, upb_handlertype_t type) {
6535   putsel(c, op, getsel(f, type), h);
6536 }
6537 
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)6538 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
6539   if (!upb_fielddef_lazy(f))
6540     return false;
6541 
6542   return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
6543          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
6544          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
6545 }
6546 
6547 
6548 /* bytecode compiler code generation ******************************************/
6549 
/* Symbolic names for our local labels.  Each value indexes the compiler's
 * fwd_labels[] / back_labels[] arrays and must therefore stay below MAXLABEL.
 * A negated name (e.g. -LABEL_LOOPSTART) requests a backward reference. */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
6555 
6556 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)6557 static void generate_msgfield(compiler *c, const upb_fielddef *f,
6558                               upb_pbdecodermethod *method) {
6559   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6560   const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
6561   int wire_type;
6562 
6563   if (!sub_m) {
6564     /* Don't emit any code for this field at all; it will be parsed as an
6565      * unknown field.
6566      *
6567      * TODO(haberman): we should change this to parse it as a string field
6568      * instead.  It will probably be faster, but more importantly, once we
6569      * start vending unknown fields, a field shouldn't be treated as unknown
6570      * just because it doesn't have subhandlers registered. */
6571     return;
6572   }
6573 
6574   label(c, LABEL_FIELD);
6575 
6576   wire_type =
6577       (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
6578           ? UPB_WIRE_TYPE_DELIMITED
6579           : UPB_WIRE_TYPE_START_GROUP;
6580 
6581   if (upb_fielddef_isseq(f)) {
6582     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6583     putchecktag(c, f, wire_type, LABEL_DISPATCH);
6584    dispatchtarget(c, method, f, wire_type);
6585     putop(c, OP_PUSHTAGDELIM, 0);
6586     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6587    label(c, LABEL_LOOPSTART);
6588     putpush(c, f);
6589     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
6590     putop(c, OP_CALL, sub_m);
6591     putop(c, OP_POP);
6592     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
6593     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
6594       putop(c, OP_SETDELIM);
6595     }
6596     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6597     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
6598     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6599    label(c, LABEL_LOOPBREAK);
6600     putop(c, OP_POP);
6601     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6602   } else {
6603     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6604     putchecktag(c, f, wire_type, LABEL_DISPATCH);
6605    dispatchtarget(c, method, f, wire_type);
6606     putpush(c, f);
6607     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
6608     putop(c, OP_CALL, sub_m);
6609     putop(c, OP_POP);
6610     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
6611     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
6612       putop(c, OP_SETDELIM);
6613     }
6614   }
6615 }
6616 
6617 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)6618 static void generate_delimfield(compiler *c, const upb_fielddef *f,
6619                                 upb_pbdecodermethod *method) {
6620   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6621 
6622   label(c, LABEL_FIELD);
6623   if (upb_fielddef_isseq(f)) {
6624     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6625     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6626    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6627     putop(c, OP_PUSHTAGDELIM, 0);
6628     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6629    label(c, LABEL_LOOPSTART);
6630     putop(c, OP_PUSHLENDELIM);
6631     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6632     /* Need to emit even if no handler to skip past the string. */
6633     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
6634     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
6635     putop(c, OP_POP);
6636     putop(c, OP_SETDELIM);
6637     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6638     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
6639     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6640    label(c, LABEL_LOOPBREAK);
6641     putop(c, OP_POP);
6642     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6643   } else {
6644     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6645     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6646    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6647     putop(c, OP_PUSHLENDELIM);
6648     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6649     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
6650     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
6651     putop(c, OP_POP);
6652     putop(c, OP_SETDELIM);
6653   }
6654 }
6655 
6656 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)6657 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
6658                                     upb_pbdecodermethod *method) {
6659   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6660   upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
6661   opcode parse_type;
6662   upb_selector_t sel;
6663   int wire_type;
6664 
6665   label(c, LABEL_FIELD);
6666 
6667   /* From a decoding perspective, ENUM is the same as INT32. */
6668   if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
6669     descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
6670 
6671   parse_type = (opcode)descriptor_type;
6672 
6673   /* TODO(haberman): generate packed or non-packed first depending on "packed"
6674    * setting in the fielddef.  This will favor (in speed) whichever was
6675    * specified. */
6676 
6677   UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
6678   sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
6679   wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
6680   if (upb_fielddef_isseq(f)) {
6681     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6682     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6683    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6684     putop(c, OP_PUSHLENDELIM);
6685     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
6686    label(c, LABEL_LOOPSTART);
6687     putop(c, parse_type, sel);
6688     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6689     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6690    dispatchtarget(c, method, f, wire_type);
6691     putop(c, OP_PUSHTAGDELIM, 0);
6692     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
6693    label(c, LABEL_LOOPSTART);
6694     putop(c, parse_type, sel);
6695     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6696     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
6697     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6698    label(c, LABEL_LOOPBREAK);
6699     putop(c, OP_POP);  /* Packed and non-packed join. */
6700     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6701     putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
6702   } else {
6703     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6704     putchecktag(c, f, wire_type, LABEL_DISPATCH);
6705    dispatchtarget(c, method, f, wire_type);
6706     putop(c, parse_type, sel);
6707   }
6708 }
6709 
6710 /* Adds bytecode for parsing the given message to the given decoderplan,
6711  * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)6712 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6713   const upb_handlers *h;
6714   const upb_msgdef *md;
6715   uint32_t* start_pc;
6716   upb_msg_field_iter i;
6717   upb_value val;
6718 
6719   UPB_ASSERT(method);
6720 
6721   /* Clear all entries in the dispatch table. */
6722   upb_inttable_uninit(&method->dispatch);
6723   upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
6724 
6725   h = upb_pbdecodermethod_desthandlers(method);
6726   md = upb_handlers_msgdef(h);
6727 
6728  method->code_base.ofs = pcofs(c);
6729   putop(c, OP_SETDISPATCH, &method->dispatch);
6730   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
6731  label(c, LABEL_FIELD);
6732   start_pc = c->pc;
6733   for(upb_msg_field_begin(&i, md);
6734       !upb_msg_field_done(&i);
6735       upb_msg_field_next(&i)) {
6736     const upb_fielddef *f = upb_msg_iter_field(&i);
6737     upb_fieldtype_t type = upb_fielddef_type(f);
6738 
6739     if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
6740       generate_msgfield(c, f, method);
6741     } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
6742                type == UPB_TYPE_MESSAGE) {
6743       generate_delimfield(c, f, method);
6744     } else {
6745       generate_primitivefield(c, f, method);
6746     }
6747   }
6748 
6749   /* If there were no fields, or if no handlers were defined, we need to
6750    * generate a non-empty loop body so that we can at least dispatch for unknown
6751    * fields and check for the end of the message. */
6752   if (c->pc == start_pc) {
6753     /* Check for end-of-message. */
6754     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6755     /* Unconditionally dispatch. */
6756     putop(c, OP_DISPATCH, 0);
6757   }
6758 
6759   /* For now we just loop back to the last field of the message (or if none,
6760    * the DISPATCH opcode for the message). */
6761   putop(c, OP_BRANCH, -LABEL_FIELD);
6762 
6763   /* Insert both a label and a dispatch table entry for this end-of-msg. */
6764  label(c, LABEL_ENDMSG);
6765   val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
6766   upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
6767 
6768   putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
6769   putop(c, OP_RET);
6770 
6771   upb_inttable_compact(&method->dispatch);
6772 }
6773 
6774 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
6775  * Returns the method for these handlers.
6776  *
6777  * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)6778 static void find_methods(compiler *c, const upb_handlers *h) {
6779   upb_value v;
6780   upb_msg_field_iter i;
6781   const upb_msgdef *md;
6782   upb_pbdecodermethod *method;
6783 
6784   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
6785     return;
6786 
6787   method = newmethod(h, c->group);
6788   upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
6789 
6790   /* Find submethods. */
6791   md = upb_handlers_msgdef(h);
6792   for(upb_msg_field_begin(&i, md);
6793       !upb_msg_field_done(&i);
6794       upb_msg_field_next(&i)) {
6795     const upb_fielddef *f = upb_msg_iter_field(&i);
6796     const upb_handlers *sub_h;
6797     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
6798         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
6799       /* We only generate a decoder method for submessages with handlers.
6800        * Others will be parsed as unknown fields. */
6801       find_methods(c, sub_h);
6802     }
6803   }
6804 }
6805 
6806 /* (Re-)compile bytecode for all messages in "msgs."
6807  * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)6808 static void compile_methods(compiler *c) {
6809   upb_inttable_iter i;
6810 
6811   /* Start over at the beginning of the bytecode. */
6812   c->pc = c->group->bytecode;
6813 
6814   upb_inttable_begin(&i, &c->group->methods);
6815   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6816     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
6817     compile_method(c, method);
6818   }
6819 }
6820 
/* For every method in group "g": replaces the method's code offset with an
 * absolute pointer into g->bytecode, then installs the start/string/end
 * callbacks on the method's input bytes-handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    /* code_base.ofs is read before code_base.ptr is written. */
    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);
  }
}
6835 
6836 
6837 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
6838  * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool lazy)6839 const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
6840   mgroup *g;
6841   compiler *c;
6842 
6843   g = newgroup();
6844   c = newcompiler(g, lazy);
6845   find_methods(c, dest);
6846 
6847   /* We compile in two passes:
6848    * 1. all messages are assigned relative offsets from the beginning of the
6849    *    bytecode (saved in method->code_base).
6850    * 2. forwards OP_CALL instructions can be correctly linked since message
6851    *    offsets have been previously assigned.
6852    *
6853    * Could avoid the second pass by linking OP_CALL instructions somehow. */
6854   compile_methods(c);
6855   compile_methods(c);
6856   g->bytecode_end = c->pc;
6857   freecompiler(c);
6858 
6859 #ifdef UPB_DUMP_BYTECODE
6860   {
6861     FILE *f = fopen("/tmp/upb-bytecode", "w");
6862     UPB_ASSERT(f);
6863     dumpbc(g->bytecode, g->bytecode_end, stderr);
6864     dumpbc(g->bytecode, g->bytecode_end, f);
6865     fclose(f);
6866 
6867     f = fopen("/tmp/upb-bytecode.bin", "wb");
6868     UPB_ASSERT(f);
6869     fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
6870     fclose(f);
6871   }
6872 #endif
6873 
6874   set_bytecode_handlers(g);
6875   return g;
6876 }
6877 
6878 
6879 /* upb_pbcodecache ************************************************************/
6880 
upb_pbcodecache_new(upb_handlercache * dest)6881 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
6882   upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
6883 
6884   if (!c) return NULL;
6885 
6886   c->dest = dest;
6887   c->lazy = false;
6888 
6889   c->arena = upb_arena_new();
6890   if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
6891 
6892   return c;
6893 }
6894 
/* Destroys a code cache: frees every method group stored in c->groups, then
 * the table itself, the arena, and finally "c". */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &c->groups); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    /* Groups are stored as const pointers; the cast drops the const so the
     * group can be freed. */
    const void *group = upb_value_getconstptr(upb_inttable_iter_value(&it));
    freegroup((void*)group);
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
6908 
/* Sets the cache's lazy flag, which is passed to mgroup_new() for groups
 * compiled afterwards.  Asserts that no group has been compiled yet. */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  UPB_ASSERT(0 == upb_inttable_count(&c->groups));

  c->lazy = lazy;
}
6913 
upb_pbcodecache_get(upb_pbcodecache * c,const upb_msgdef * md)6914 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
6915                                                const upb_msgdef *md) {
6916   upb_value v;
6917   bool ok;
6918   const upb_handlers *h;
6919   const mgroup *g;
6920 
6921   h = upb_handlercache_get(c->dest, md);
6922   if (upb_inttable_lookupptr(&c->groups, md, &v)) {
6923     g = upb_value_getconstptr(v);
6924   } else {
6925     g = mgroup_new(h, c->lazy);
6926     ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
6927     UPB_ASSUME(ok);
6928   }
6929 
6930   ok = upb_inttable_lookupptr(&g->methods, h, &v);
6931   UPB_ASSUME(ok);
6932   return upb_value_getptr(v);
6933 }
6934 /*
6935 ** upb::Decoder (Bytecode Decoder VM)
6936 **
6937 ** Bytecode must previously have been generated using the bytecode compiler in
6938 ** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
6939 ** parse the input.
6940 **
6941 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
6942 ** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be capable
** of suspending/resuming from any byte in the stream.  This
6945 ** sometimes requires keeping a few trailing bytes from the last buffer around
6946 ** in the "residual" buffer.
6947 */
6948 
6949 #include <inttypes.h>
6950 #include <stddef.h>
6951 
6952 #ifdef UPB_DUMP_BYTECODE
6953 #include <stdio.h>
6954 #endif
6955 
6956 
/* If "x" evaluates false, returns the result of upb_pbdecoder_suspend(d) from
 * the enclosing function.  Requires a variable named "d" to be in scope.
 * Note that this expands to a bare "if" statement with its own trailing
 * semicolon, not to an expression. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);

/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";

/* upb_pbdecoder **************************************************************/

/* NOTE(review): presumably a one-instruction program the decoder can point
 * its pc at when it has nothing left to run -- confirm against the users of
 * "halt" elsewhere in the file. */
static opcode halt = OP_HALT;

/* A dummy character we can point to when the user passes us a NULL buffer.
 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
 * behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
6975 
6976 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)6977 static bool consumes_input(opcode op) {
6978   switch (op) {
6979     case OP_SETDISPATCH:
6980     case OP_STARTMSG:
6981     case OP_ENDMSG:
6982     case OP_STARTSEQ:
6983     case OP_ENDSEQ:
6984     case OP_STARTSUBMSG:
6985     case OP_ENDSUBMSG:
6986     case OP_STARTSTR:
6987     case OP_ENDSTR:
6988     case OP_PUSHTAGDELIM:
6989     case OP_POP:
6990     case OP_SETDELIM:
6991     case OP_SETBIGGROUPNUM:
6992     case OP_CHECKDELIM:
6993     case OP_CALL:
6994     case OP_RET:
6995     case OP_BRANCH:
6996       return false;
6997     default:
6998       return true;
6999   }
7000 }
7001 
/* Size in bytes of a decoder frame stack holding "entries" frames.  "d" is
 * unused. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  const size_t frame_bytes = sizeof(upb_pbdecoder_frame);
  UPB_UNUSED(d);
  return entries * frame_bytes;
}
7006 
/* Size in bytes of a call stack holding "entries" return addresses, each a
 * uint32_t pointer.  "d" is unused. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  const size_t slot_bytes = sizeof(uint32_t*);
  UPB_UNUSED(d);
  return entries * slot_bytes;
}
7012 
7013 
7014 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7015 
7016 /* It's unfortunate that we have to micro-manage the compiler with
7017  * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7018  * specific to one hardware configuration.  But empirically on a Core i7,
7019  * performance increases 30-50% with these annotations.  Every instance where
7020  * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7021  * benchmarks. */
7022 
/* Records "msg" as the error message on the decoder's status object. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status_seterrmsg(d->status, msg);
}
7026 
/* Externally visible form of seterr(): records "msg" as the error message
 * on the decoder's status object. */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  upb_status_seterrmsg(d->status, msg);
}
7030 
7031 
7032 /* Buffering ******************************************************************/
7033 
7034 /* We operate on one buffer at a time, which is either the user's buffer passed
7035  * to our "decode" callback or some residual bytes from the previous buffer. */
7036 
7037 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
7038  * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)7039 static size_t curbufleft(const upb_pbdecoder *d) {
7040   UPB_ASSERT(d->data_end >= d->ptr);
7041   return d->data_end - d->ptr;
7042 }
7043 
7044 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)7045 static size_t bufleft(const upb_pbdecoder *d) {
7046   return d->end - d->ptr;
7047 }
7048 
7049 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)7050 uint64_t offset(const upb_pbdecoder *d) {
7051   return d->bufstart_ofs + (d->ptr - d->buf);
7052 }
7053 
7054 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)7055 size_t delim_remaining(const upb_pbdecoder *d) {
7056   return d->top->end_ofs - offset(d);
7057 }
7058 
7059 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)7060 static void advance(upb_pbdecoder *d, size_t len) {
7061   UPB_ASSERT(curbufleft(d) >= len);
7062   d->ptr += len;
7063 }
7064 
/* Whether "p" lies within [buf, end].  The upper bound is inclusive, so a
 * pointer equal to "end" counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return p <= end;
}
7068 
in_residual_buf(const upb_pbdecoder * d,const char * p)7069 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7070   return in_buf(p, d->residual, d->residual_end);
7071 }
7072 
7073 /* Calculates the delim_end value, which is affected by both the current buffer
7074  * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)7075 static void set_delim_end(upb_pbdecoder *d) {
7076   size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7077   if (delim_ofs <= (size_t)(d->end - d->buf)) {
7078     d->delim_end = d->buf + delim_ofs;
7079     d->data_end = d->delim_end;
7080   } else {
7081     d->data_end = d->end;
7082     d->delim_end = NULL;
7083   }
7084 }
7085 
/* Makes [buf, end) the current buffer with the read position at its start,
 * then recomputes the delimited end for the new buffer. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
7092 
/* Moves on from the current buffer, which must be fully consumed (asserted),
 * to the "len" bytes at "buf".  The stream offset of the buffer start is
 * advanced by the length of the buffer being left. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const char *new_end = buf + len;

  UPB_ASSERT(curbufleft(d) == 0);
  d->bufstart_ofs += (d->end - d->buf);
  switchtobuf(d, buf, new_end);
}
7098 
/* Records the current read position as the checkpoint that a later suspend
 * rolls back to. */
static void checkpoint(upb_pbdecoder *d) {
  /* This assertion serves efficiency rather than correctness: it catches
   * callers that checkpoint() again without having moved, i.e. more often
   * than necessary. */
  UPB_ASSERT(d->ptr != d->checkpoint);
  d->checkpoint = d->ptr;
}
7106 
7107 /* Skips "bytes" bytes in the stream, which may be more than available.  If we
7108  * skip more bytes than are available, we return a long read count to the caller
7109  * indicating how many bytes can be skipped over before passing actual data
7110  * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
7111  * won't actually be read.
7112  */
skip(upb_pbdecoder * d,size_t bytes)7113 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7114   UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
7115   UPB_ASSERT(d->skip == 0);
7116   if (bytes > delim_remaining(d)) {
7117     seterr(d, "Skipped value extended beyond enclosing submessage.");
7118     return (int32_t)upb_pbdecoder_suspend(d);
7119   } else if (bufleft(d) >= bytes) {
7120     /* Skipped data is all in current buffer, and more is still available. */
7121     advance(d, bytes);
7122     d->skip = 0;
7123     return DECODE_OK;
7124   } else {
7125     /* Skipped data extends beyond currently available buffers. */
7126     d->pc = d->last;
7127     d->skip = bytes - curbufleft(d);
7128     d->bufstart_ofs += (d->end - d->buf);
7129     d->residual_end = d->residual;
7130     switchtobuf(d, d->residual, d->residual_end);
7131     return (int32_t)(d->size_param + d->skip);
7132   }
7133 }
7134 
7135 
7136 /* Resumes the decoder from an initial state or from a previous suspend. */
upb_pbdecoder_resume(upb_pbdecoder * d,void * p,const char * buf,size_t size,const upb_bufhandle * handle)7137 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
7138                              size_t size, const upb_bufhandle *handle) {
7139   UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */
7140 
7141   /* d->skip and d->residual_end could probably elegantly be represented
7142    * as a single variable, to more easily represent this invariant. */
7143   UPB_ASSERT(!(d->skip && d->residual_end > d->residual));
7144 
7145   /* We need to remember the original size_param, so that the value we return
7146    * is relative to it, even if we do some skipping first. */
7147   d->size_param = size;
7148   d->handle = handle;
7149 
7150   /* Have to handle this case specially (ie. not with skip()) because the user
7151    * is allowed to pass a NULL buffer here, which won't allow us to safely
7152    * calculate a d->end or use our normal functions like curbufleft(). */
7153   if (d->skip && d->skip >= size) {
7154     d->skip -= size;
7155     d->bufstart_ofs += size;
7156     buf = &dummy_char;
7157     size = 0;
7158 
7159     /* We can't just return now, because we might need to execute some ops
7160      * like CHECKDELIM, which could call some callbacks and pop the stack. */
7161   }
7162 
7163   /* We need to pretend that this was the actual buffer param, since some of the
7164    * calculations assume that d->ptr/d->buf is relative to this. */
7165   d->buf_param = buf;
7166 
7167   if (!buf) {
7168     /* NULL buf is ok if its entire span is covered by the "skip" above, but
7169      * by this point we know that "skip" doesn't cover the buffer. */
7170     seterr(d, "Passed NULL buffer over non-skippable region.");
7171     return (int32_t)upb_pbdecoder_suspend(d);
7172   }
7173 
7174   if (d->residual_end > d->residual) {
7175     /* We have residual bytes from the last buffer. */
7176     UPB_ASSERT(d->ptr == d->residual);
7177   } else {
7178     switchtobuf(d, buf, buf + size);
7179   }
7180 
7181   d->checkpoint = d->ptr;
7182 
7183   /* Handle skips that don't cover the whole buffer (as above). */
7184   if (d->skip) {
7185     size_t skip_bytes = d->skip;
7186     d->skip = 0;
7187     CHECK_RETURN(skip(d, skip_bytes));
7188     checkpoint(d);
7189   }
7190 
7191   /* If we're inside an unknown group, continue to parse unknown values. */
7192   if (d->top->groupnum < 0) {
7193     CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
7194     checkpoint(d);
7195   }
7196 
7197   return DECODE_OK;
7198 }
7199 
7200 /* Suspends the decoder at the last checkpoint, without saving any residual
7201  * bytes.  If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)7202 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7203   d->pc = d->last;
7204   if (d->checkpoint == d->residual) {
7205     /* Checkpoint was in residual buf; no user bytes were consumed. */
7206     d->ptr = d->residual;
7207     return 0;
7208   } else {
7209     size_t ret = d->size_param - (d->end - d->checkpoint);
7210     UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
7211     UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
7212 
7213     d->bufstart_ofs += (d->checkpoint - d->buf);
7214     d->residual_end = d->residual;
7215     switchtobuf(d, d->residual, d->residual_end);
7216     return ret;
7217   }
7218 }
7219 
7220 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
7221  * bytes in our residual buffer.  This is necessary if we need more user
7222  * bytes to form a complete value, which might not be contiguous in the
7223  * user's buffers.  Always consumes all user bytes. */
suspend_save(upb_pbdecoder * d)7224 static size_t suspend_save(upb_pbdecoder *d) {
7225   /* We hit end-of-buffer before we could parse a full value.
7226    * Save any unconsumed bytes (if any) to the residual buffer. */
7227   d->pc = d->last;
7228 
7229   if (d->checkpoint == d->residual) {
7230     /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
7231     UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
7232            sizeof(d->residual));
7233     if (!in_residual_buf(d, d->ptr)) {
7234       d->bufstart_ofs -= (d->residual_end - d->residual);
7235     }
7236     memcpy(d->residual_end, d->buf_param, d->size_param);
7237     d->residual_end += d->size_param;
7238   } else {
7239     /* Checkpoint was in user buf; old residual bytes not needed. */
7240     size_t save;
7241     UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
7242 
7243     d->ptr = d->checkpoint;
7244     save = curbufleft(d);
7245     UPB_ASSERT(save <= sizeof(d->residual));
7246     memcpy(d->residual, d->ptr, save);
7247     d->residual_end = d->residual + save;
7248     d->bufstart_ofs = offset(d);
7249   }
7250 
7251   switchtobuf(d, d->residual, d->residual_end);
7252   return d->size_param;
7253 }
7254 
7255 /* Copies the next "bytes" bytes into "buf" and advances the stream.
7256  * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)7257 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7258                                          size_t bytes) {
7259   UPB_ASSERT(bytes <= curbufleft(d));
7260   memcpy(buf, d->ptr, bytes);
7261   advance(d, bytes);
7262 }
7263 
7264 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
7265  * available in the current buffer or not.  Returns a status code as described
7266  * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7267 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7268                                           size_t bytes) {
7269   const size_t avail = curbufleft(d);
7270   consumebytes(d, buf, avail);
7271   bytes -= avail;
7272   UPB_ASSERT(bytes > 0);
7273   if (in_residual_buf(d, d->ptr)) {
7274     advancetobuf(d, d->buf_param, d->size_param);
7275   }
7276   if (curbufleft(d) >= bytes) {
7277     consumebytes(d, (char *)buf + avail, bytes);
7278     return DECODE_OK;
7279   } else if (d->data_end == d->delim_end) {
7280     seterr(d, "Submessage ended in the middle of a value or group");
7281     return (int32_t)upb_pbdecoder_suspend(d);
7282   } else {
7283     return (int32_t)suspend_save(d);
7284   }
7285 }
7286 
7287 /* Gets the next "bytes" bytes, regardless of whether they are available in the
7288  * current buffer or not.  Returns a status code as described in decoder.int.h.
7289  */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)7290 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7291                                         size_t bytes) {
7292   if (curbufleft(d) >= bytes) {
7293     /* Buffer has enough data to satisfy. */
7294     consumebytes(d, buf, bytes);
7295     return DECODE_OK;
7296   } else {
7297     return getbytes_slow(d, buf, bytes);
7298   }
7299 }
7300 
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7301 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7302                                           size_t bytes) {
7303   size_t ret = curbufleft(d);
7304   memcpy(buf, d->ptr, ret);
7305   if (in_residual_buf(d, d->ptr)) {
7306     size_t copy = UPB_MIN(bytes - ret, d->size_param);
7307     memcpy((char *)buf + ret, d->buf_param, copy);
7308     ret += copy;
7309   }
7310   return ret;
7311 }
7312 
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)7313 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7314                                         size_t bytes) {
7315   if (curbufleft(d) >= bytes) {
7316     memcpy(buf, d->ptr, bytes);
7317     return bytes;
7318   } else {
7319     return peekbytes_slow(d, buf, bytes);
7320   }
7321 }
7322 
7323 
7324 /* Decoding of wire types *****************************************************/
7325 
7326 /* Slow path for decoding a varint from the current buffer position.
7327  * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)7328 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7329                                                       uint64_t *u64) {
7330   uint8_t byte = 0x80;
7331   int bitpos;
7332   *u64 = 0;
7333   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
7334     CHECK_RETURN(getbytes(d, &byte, 1));
7335     *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7336   }
7337   if(bitpos == 70 && (byte & 0x80)) {
7338     seterr(d, kUnterminatedVarint);
7339     return (int32_t)upb_pbdecoder_suspend(d);
7340   }
7341   return DECODE_OK;
7342 }
7343 
7344 /* Decodes a varint from the current buffer position.
7345  * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)7346 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7347   if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7348     *u64 = *d->ptr;
7349     advance(d, 1);
7350     return DECODE_OK;
7351   } else if (curbufleft(d) >= 10) {
7352     /* Fast case. */
7353     upb_decoderet r = upb_vdecode_fast(d->ptr);
7354     if (r.p == NULL) {
7355       seterr(d, kUnterminatedVarint);
7356       return (int32_t)upb_pbdecoder_suspend(d);
7357     }
7358     advance(d, r.p - d->ptr);
7359     *u64 = r.val;
7360     return DECODE_OK;
7361   } else {
7362     /* Slow case -- varint spans buffer seam. */
7363     return upb_pbdecoder_decode_varint_slow(d, u64);
7364   }
7365 }
7366 
7367 /* Decodes a 32-bit varint from the current buffer position.
7368  * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)7369 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7370   uint64_t u64;
7371   int32_t ret = decode_varint(d, &u64);
7372   if (ret >= 0) return ret;
7373   if (u64 > UINT32_MAX) {
7374     seterr(d, "Unterminated 32-bit varint");
7375     /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7376      * so we know this path will always be treated as error by our caller.
7377      * Right now the size_t -> int32_t can overflow and produce negative values.
7378      */
7379     *u32 = 0;
7380     return (int32_t)upb_pbdecoder_suspend(d);
7381   }
7382   *u32 = (uint32_t)u64;
7383   return DECODE_OK;
7384 }
7385 
7386 /* Decodes a fixed32 from the current buffer position.
7387  * Returns a status code as described in decoder.int.h.
7388  * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)7389 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7390   return getbytes(d, u32, 4);
7391 }
7392 
7393 /* Decodes a fixed64 from the current buffer position.
7394  * Returns a status code as described in decoder.int.h.
7395  * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)7396 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7397   return getbytes(d, u64, 8);
7398 }
7399 
7400 /* Non-static versions of the above functions.
7401  * These are called by the JIT for fallback paths. */
upb_pbdecoder_decode_f32(upb_pbdecoder * d,uint32_t * u32)7402 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
7403   return decode_fixed32(d, u32);
7404 }
7405 
/* Non-static version of decode_fixed64(). */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  const int32_t status = decode_fixed64(d, u64);
  return status;
}
7409 
/* Reinterprets the eight bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, sizeof(n));
  return result;
}
/* Reinterprets the four bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, sizeof(n));
  return result;
}
7412 
7413 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)7414 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7415   upb_pbdecoder_frame *fr = d->top;
7416 
7417   if (end > fr->end_ofs) {
7418     seterr(d, kPbDecoderSubmessageTooLong);
7419     return false;
7420   } else if (fr == d->limit) {
7421     seterr(d, kPbDecoderStackOverflow);
7422     return false;
7423   }
7424 
7425   fr++;
7426   fr->end_ofs = end;
7427   fr->dispatch = NULL;
7428   fr->groupnum = 0;
7429   d->top = fr;
7430   return true;
7431 }
7432 
/* Pushes a tag-delimited frame whose group number is "arg".  Returns false
 * if the push fails. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
   * field number) prior to hitting any enclosing submessage end, pushing our
   * existing delim end prevents us from continuing to parse values from a
   * corrupt proto that doesn't give us an END tag in time. */
  if (decoder_push(d, d->top->end_ofs)) {
    d->top->groupnum = arg;
    return true;
  }
  return false;
}
7443 
7444 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)7445 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7446 
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)7447 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7448                                                  uint64_t expected) {
7449   uint64_t data = 0;
7450   size_t bytes = upb_value_size(expected);
7451   size_t read = peekbytes(d, &data, bytes);
7452   if (read == bytes && data == expected) {
7453     /* Advance past matched bytes. */
7454     int32_t ok = getbytes(d, &data, read);
7455     UPB_ASSERT(ok < 0);
7456     return DECODE_OK;
7457   } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7458     return (int32_t)suspend_save(d);
7459   } else {
7460     return DECODE_MISMATCH;
7461   }
7462 }
7463 
/* Skips one unknown field, or an entire unknown group.
 *
 * If fieldnum >= 0 the caller has already decoded the tag and supplies
 * fieldnum/wire_type; otherwise the tag is decoded from the input here.
 *
 * Unknown groups are tracked by pushing a frame whose groupnum is the negated
 * field number; while the top frame's groupnum is negative the loop keeps
 * consuming fields.
 *
 * Returns DECODE_OK once the skipped bytes have been handed to
 * upb_sink_putunknown(), DECODE_ENDGROUP when an ENDGROUP tag matches the top
 * frame's groupnum, the value of upb_pbdecoder_suspend() after an error was
 * set, or whatever CHECK_RETURN propagates from the low-level readers. */
upb_pbdecoder_skipunknown(upb_pbdecoder * d,int32_t fieldnum,uint8_t wire_type)7464 int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
7465                                   uint8_t wire_type) {
/* Caller already has a tag: jump into the loop body past the tag decode. */
7466   if (fieldnum >= 0)
7467     goto have_tag;
7468 
7469   while (true) {
7470     uint32_t tag;
7471     CHECK_RETURN(decode_v32(d, &tag));
/* Low 3 bits of the tag are the wire type, the rest is the field number. */
7472     wire_type = tag & 0x7;
7473     fieldnum = tag >> 3;
7474 
7475 have_tag:
7476     if (fieldnum == 0) {
7477       seterr(d, "Saw invalid field number (0)");
7478       return (int32_t)upb_pbdecoder_suspend(d);
7479     }
7480 
7481     switch (wire_type) {
7482       case UPB_WIRE_TYPE_32BIT:
7483         CHECK_RETURN(skip(d, 4));
7484         break;
7485       case UPB_WIRE_TYPE_64BIT:
7486         CHECK_RETURN(skip(d, 8));
7487         break;
7488       case UPB_WIRE_TYPE_VARINT: {
/* The decoded value is discarded; decoding is only used to consume it. */
7489         uint64_t u64;
7490         CHECK_RETURN(decode_varint(d, &u64));
7491         break;
7492       }
7493       case UPB_WIRE_TYPE_DELIMITED: {
7494         uint32_t len;
7495         CHECK_RETURN(decode_v32(d, &len));
7496         CHECK_RETURN(skip(d, len));
7497         break;
7498       }
7499       case UPB_WIRE_TYPE_START_GROUP:
/* Negated field number marks the new frame as an unknown group. */
7500         if (!pushtagdelim(d, -fieldnum)) {
7501           return (int32_t)upb_pbdecoder_suspend(d);
7502         }
7503         break;
7504       case UPB_WIRE_TYPE_END_GROUP:
7505         if (fieldnum == -d->top->groupnum) {
/* Closes the unknown group we pushed above. */
7506           decoder_pop(d);
7507         } else if (fieldnum == d->top->groupnum) {
/* Matches the top frame's own (non-negated) group number. */
7508           return DECODE_ENDGROUP;
7509         } else {
7510           seterr(d, "Unmatched ENDGROUP tag.");
7511           return (int32_t)upb_pbdecoder_suspend(d);
7512         }
7513         break;
7514       default:
7515         seterr(d, "Invalid wire type");
7516         return (int32_t)upb_pbdecoder_suspend(d);
7517     }
7518 
/* Non-negative groupnum: we are not inside an unknown group, so the field is
 * complete.  Report the bytes between the checkpoint and the read position. */
7519     if (d->top->groupnum >= 0) {
7520       /* TODO: More code needed for handling unknown groups. */
7521       upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
7522       return DECODE_OK;
7523     }
7524 
7525     /* Unknown group -- continue looping over unknown fields. */
7526     checkpoint(d);
7527   }
7528 }
7529 
/* Points d->pc at the end-of-message bytecode of the current frame.  The
 * offset is looked up under DISPATCH_ENDMSG in the frame's dispatch table and
 * is relative to the frame's base; the entry is asserted to exist. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_value entry;
  bool have_entry =
      upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &entry);

  UPB_ASSERT(have_entry);
  d->pc = d->top->base + upb_value_getuint64(entry);
}
7536 
7537 /* Parses a tag and jumps to the corresponding bytecode instruction for this
7538  * field.
7539  *
7540  * If the tag is unknown (or the wire type doesn't match), parses the field as
7541  * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
7542  * instruction for the end of message. */
dispatch(upb_pbdecoder * d)7543 static int32_t dispatch(upb_pbdecoder *d) {
7544   upb_inttable *dispatch = d->top->dispatch;
7545   uint32_t tag;
7546   uint8_t wire_type;
7547   uint32_t fieldnum;
7548   upb_value val;
7549   int32_t retval;
7550 
7551   /* Decode tag. */
7552   CHECK_RETURN(decode_v32(d, &tag));
7553   wire_type = tag & 0x7;
7554   fieldnum = tag >> 3;
7555 
7556   /* Lookup tag.  Because of packed/non-packed compatibility, we have to
7557    * check the wire type against two possibilities. */
7558   if (fieldnum != DISPATCH_ENDMSG &&
7559       upb_inttable_lookup32(dispatch, fieldnum, &val)) {
7560     uint64_t v = upb_value_getuint64(val);
7561     if (wire_type == (v & 0xff)) {
7562       d->pc = d->top->base + (v >> 16);
7563       return DECODE_OK;
7564     } else if (wire_type == ((v >> 8) & 0xff)) {
7565       bool found =
7566           upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
7567       UPB_ASSERT(found);
7568       d->pc = d->top->base + upb_value_getuint64(val);
7569       return DECODE_OK;
7570     }
7571   }
7572 
7573   /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
7574    * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
7575    * we need to back up to, so that when we're done skipping unknown data we
7576    * can re-check the delimited end. */
7577   d->last--;  /* Necessary if we get suspended */
7578   d->pc = d->last;
7579   UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
7580 
7581   /* Unknown field or ENDGROUP. */
7582   retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
7583 
7584   CHECK_RETURN(retval);
7585 
7586   if (retval == DECODE_ENDGROUP) {
7587     goto_endmsg(d);
7588     return DECODE_OK;
7589   }
7590 
7591   return DECODE_OK;
7592 }
7593 
7594 /* Callers know that the stack is more than one deep because the opcodes that
7595  * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)7596 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
7597   UPB_ASSERT(d->top != d->stack);
7598   return d->top - 1;
7599 }
7600 
7601 
7602 /* The main decoding loop *****************************************************/
7603 
7604 /* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
7605  * switch() statement. */
/* Runs bytecode starting at d->pc until some instruction returns.
 *
 * Each iteration remembers the instruction address in d->last, fetches one
 * 32-bit instruction, extracts the opcode with getop() and takes the remaining
 * bits (instruction >> 8) as the argument.  "group" is only used by the
 * optional bytecode dump; "handle" is forwarded to upb_sink_putstring().
 *
 * Returns d->size_param when OP_HALT executes; other return values come from
 * the CHECK_RETURN / CHECK_SUSPEND macros (defined outside this block) or from
 * the explicit suspend/skip returns below. */
run_decoder_vm(upb_pbdecoder * d,const mgroup * group,const upb_bufhandle * handle)7606 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
7607                       const upb_bufhandle* handle) {
7608 
/* VMCASE wraps one opcode's code in a case label and, for opcodes for which
 * consumes_input(op) is true, calls checkpoint() afterwards.  A "break" inside
 * the code leaves the case early and therefore skips that checkpoint. */
7609 #define VMCASE(op, code) \
7610   case op: { code; if (consumes_input(op)) checkpoint(d); break; }
7611 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
7612   VMCASE(OP_PARSE_ ## type, { \
7613     ctype val; \
7614     CHECK_RETURN(decode_ ## wt(d, &val)); \
7615     upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \
7616   })
7617 
7618   while(1) {
7619     int32_t instruction;
7620     opcode op;
7621     uint32_t arg;
7622     int32_t longofs;
7623 
7624     d->last = d->pc;
7625     instruction = *d->pc++;
7626     op = getop(instruction);
7627     arg = instruction >> 8;
/* Same bits as arg, but kept signed for use as a relative jump offset. */
7628     longofs = arg;
7629     UPB_ASSERT(d->ptr != d->residual_end);
7630     UPB_UNUSED(group);
7631 #ifdef UPB_DUMP_BYTECODE
7632     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
7633                     "%x %s (%d)\n",
7634             (int)offset(d),
7635             (int)(d->ptr - d->buf),
7636             (int)(d->data_end - d->ptr),
7637             (int)(d->end - d->ptr),
7638             (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
7639             (int)(d->pc - 1 - group->bytecode),
7640             upb_pbdecoder_getopname(op),
7641             arg);
7642 #endif
7643     switch (op) {
7644       /* Technically, we are losing data if we see a 32-bit varint that is not
7645        * properly sign-extended.  We could detect this and error about the data
7646        * loss, but proto2 does not do this, so we pass. */
7647       PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
7648       PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
7649       PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
7650       PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
7651       PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
7652       PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
7653       PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
7654       PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
7655       PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
7656       PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
7657       PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
7658       PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
7659       PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)
7660 
/* The dispatch-table pointer is stored inline in the bytecode right after the
 * instruction; copy it out and step d->pc over those words. */
7661       VMCASE(OP_SETDISPATCH,
7662         d->top->base = d->pc - 1;
7663         memcpy(&d->top->dispatch, d->pc, sizeof(void*));
7664         d->pc += sizeof(void*) / sizeof(uint32_t);
7665       )
7666       VMCASE(OP_STARTMSG,
7667         CHECK_SUSPEND(upb_sink_startmsg(d->top->sink));
7668       )
7669       VMCASE(OP_ENDMSG,
7670         CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status));
7671       )
7672       VMCASE(OP_STARTSEQ,
7673         upb_pbdecoder_frame *outer = outer_frame(d);
7674         CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink));
7675       )
7676       VMCASE(OP_ENDSEQ,
7677         CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg));
7678       )
7679       VMCASE(OP_STARTSUBMSG,
7680         upb_pbdecoder_frame *outer = outer_frame(d);
7681         CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink));
7682       )
/* Reads the sub-sink from the frame slot just above the current top. */
7683       VMCASE(OP_ENDSUBMSG,
7684         upb_sink subsink = (d->top + 1)->sink;
7685         CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, subsink, arg));
7686       )
7687       VMCASE(OP_STARTSTR,
7688         uint32_t len = (uint32_t)delim_remaining(d);
7689         upb_pbdecoder_frame *outer = outer_frame(d);
7690         CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink));
7691         if (len == 0) {
7692           d->pc++;  /* Skip OP_STRING. */
7693         }
7694       )
/* Hands the bytes left in the current buffer to the sink.  The sink may
 * report more than it was given (n > len); that is treated as a request to
 * skip ahead and is only allowed within the current delimited region. */
7695       VMCASE(OP_STRING,
7696         uint32_t len = (uint32_t)curbufleft(d);
7697         size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle);
7698         if (n > len) {
7699           if (n > delim_remaining(d)) {
7700             seterr(d, "Tried to skip past end of string.");
7701             return upb_pbdecoder_suspend(d);
7702           } else {
7703             int32_t ret = skip(d, n);
7704             /* This shouldn't return DECODE_OK, because n > len. */
7705             UPB_ASSERT(ret >= 0);
7706             return ret;
7707           }
7708         }
7709         advance(d, n);
7710         if (n < len || d->delim_end == NULL) {
7711           /* We aren't finished with this string yet. */
7712           d->pc--;  /* Repeat OP_STRING. */
7713           if (n > 0) checkpoint(d);
7714           return upb_pbdecoder_suspend(d);
7715         }
7716       )
7717       VMCASE(OP_ENDSTR,
7718         CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg));
7719       )
7720       VMCASE(OP_PUSHTAGDELIM,
7721         CHECK_SUSPEND(pushtagdelim(d, arg));
7722       )
/* The group number is taken from the bytecode word following the instruction. */
7723       VMCASE(OP_SETBIGGROUPNUM,
7724         d->top->groupnum = *d->pc++;
7725       )
7726       VMCASE(OP_POP,
7727         UPB_ASSERT(d->top > d->stack);
7728         decoder_pop(d);
7729       )
/* Reads a length prefix and pushes a frame ending that many bytes past the
 * current stream offset. */
7730       VMCASE(OP_PUSHLENDELIM,
7731         uint32_t len;
7732         CHECK_RETURN(decode_v32(d, &len));
7733         CHECK_SUSPEND(decoder_push(d, offset(d) + len));
7734         set_delim_end(d);
7735       )
7736       VMCASE(OP_SETDELIM,
7737         set_delim_end(d);
7738       )
/* Takes the branch (longofs) when the read position is exactly at delim_end. */
7739       VMCASE(OP_CHECKDELIM,
7740         /* We are guaranteed of this assert because we never allow ourselves to
7741          * consume bytes beyond data_end, which covers delim_end when non-NULL.
7742          */
7743         UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
7744         if (d->ptr == d->delim_end)
7745           d->pc += longofs;
7746       )
/* Saves the return address (the instruction after the call) on d->callstack. */
7747       VMCASE(OP_CALL,
7748         d->callstack[d->call_len++] = d->pc;
7749         d->pc += longofs;
7750       )
7751       VMCASE(OP_RET,
7752         UPB_ASSERT(d->call_len > 0);
7753         d->pc = d->callstack[--d->call_len];
7754       )
7755       VMCASE(OP_BRANCH,
7756         d->pc += longofs;
7757       )
/* For the TAG opcodes the low 8 bits of arg are a signed short offset to take
 * on mismatch (or LABEL_DISPATCH to fall back to dispatch()); the expected tag
 * bytes sit above them.  OP_TAG2 and OP_TAGN jump to the "badtag" label
 * inside this case to share that mismatch handling. */
7758       VMCASE(OP_TAG1,
7759         uint8_t expected;
7760         CHECK_SUSPEND(curbufleft(d) > 0);
7761         expected = (arg >> 8) & 0xff;
7762         if (*d->ptr == expected) {
7763           advance(d, 1);
7764         } else {
7765           int8_t shortofs;
7766          badtag:
7767           shortofs = arg;
7768           if (shortofs == LABEL_DISPATCH) {
7769             CHECK_RETURN(dispatch(d));
7770           } else {
7771             d->pc += shortofs;
7772             break; /* Avoid checkpoint(). */
7773           }
7774         }
7775       )
7776       VMCASE(OP_TAG2,
7777         uint16_t expected;
7778         CHECK_SUSPEND(curbufleft(d) > 0);
7779         expected = (arg >> 8) & 0xffff;
7780         if (curbufleft(d) >= 2) {
7781           uint16_t actual;
7782           memcpy(&actual, d->ptr, 2);
7783           if (expected == actual) {
7784             advance(d, 2);
7785           } else {
7786             goto badtag;
7787           }
7788         } else {
/* Fewer than two bytes buffered: use the slow path, which can suspend. */
7789           int32_t result = upb_pbdecoder_checktag_slow(d, expected);
7790           if (result == DECODE_MISMATCH) goto badtag;
7791           if (result >= 0) return result;
7792         }
7793       )
/* The expected tag occupies the two bytecode words after the instruction. */
7794       VMCASE(OP_TAGN, {
7795         uint64_t expected;
7796         int32_t result;
7797         memcpy(&expected, d->pc, 8);
7798         d->pc += 2;
7799         result = upb_pbdecoder_checktag_slow(d, expected);
7800         if (result == DECODE_MISMATCH) goto badtag;
7801         if (result >= 0) return result;
7802       })
7803       VMCASE(OP_DISPATCH, {
7804         CHECK_RETURN(dispatch(d));
7805       })
7806       VMCASE(OP_HALT, {
7807         return d->size_param;
7808       })
7809     }
7810   }
7811 }
7812 
7813 
7814 /* BytesHandler handlers ******************************************************/
7815 
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)7816 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
7817   upb_pbdecoder *d = closure;
7818   UPB_UNUSED(size_hint);
7819   d->top->end_ofs = UINT64_MAX;
7820   d->bufstart_ofs = 0;
7821   d->call_len = 1;
7822   d->callstack[0] = &halt;
7823   d->pc = pc;
7824   d->skip = 0;
7825   return d;
7826 }
7827 
upb_pbdecoder_end(void * closure,const void * handler_data)7828 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7829   upb_pbdecoder *d = closure;
7830   const upb_pbdecodermethod *method = handler_data;
7831   uint64_t end;
7832   char dummy;
7833 
7834   if (d->residual_end > d->residual) {
7835     seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
7836     return false;
7837   }
7838 
7839   if (d->skip) {
7840     seterr(d, "Unexpected EOF inside skipped data");
7841     return false;
7842   }
7843 
7844   if (d->top->end_ofs != UINT64_MAX) {
7845     seterr(d, "Unexpected EOF inside delimited string");
7846     return false;
7847   }
7848 
7849   /* The user's end() call indicates that the message ends here. */
7850   end = offset(d);
7851   d->top->end_ofs = end;
7852 
7853   {
7854     const uint32_t *p = d->pc;
7855     d->stack->end_ofs = end;
7856     /* Check the previous bytecode, but guard against beginning. */
7857     if (p != method->code_base.ptr) p--;
7858     if (getop(*p) == OP_CHECKDELIM) {
7859       /* Rewind from OP_TAG* to OP_CHECKDELIM. */
7860       UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
7861              getop(*d->pc) == OP_TAG2 ||
7862              getop(*d->pc) == OP_TAGN ||
7863              getop(*d->pc) == OP_DISPATCH);
7864       d->pc = p;
7865     }
7866     upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
7867   }
7868 
7869   if (d->call_len != 0) {
7870     seterr(d, "Unexpected EOF inside submessage or group");
7871     return false;
7872   }
7873 
7874   return true;
7875 }
7876 
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)7877 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
7878                             size_t size, const upb_bufhandle *handle) {
7879   int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
7880 
7881   if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
7882   CHECK_RETURN(result);
7883 
7884   return run_decoder_vm(decoder, group, handle);
7885 }
7886 
7887 
7888 /* Public API *****************************************************************/
7889 
/* Returns the decoder to its initial position: all buffer pointers are aimed
 * at the start of the residual buffer (so nothing is buffered) and the frame
 * stack is collapsed to its bottom frame with group number 0. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* Empty input window over the residual buffer. */
  d->buf = d->residual;
  d->ptr = d->residual;
  d->end = d->residual;
  d->residual_end = d->residual;

  /* Single bottom frame. */
  d->top = d->stack;
  d->top->groupnum = 0;
}
7898 
upb_pbdecoder_create(upb_arena * a,const upb_pbdecodermethod * m,upb_sink sink,upb_status * status)7899 upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m,
7900                                     upb_sink sink, upb_status *status) {
7901   const size_t default_max_nesting = 64;
7902 #ifndef NDEBUG
7903   size_t size_before = upb_arena_bytesallocated(a);
7904 #endif
7905 
7906   upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder));
7907   if (!d) return NULL;
7908 
7909   d->method_ = m;
7910   d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting));
7911   d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting));
7912   if (!d->stack || !d->callstack) {
7913     return NULL;
7914   }
7915 
7916   d->arena = a;
7917   d->limit = d->stack + default_max_nesting - 1;
7918   d->stack_size = default_max_nesting;
7919   d->status = status;
7920 
7921   upb_pbdecoder_reset(d);
7922   upb_bytessink_reset(&d->input_, &m->input_handler_, d);
7923 
7924   if (d->method_->dest_handlers_) {
7925     if (sink.handlers != d->method_->dest_handlers_)
7926       return NULL;
7927   }
7928   d->top->sink = sink;
7929 
7930   /* If this fails, increase the value in decoder.h. */
7931   UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
7932                       UPB_PB_DECODER_SIZE);
7933   return d;
7934 }
7935 
/* Returns the decoder's current stream offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  uint64_t parsed = offset(d);
  return parsed;
}
7939 
upb_pbdecoder_method(const upb_pbdecoder * d)7940 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
7941   return d->method_;
7942 }
7943 
upb_pbdecoder_input(upb_pbdecoder * d)7944 upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) {
7945   return d->input_;
7946 }
7947 
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)7948 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
7949   return d->stack_size;
7950 }
7951 
/* Sets the maximum nesting depth to "max" frames.
 *
 * Returns false if "max" is smaller than the current depth, or if the stacks
 * need to grow and the arena cannot provide the memory.  On success d->limit
 * points at the last usable frame.
 *
 * When the frame stack is reallocated it may move, so d->top and d->limit are
 * re-derived from the new base immediately.  This keeps the decoder
 * consistent even if the subsequent callstack reallocation fails. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  size_t depth;

  UPB_ASSERT(d->top >= d->stack);
  depth = (size_t)(d->top - d->stack);

  if (max < depth) {
    /* Can't set a limit smaller than what we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    size_t cur_max = (size_t)((d->limit - d->stack) + 1);
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;
    /* Rebase the pointers that referred into the old stack allocation. */
    d->top = d->stack + depth;
    d->limit = d->stack + cur_max - 1;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
7984 /*
7985 ** upb::Encoder
7986 **
7987 ** Since we are implementing pure handlers (ie. without any out-of-band access
7988 ** to pre-computed lengths), we have to buffer all submessages before we can
7989 ** emit even their first byte.
7990 **
7991 ** Not knowing the size of submessages also means we can't write a perfect
7992 ** zero-copy implementation, even with buffering.  Lengths are stored as
7993 ** varints, which means that we don't know how many bytes to reserve for the
7994 ** length until we know what the length is.
7995 **
7996 ** This leaves us with three main choices:
7997 **
7998 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
7999 **    once into the output buffer.
8000 **
8001 ** 2. attempt to buffer data directly into the output buffer, estimating how
8002 **    many bytes each length will take.  When our guesses are wrong, use
8003 **    memmove() to grow or shrink the allotted space.
8004 **
8005 ** 3. buffer directly into the output buffer, allocating a max length
8006 **    ahead-of-time for each submessage length.  If we overallocated, we waste
8007 **    space, but no memcpy() or memmove() is required.  This approach requires
8008 **    defining a maximum size for submessages and rejecting submessages that
8009 **    exceed that size.
8010 **
8011 ** (2) and (3) have the potential to have better performance, but they are more
8012 ** complicated and subtle to implement:
8013 **
8014 **   (3) requires making an arbitrary choice of the maximum message size; it
8015 **       wastes space when submessages are shorter than this and fails
8016 **       completely when they are longer.  This makes it more finicky and
8017 **       requires configuration based on the input.  It also makes it impossible
8018 **       to perfectly match the output of reference encoders that always use the
8019 **       optimal amount of space for each length.
8020 **
8021 **   (2) requires guessing the size upfront, and if multiple lengths are
8022 **       guessed wrong the minimum required number of memmove() operations may
8023 **       be complicated to compute correctly.  Implemented properly, it may have
8024 **       a useful amortized or average cost, but more investigation is required
8025 **       to determine this and what the optimal algorithm is to achieve it.
8026 **
8027 **   (1) makes you always pay for exactly one copy, but its implementation is
8028 **       the simplest and its performance is predictable.
8029 **
8030 ** So for now, we implement (1) only.  If we wish to optimize later, we should
8031 ** be able to do it without affecting users.
8032 **
8033 ** The strategy is to buffer the segments of data that do *not* depend on
8034 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8035 ** and lengths.  When the top-level submessage ends, we can go beginning to end,
8036 ** alternating the writing of lengths with memcpy() of the rest of the data.
8037 ** At the top level though, no buffering is required.
8038 */
8039 
8040 
8041 
8042 /* The output buffer is divided into segments; a segment is a string of data
8043  * that is "ready to go" -- it does not need any varint lengths inserted into
8044  * the middle.  The seams between segments are where varints will be inserted
8045  * once they are known.
8046  *
8047  * We also use the concept of a "run", which is a range of encoded bytes that
8048  * occur at a single submessage level.  Every segment contains one or more runs.
8049  *
8050  * A segment can span messages.  Consider:
8051  *
8052  *                  .--Submessage lengths---------.
8053  *                  |       |                     |
8054  *                  |       V                     V
8055  *                  V      | |---------------    | |-----------------
8056  * Submessages:    | |-----------------------------------------------
8057  * Top-level msg: ------------------------------------------------------------
8058  *
8059  * Segments:          -----   -------------------   -----------------
8060  * Runs:              *----   *--------------*---   *----------------
8061  * (* marks the start)
8062  *
8063  * Note that the top-level message is not in any segment because it does not
8064  * have any length preceding it.
8065  *
8066  * A segment is only interrupted when another length needs to be inserted.  So
8067  * observe how the second segment spans both the inner submessage and part of
8068  * the next enclosing message. */
/* One buffered segment.  When the buffered data is finally emitted, each
 * segment is written as the varint encoding of msglen followed by seglen bytes
 * of buffered data. */
8069 typedef struct {
8070   uint32_t msglen;  /* The length to varint-encode before this segment. */
8071   uint32_t seglen;  /* Number of buffered bytes belonging to this segment. */
8072 } upb_pb_encoder_segment;
8073 
/* State of the protobuf encoder. */
8074 struct upb_pb_encoder {
/* Arena used when the output and segment buffers have to grow. */
8075   upb_arena *arena;
8076 
8077   /* Our input and output. */
8078   upb_sink input_;
8079   upb_bytessink output_;
8080 
8081   /* The "subclosure" -- used as the inner closure as part of the bytessink
8082    * protocol. */
8083   void *subc;
8084 
8085   /* The output buffer and limit, and our current write position.  "buf"
8086    * initially points to "initbuf", but is dynamically allocated if we need to
8087    * grow beyond the initial size. */
8088   char *buf, *ptr, *limit;
8089 
8090   /* The beginning of the current run, or undefined if we are at the top
8091    * level. */
8092   char *runbegin;
8093 
8094   /* The list of segments we are accumulating. */
8095   upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
8096 
8097   /* The stack of enclosing submessages.  Each entry in the stack points to the
8098    * segment where this submessage's length is being accumulated. */
/* Entries are indexes into segbuf.  "top" is NULL while no delimited region
 * is open, i.e. while nothing is being buffered. */
8099   int *stack, *top, *stacklimit;
8100 
8101   /* Depth of startmsg/endmsg calls. */
8102   int depth;
8103 };
8104 
8105 /* low-level buffering ********************************************************/
8106 
8107 /* Low-level functions for interacting with the output buffer. */
8108 
8109 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)8110 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8111   size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8112   UPB_ASSERT(n == len);
8113 }
8114 
top(upb_pb_encoder * e)8115 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8116   return &e->segbuf[*e->top];
8117 }
8118 
8119 /* Call to ensure that at least "bytes" bytes are available for writing at
8120  * e->ptr.  Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)8121 static bool reserve(upb_pb_encoder *e, size_t bytes) {
8122   if ((size_t)(e->limit - e->ptr) < bytes) {
8123     /* Grow buffer. */
8124     char *new_buf;
8125     size_t needed = bytes + (e->ptr - e->buf);
8126     size_t old_size = e->limit - e->buf;
8127 
8128     size_t new_size = old_size;
8129 
8130     while (new_size < needed) {
8131       new_size *= 2;
8132     }
8133 
8134     new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size);
8135 
8136     if (new_buf == NULL) {
8137       return false;
8138     }
8139 
8140     e->ptr = new_buf + (e->ptr - e->buf);
8141     e->runbegin = new_buf + (e->runbegin - e->buf);
8142     e->limit = new_buf + new_size;
8143     e->buf = new_buf;
8144   }
8145 
8146   return true;
8147 }
8148 
8149 /* Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
8150  * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)8151 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8152   UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
8153   e->ptr += bytes;
8154 }
8155 
8156 /* Call when all of the bytes for a handler have been written.  Flushes the
8157  * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)8158 static bool commit(upb_pb_encoder *e) {
8159   if (!e->top) {
8160     /* We aren't inside a delimited region.  Flush our accumulated bytes to
8161      * the output.
8162      *
8163      * TODO(haberman): in the future we may want to delay flushing for
8164      * efficiency reasons. */
8165     putbuf(e, e->buf, e->ptr - e->buf);
8166     e->ptr = e->buf;
8167   }
8168 
8169   return true;
8170 }
8171 
8172 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)8173 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8174   if (!reserve(e, len)) {
8175     return false;
8176   }
8177 
8178   memcpy(e->ptr, data, len);
8179   encoder_advance(e, len);
8180   return true;
8181 }
8182 
8183 /* Finish the current run by adding the run totals to the segment and message
8184  * length. */
accumulate(upb_pb_encoder * e)8185 static void accumulate(upb_pb_encoder *e) {
8186   size_t run_len;
8187   UPB_ASSERT(e->ptr >= e->runbegin);
8188   run_len = e->ptr - e->runbegin;
8189   e->segptr->seglen += run_len;
8190   top(e)->msglen += run_len;
8191   e->runbegin = e->ptr;
8192 }
8193 
8194 /* Call to indicate the start of delimited region for which the full length is
8195  * not yet known.  All data will be buffered until the length is known.
8196  * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)8197 static bool start_delim(upb_pb_encoder *e) {
8198   if (e->top) {
8199     /* We are already buffering, advance to the next segment and push it on the
8200      * stack. */
8201     accumulate(e);
8202 
8203     if (++e->top == e->stacklimit) {
8204       /* TODO(haberman): grow stack? */
8205       return false;
8206     }
8207 
8208     if (++e->segptr == e->seglimit) {
8209       /* Grow segment buffer. */
8210       size_t old_size =
8211           (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8212       size_t new_size = old_size * 2;
8213       upb_pb_encoder_segment *new_buf =
8214           upb_arena_realloc(e->arena, e->segbuf, old_size, new_size);
8215 
8216       if (new_buf == NULL) {
8217         return false;
8218       }
8219 
8220       e->segptr = new_buf + (e->segptr - e->segbuf);
8221       e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8222       e->segbuf = new_buf;
8223     }
8224   } else {
8225     /* We were previously at the top level, start buffering. */
8226     e->segptr = e->segbuf;
8227     e->top = e->stack;
8228     e->runbegin = e->ptr;
8229   }
8230 
8231   *e->top = (int)(e->segptr - e->segbuf);
8232   e->segptr->seglen = 0;
8233   e->segptr->msglen = 0;
8234 
8235   return true;
8236 }
8237 
8238 /* Call to indicate the end of a delimited region.  We now know the length of
8239  * the delimited region.  If we are not nested inside any other delimited
8240  * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)8241 static bool end_delim(upb_pb_encoder *e) {
8242   size_t msglen;
8243   accumulate(e);
8244   msglen = top(e)->msglen;
8245 
8246   if (e->top == e->stack) {
8247     /* All lengths are now available, emit all buffered data. */
8248     char buf[UPB_PB_VARINT_MAX_LEN];
8249     upb_pb_encoder_segment *s;
8250     const char *ptr = e->buf;
8251     for (s = e->segbuf; s <= e->segptr; s++) {
8252       size_t lenbytes = upb_vencode64(s->msglen, buf);
8253       putbuf(e, buf, lenbytes);
8254       putbuf(e, ptr, s->seglen);
8255       ptr += s->seglen;
8256     }
8257 
8258     e->ptr = e->buf;
8259     e->top = NULL;
8260   } else {
8261     /* Need to keep buffering; propagate length info into enclosing
8262      * submessages. */
8263     --e->top;
8264     top(e)->msglen += msglen + upb_varint_size(msglen);
8265   }
8266 
8267   return true;
8268 }
8269 
8270 
8271 /* tag_t **********************************************************************/
8272 
8273 /* A precomputed (pre-encoded) tag and length. */
8274 
/* A field tag encoded ahead of time, so that encoding a field only needs to
 * copy these bytes. */
8275 typedef struct {
8276   uint8_t bytes;  /* Number of valid bytes in "tag". */
8277   char tag[7];    /* The varint-encoded tag. */
8278 } tag_t;
8279 
8280 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)8281 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8282                     upb_handlerattr *attr) {
8283   uint32_t n = upb_fielddef_number(f);
8284 
8285   tag_t *tag = upb_gmalloc(sizeof(tag_t));
8286   tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8287 
8288   attr->handler_data = tag;
8289   upb_handlers_addcleanup(h, tag, upb_gfree);
8290 }
8291 
/* Appends the pre-encoded bytes of "tag" to the buffer.  Returns false if the
 * space could not be reserved. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  size_t taglen = tag->bytes;
  return encode_bytes(e, tag->tag, taglen);
}
8295 
8296 
8297 /* encoding of wire types *****************************************************/
8298 
/* Appends the 8 bytes of `val` exactly as they are laid out in memory. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(uint64_t));
}
8303 
/* Appends the 4 bytes of `val` exactly as they are laid out in memory. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(uint32_t));
}
8308 
/* Encodes `val` with upb_vencode64() directly into the encoder's buffer.
 * Returns false if UPB_PB_VARINT_MAX_LEN bytes could not be reserved. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  if (reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    /* Write at the current position, then advance by the encoded size. */
    encoder_advance(e, upb_vencode64(val, e->ptr));
    return true;
  }
  return false;
}
8317 
/* Returns the object representation of `d` as a 64-bit integer.  The bytes
 * are copied with memcpy() rather than read through a cast pointer. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8323 
/* Returns the object representation of `d` as a 32-bit integer.  The bytes
 * are copied with memcpy() rather than read through a cast pointer. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8329 
8330 
8331 /* encoding of proto types ****************************************************/
8332 
startmsg(void * c,const void * hd)8333 static bool startmsg(void *c, const void *hd) {
8334   upb_pb_encoder *e = c;
8335   UPB_UNUSED(hd);
8336   if (e->depth++ == 0) {
8337     upb_bytessink_start(e->output_, 0, &e->subc);
8338   }
8339   return true;
8340 }
8341 
/* End-of-message handler.  Decrements the nesting depth; when it reaches
 * zero, the output byte sink is ended.  `status` is not used. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *enc = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);
  enc->depth--;
  if (enc->depth == 0) {
    upb_bytessink_end(enc->output_);
  }
  return true;
}
8351 
encode_startdelimfield(void * c,const void * hd)8352 static void *encode_startdelimfield(void *c, const void *hd) {
8353   bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8354   return ok ? c : UPB_BREAK;
8355 }
8356 
/* Unknown-field handler: appends the `len` bytes at `buf` unchanged and then
 * commits. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);
  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
8362 
encode_enddelimfield(void * c,const void * hd)8363 static bool encode_enddelimfield(void *c, const void *hd) {
8364   UPB_UNUSED(hd);
8365   return end_delim(c);
8366 }
8367 
encode_startgroup(void * c,const void * hd)8368 static void *encode_startgroup(void *c, const void *hd) {
8369   return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8370 }
8371 
/* Ends a group: emits the pre-encoded tag passed in `hd` and commits. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
8375 
/* Start-of-string handler; a string is started exactly like any other
 * delimited field.  The size hint is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
8380 
/* String-data handler: appends `len` bytes from `buf`.  Returns `len` when
 * the bytes were accepted and 0 otherwise. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(hd);
  UPB_UNUSED(h);
  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
8387 
/* Generates the two value handlers for one protobuf scalar type:
 *
 *   encode_scalar_<type>: emits the pre-encoded tag passed as handler data,
 *     then the value, then calls commit().
 *   encode_packed_<type>: emits only the value (no tag, no commit()).
 *
 * `ctype` is the C type of the handler's value parameter.  `convert` is
 * applied as (convert)(val), so it may be either a type name (which makes it
 * a cast) or the name of a one-argument function.  `encode` is the wire-level
 * writer that receives the converted value. */
#define T(type, ctype, convert, encode)                                  \
  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
  }                                                                      \
  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
    UPB_UNUSED(hd);                                                      \
    return encode(e, (convert)(val));                                    \
  }

/* Columns: handler suffix, C value type, conversion, wire-level writer. */
T(double,   double,   dbl2uint64,   encode_fixed64)
T(float,    float,    flt2uint32,   encode_fixed32)
T(int64,    int64_t,  uint64_t,     encode_varint)
/* int32 is converted through int64_t before being passed to encode_varint. */
T(int32,    int32_t,  int64_t,      encode_varint)
T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
T(bool,     bool,     bool,         encode_varint)
T(uint32,   uint32_t, uint32_t,     encode_varint)
T(uint64,   uint64_t, uint64_t,     encode_varint)
/* enum is converted through uint32_t, unlike int32 above. */
T(enum,     int32_t,  uint32_t,     encode_varint)
T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
T(sint64,   int64_t,  upb_zzenc_64, encode_varint)

#undef T
8413 
8414 
8415 /* code to build the handlers *************************************************/
8416 
8417 #include <stdio.h>
8418 static void newhandlers_callback(const void *closure, upb_handlers *h) {
8419   const upb_msgdef *m;
8420   upb_msg_field_iter i;
8421 
8422   UPB_UNUSED(closure);
8423 
8424   upb_handlers_setstartmsg(h, startmsg, NULL);
8425   upb_handlers_setendmsg(h, endmsg, NULL);
8426   upb_handlers_setunknown(h, encode_unknown, NULL);
8427 
8428   m = upb_handlers_msgdef(h);
8429   for(upb_msg_field_begin(&i, m);
8430       !upb_msg_field_done(&i);
8431       upb_msg_field_next(&i)) {
8432     const upb_fielddef *f = upb_msg_iter_field(&i);
8433     bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
8434                   upb_fielddef_packed(f);
8435     upb_handlerattr attr = UPB_HANDLERATTR_INIT;
8436     upb_wiretype_t wt =
8437         packed ? UPB_WIRE_TYPE_DELIMITED
8438                : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
8439 
8440     /* Pre-encode the tag for this field. */
8441     new_tag(h, f, wt, &attr);
8442 
8443     if (packed) {
8444       upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
8445       upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
8446     }
8447 
8448 #define T(upper, lower, upbtype)                                     \
8449   case UPB_DESCRIPTOR_TYPE_##upper:                                  \
8450     if (packed) {                                                    \
8451       upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
8452     } else {                                                         \
8453       upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
8454     }                                                                \
8455     break;
8456 
8457     switch (upb_fielddef_descriptortype(f)) {
8458       T(DOUBLE,   double,   double);
8459       T(FLOAT,    float,    float);
8460       T(INT64,    int64,    int64);
8461       T(INT32,    int32,    int32);
8462       T(FIXED64,  fixed64,  uint64);
8463       T(FIXED32,  fixed32,  uint32);
8464       T(BOOL,     bool,     bool);
8465       T(UINT32,   uint32,   uint32);
8466       T(UINT64,   uint64,   uint64);
8467       T(ENUM,     enum,     int32);
8468       T(SFIXED32, sfixed32, int32);
8469       T(SFIXED64, sfixed64, int64);
8470       T(SINT32,   sint32,   int32);
8471       T(SINT64,   sint64,   int64);
8472       case UPB_DESCRIPTOR_TYPE_STRING:
8473       case UPB_DESCRIPTOR_TYPE_BYTES:
8474         upb_handlers_setstartstr(h, f, encode_startstr, &attr);
8475         upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
8476         upb_handlers_setstring(h, f, encode_strbuf, &attr);
8477         break;
8478       case UPB_DESCRIPTOR_TYPE_MESSAGE:
8479         upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
8480         upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
8481         break;
8482       case UPB_DESCRIPTOR_TYPE_GROUP: {
8483         /* Endgroup takes a different tag (wire_type = END_GROUP). */
8484         upb_handlerattr attr2 = UPB_HANDLERATTR_INIT;
8485         new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
8486 
8487         upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
8488         upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
8489 
8490         break;
8491       }
8492     }
8493 
8494 #undef T
8495   }
8496 }
8497 
/* Returns the encoder to its idle state: nesting depth zero, no open
 * delimited region and no current segment. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
8503 
8504 
8505 /* public API *****************************************************************/
8506 
upb_pb_encoder_newcache(void)8507 upb_handlercache *upb_pb_encoder_newcache(void) {
8508   return upb_handlercache_new(newhandlers_callback, NULL);
8509 }
8510 
upb_pb_encoder_create(upb_arena * arena,const upb_handlers * h,upb_bytessink output)8511 upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h,
8512                                       upb_bytessink output) {
8513   const size_t initial_bufsize = 256;
8514   const size_t initial_segbufsize = 16;
8515   /* TODO(haberman): make this configurable. */
8516   const size_t stack_size = 64;
8517 #ifndef NDEBUG
8518   const size_t size_before = upb_arena_bytesallocated(arena);
8519 #endif
8520 
8521   upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder));
8522   if (!e) return NULL;
8523 
8524   e->buf = upb_arena_malloc(arena, initial_bufsize);
8525   e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf));
8526   e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack));
8527 
8528   if (!e->buf || !e->segbuf || !e->stack) {
8529     return NULL;
8530   }
8531 
8532   e->limit = e->buf + initial_bufsize;
8533   e->seglimit = e->segbuf + initial_segbufsize;
8534   e->stacklimit = e->stack + stack_size;
8535 
8536   upb_pb_encoder_reset(e);
8537   upb_sink_reset(&e->input_, h, e);
8538 
8539   e->arena = arena;
8540   e->output_ = output;
8541   e->subc = output.closure;
8542   e->ptr = e->buf;
8543 
8544   /* If this fails, increase the value in encoder.h. */
8545   UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
8546                       UPB_PB_ENCODER_SIZE);
8547   return e;
8548 }
8549 
upb_pb_encoder_input(upb_pb_encoder * e)8550 upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; }
8551 /*
8552  * upb::pb::TextPrinter
8553  *
8554  * OPT: This is not optimized at all.  It uses printf() which parses the format
8555  * string every time, and it allocates memory for every put.
8556  */
8557 
8558 
8559 #include <ctype.h>
8560 #include <float.h>
8561 #include <inttypes.h>
8562 #include <stdarg.h>
8563 #include <stdio.h>
8564 #include <string.h>
8565 
8566 
8567 
/* State of one text printer. */
struct upb_textprinter {
  upb_sink input_;        /* Sink returned by upb_textprinter_input(). */
  upb_bytessink output_;  /* Destination of every upb_bytessink_putbuf(). */
  int indent_depth_;      /* Current submessage nesting level. */
  bool single_line_;      /* If set: ' ' between fields, no indentation. */
  void *subc;             /* Filled by upb_bytessink_start(); passed to each
                           * upb_bytessink_putbuf() call. */
};
8575 
/* Jumps to the enclosing function's `err` label when `x` is negative.  The
 * expansion is a bare `if` statement that already ends in ';', so take care
 * when using it directly before an `else`. */
#define CHECK(x) if ((x) < 0) goto err;
8577 
/* Returns the part of `longname` after its last '.', or `longname` itself
 * when it contains no '.'.  The result points into `longname`. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
8582 
/* Writes two spaces per nesting level unless the printer is in single-line
 * mode.  Always returns 0; the results of the writes are not inspected. */
static int indent(upb_textprinter *p) {
  int remaining;
  if (p->single_line_) {
    return 0;
  }
  for (remaining = p->indent_depth_; remaining > 0; remaining--) {
    upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
  }
  return 0;
}
8590 
/* Writes the field terminator: a space in single-line mode, a newline
 * otherwise.  Always returns 0. */
static int endfield(upb_textprinter *p) {
  char terminator = '\n';
  if (p->single_line_) {
    terminator = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &terminator, 1, NULL);
  return 0;
}
8596 
/* Writes `len` bytes from `buf` to the output in C-escaped form.
 *
 * Newline, carriage return, tab, both quote characters and backslash become
 * two-character escapes.  Any other byte is emitted as a three-digit octal
 * escape when it is not printable; bytes >= 0x80 are passed through
 * unchanged when `preserve_utf8` is set.
 *
 * Output is staged in a local buffer and flushed to the sink whenever it is
 * nearly full and once more at the end.  Always returns 0; the results of
 * the sink writes are not inspected. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  /* Based on CEscapeInternal() from Google's protobuf release. */
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behavior. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* The longest escape is 4 characters, but sprintf() also stores a
     * terminating NUL after them, so 5 bytes must be available. */
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (*buf) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned)ch);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
8643 
/* printf-style write to the printer's output.
 *
 * The format is evaluated twice: once to measure the result and once into a
 * temporary heap buffer of exactly that size, which is then handed to the
 * sink and freed.
 *
 * Returns false if the formatted length cannot be determined or the buffer
 * cannot be allocated; otherwise returns the result of the sink write. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* A negative length reports a formatting error; it must not reach the
   * allocator. */
  if (len < 0) {
    va_end(args);
    return false;
  }

  /* + 1 for NULL terminator (the formatter requires it even if we don't). */
  str = upb_gmalloc((size_t)len + 1);
  if (!str) {
    /* Every va_start() must be paired with va_end(), including on failure. */
    va_end(args);
    return false;
  }
  /* Bounded write: never stores more than the len + 1 bytes allocated. */
  written = _upb_vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT(written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  upb_gfree(str);
  return ok;
}
8670 
8671 
8672 /* handlers *******************************************************************/
8673 
textprinter_startmsg(void * c,const void * hd)8674 static bool textprinter_startmsg(void *c, const void *hd) {
8675   upb_textprinter *p = c;
8676   UPB_UNUSED(hd);
8677   if (p->indent_depth_ == 0) {
8678     upb_bytessink_start(p->output_, 0, &p->subc);
8679   }
8680   return true;
8681 }
8682 
/* End-of-message handler: ends the output byte sink when the printer is at
 * nesting depth zero.  `s` is not used. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(s);
  if (printer->indent_depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
8692 
/* Generates textprinter_put<name>(), a value handler that prints
 * "<field name>: <value>" using the printf conversion `fmt`, preceded by
 * indent() and followed by endfield().
 *
 * The handler data is used as the field's upb_fielddef.  The result of
 * putf() is not checked; the handler returns false only when indent() or
 * endfield() returns a negative value. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
}
8705 
textprinter_putbool(void * closure,const void * handler_data,bool val)8706 static bool textprinter_putbool(void *closure, const void *handler_data,
8707                                 bool val) {
8708   upb_textprinter *p = closure;
8709   const upb_fielddef *f = handler_data;
8710   CHECK(indent(p));
8711   putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
8712   CHECK(endfield(p));
8713   return true;
8714 err:
8715   return false;
8716 }
8717 
/* Two-level stringification: STRINGIFY_MACROVAL(x) expands `x` first and
 * then turns the expansion into a string literal, so FLT_DIG and DBL_DIG
 * below contribute their values rather than their names. */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Columns: handler suffix, C value type, printf conversion. */
TYPE(int32,  int32_t,  "%" PRId32)
TYPE(int64,  int64_t,  "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
/* Floating-point values are printed with %g at FLT_DIG / DBL_DIG precision. */
TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
8729 
8730 /* Output a symbolic value from the enum if found, else just print as int32. */
8731 static bool textprinter_putenum(void *closure, const void *handler_data,
8732                                 int32_t val) {
8733   upb_textprinter *p = closure;
8734   const upb_fielddef *f = handler_data;
8735   const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f);
8736   const char *label = upb_enumdef_iton(enum_def, val);
8737   if (label) {
8738     indent(p);
8739     putf(p, "%s: %s", upb_fielddef_name(f), label);
8740     endfield(p);
8741   } else {
8742     if (!textprinter_putint32(closure, handler_data, val))
8743       return false;
8744   }
8745   return true;
8746 }
8747 
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)8748 static void *textprinter_startstr(void *closure, const void *handler_data,
8749                       size_t size_hint) {
8750   upb_textprinter *p = closure;
8751   const upb_fielddef *f = handler_data;
8752   UPB_UNUSED(size_hint);
8753   indent(p);
8754   putf(p, "%s: \"", upb_fielddef_name(f));
8755   return p;
8756 }
8757 
textprinter_endstr(void * closure,const void * handler_data)8758 static bool textprinter_endstr(void *closure, const void *handler_data) {
8759   upb_textprinter *p = closure;
8760   UPB_UNUSED(handler_data);
8761   putf(p, "\"");
8762   endfield(p);
8763   return true;
8764 }
8765 
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)8766 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
8767                                  size_t len, const upb_bufhandle *handle) {
8768   upb_textprinter *p = closure;
8769   const upb_fielddef *f = hd;
8770   UPB_UNUSED(handle);
8771   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
8772   return len;
8773 err:
8774   return 0;
8775 }
8776 
textprinter_startsubmsg(void * closure,const void * handler_data)8777 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
8778   upb_textprinter *p = closure;
8779   const char *name = handler_data;
8780   CHECK(indent(p));
8781   putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
8782   p->indent_depth_++;
8783   return p;
8784 err:
8785   return UPB_BREAK;
8786 }
8787 
textprinter_endsubmsg(void * closure,const void * handler_data)8788 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
8789   upb_textprinter *p = closure;
8790   UPB_UNUSED(handler_data);
8791   p->indent_depth_--;
8792   CHECK(indent(p));
8793   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
8794   CHECK(endfield(p));
8795   return true;
8796 err:
8797   return false;
8798 }
8799 
/* Handler-cache callback: registers the text printer's handlers on `h` for
 * every field of the message definition that `h` describes.  `c` is unused.
 *
 * The handler data is the field's fielddef, except for submessage fields,
 * where it is the name string to print: the short name of the message type
 * for groups, the field name otherwise. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  upb_msg_field_iter it;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  upb_msg_field_begin(&it, msgdef);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *field = upb_msg_iter_field(&it);
    upb_handlerattr attr = UPB_HANDLERATTR_INIT;
    attr.handler_data = field;

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, field, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, field, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, field, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, field, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, field, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, field, textprinter_startstr, &attr);
        upb_handlers_setstring(h, field, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, field, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        const char *label;
        if (upb_fielddef_descriptortype(field) == UPB_DESCRIPTOR_TYPE_GROUP) {
          label =
              shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(field)));
        } else {
          label = upb_fielddef_name(field);
        }
        attr.handler_data = label;
        upb_handlers_setstartsubmsg(h, field, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, field, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, textprinter_putenum, &attr);
        break;
    }

    upb_msg_field_next(&it);
  }
}
8858 
/* Sets the nesting depth back to zero and selects single- or multi-line
 * output. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
8863 
8864 
8865 /* Public API *****************************************************************/
8866 
upb_textprinter_create(upb_arena * arena,const upb_handlers * h,upb_bytessink output)8867 upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h,
8868                                         upb_bytessink output) {
8869   upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter));
8870   if (!p) return NULL;
8871 
8872   p->output_ = output;
8873   upb_sink_reset(&p->input_, h, p);
8874   textprinter_reset(p, false);
8875 
8876   return p;
8877 }
8878 
upb_textprinter_newcache(void)8879 upb_handlercache *upb_textprinter_newcache(void) {
8880   return upb_handlercache_new(&onmreg, NULL);
8881 }
8882 
upb_textprinter_input(upb_textprinter * p)8883 upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; }
8884 
/* Switches single-line output on or off.  The nesting depth is left
 * untouched. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  upb_textprinter *printer = p;
  printer->single_line_ = single_line;
}
8888 
8889 
/* Wire type used for each descriptor type when the field is not packed.
 * Index is descriptor type; the trailing comment on each entry names the
 * descriptor type at that index (index 0 is labelled ENDGROUP). */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
8912 
8913 /* A basic branch-based decoder, uses 32-bit values to get good performance
8914  * on 32-bit architectures (but performs well on 64-bits also).
8915  * This scheme comes from the original Google Protobuf implementation
8916  * (proto2). */
upb_vdecode_max8_branch32(upb_decoderet r)8917 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
8918   upb_decoderet err = {NULL, 0};
8919   const char *p = r.p;
8920   uint32_t low = (uint32_t)r.val;
8921   uint32_t high = 0;
8922   uint32_t b;
8923   b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
8924   b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
8925   b = *(p++); low  |= (b & 0x7fU) << 28;
8926               high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
8927   b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
8928   b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
8929   b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
8930   b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
8931   b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
8932   return err;
8933 
8934 done:
8935   r.val = ((uint64_t)high << 32) | low;
8936   r.p = p;
8937   return r;
8938 }
8939 
8940 /* Like the previous, but uses 64-bit values. */
upb_vdecode_max8_branch64(upb_decoderet r)8941 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
8942   const char *p = r.p;
8943   uint64_t val = r.val;
8944   uint64_t b;
8945   upb_decoderet err = {NULL, 0};
8946   b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
8947   b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
8948   b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
8949   b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
8950   b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
8951   b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
8952   b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
8953   b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
8954   return err;
8955 
8956 done:
8957   r.val = val;
8958   r.p = p;
8959   return r;
8960 }
8961 
8962 #line 1 "upb/json/parser.rl"
8963 /*
8964 ** upb::json::Parser (upb_json_parser)
8965 **
8966 ** A parser that uses the Ragel State Machine Compiler to generate
8967 ** the finite automata.
8968 **
8969 ** Ragel only natively handles regular languages, but we can manually
8970 ** program it a bit to handle context-free languages like JSON, by using
8971 ** the "fcall" and "fret" constructs.
8972 **
8973 ** This parser can handle the basics, but needs several things to be fleshed
8974 ** out:
8975 **
8976 ** - handling of unicode escape sequences (including high surrogate pairs).
8977 ** - properly check and report errors for unknown fields, stack overflow,
8978 **   improper array nesting (or lack of nesting).
8979 ** - handling of base64 sequences with padding characters.
8980 ** - handling of push-back (non-success returns from sink functions).
8981 ** - handling of keys/escape-sequences/etc that span input buffers.
8982 */
8983 
8984 #include <ctype.h>
8985 #include <errno.h>
8986 #include <float.h>
8987 #include <math.h>
8988 #include <stdint.h>
8989 #include <stdio.h>
8990 #include <stdlib.h>
8991 #include <string.h>
8992 
8993 #include <time.h>
8994 
8995 
8996 
/* NOTE(review): presumably the maximum nesting depth accepted by the JSON
 * parser; its uses are outside this view - confirm. */
#define UPB_JSON_MAX_DEPTH 64
8998 
/* Type of value message.  These constants are what start_value_object()
 * receives as its `value_type` argument - NOTE(review): inferred from the
 * forward declaration in this file; confirm against its definition. */
enum {
  VALUE_NULLVALUE   = 0,
  VALUE_NUMBERVALUE = 1,
  VALUE_STRINGVALUE = 2,
  VALUE_BOOLVALUE   = 3,
  VALUE_STRUCTVALUE = 4,
  VALUE_LISTVALUE   = 5
};
9008 
9009 /* Forward declare */
9010 static bool is_top_level(upb_json_parser *p);
9011 static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
9012 static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
9013 
9014 static bool is_number_wrapper_object(upb_json_parser *p);
9015 static bool does_number_wrapper_start(upb_json_parser *p);
9016 static bool does_number_wrapper_end(upb_json_parser *p);
9017 
9018 static bool is_string_wrapper_object(upb_json_parser *p);
9019 static bool does_string_wrapper_start(upb_json_parser *p);
9020 static bool does_string_wrapper_end(upb_json_parser *p);
9021 
9022 static bool does_fieldmask_start(upb_json_parser *p);
9023 static bool does_fieldmask_end(upb_json_parser *p);
9024 static void start_fieldmask_object(upb_json_parser *p);
9025 static void end_fieldmask_object(upb_json_parser *p);
9026 
9027 static void start_wrapper_object(upb_json_parser *p);
9028 static void end_wrapper_object(upb_json_parser *p);
9029 
9030 static void start_value_object(upb_json_parser *p, int value_type);
9031 static void end_value_object(upb_json_parser *p);
9032 
9033 static void start_listvalue_object(upb_json_parser *p);
9034 static void end_listvalue_object(upb_json_parser *p);
9035 
9036 static void start_structvalue_object(upb_json_parser *p);
9037 static void end_structvalue_object(upb_json_parser *p);
9038 
9039 static void start_object(upb_json_parser *p);
9040 static void end_object(upb_json_parser *p);
9041 
9042 static void start_any_object(upb_json_parser *p, const char *ptr);
9043 static bool end_any_object(upb_json_parser *p, const char *ptr);
9044 
9045 static bool start_subobject(upb_json_parser *p);
9046 static void end_subobject(upb_json_parser *p);
9047 
9048 static void start_member(upb_json_parser *p);
9049 static void end_member(upb_json_parser *p);
9050 static bool end_membername(upb_json_parser *p);
9051 
9052 static void start_any_member(upb_json_parser *p, const char *ptr);
9053 static void end_any_member(upb_json_parser *p, const char *ptr);
9054 static bool end_any_membername(upb_json_parser *p);
9055 
9056 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
9057              const upb_bufhandle *handle);
9058 static bool end(void *closure, const void *hd);
9059 
/* NOTE(review): presumably a placeholder character whose address marks
 * end-of-input for the parser; its uses are outside this view - confirm. */
static const char eof_ch = 'e';
9061 
/* stringsink: a bytes sink that accumulates everything written to it in a
 * growable heap buffer. */
typedef struct {
  upb_byteshandler handler;  /* Handlers installed by upb_stringsink_init(). */
  upb_bytessink sink;        /* Sink bound to `handler` and this object. */
  char *ptr;                 /* Heap buffer; released by
                              * upb_stringsink_uninit(). */
  size_t len, size;          /* Bytes stored so far / capacity of `ptr`. */
} upb_stringsink;
9069 
9070 
stringsink_start(void * _sink,const void * hd,size_t size_hint)9071 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
9072   upb_stringsink *sink = _sink;
9073   sink->len = 0;
9074   UPB_UNUSED(hd);
9075   UPB_UNUSED(size_hint);
9076   return sink;
9077 }
9078 
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)9079 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
9080                                 size_t len, const upb_bufhandle *handle) {
9081   upb_stringsink *sink = _sink;
9082   size_t new_size = sink->size;
9083 
9084   UPB_UNUSED(hd);
9085   UPB_UNUSED(handle);
9086 
9087   while (sink->len + len > new_size) {
9088     new_size *= 2;
9089   }
9090 
9091   if (new_size != sink->size) {
9092     sink->ptr = realloc(sink->ptr, new_size);
9093     sink->size = new_size;
9094   }
9095 
9096   memcpy(sink->ptr + sink->len, ptr, len);
9097   sink->len += len;
9098 
9099   return len;
9100 }
9101 
/* Prepares `sink` for use: installs the start and string handlers, binds the
 * bytes sink to them, and allocates an initial 32-byte buffer.  The
 * allocation result is not checked. */
void upb_stringsink_init(upb_stringsink *sink) {
  upb_byteshandler *handler = &sink->handler;

  upb_byteshandler_init(handler);
  upb_byteshandler_setstartstr(handler, stringsink_start, NULL);
  upb_byteshandler_setstring(handler, stringsink_string, NULL);

  upb_bytessink_reset(&sink->sink, handler, sink);

  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);
}
9113 
/* Frees the heap buffer owned by |sink|.  The sink structure itself is not
 * freed. */
void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); }
9115 
/* State kept while parsing a google.protobuf.Any message.  Allocated and
 * initialized by json_parser_any_frame_new(). */
typedef struct {
  /* For encoding Any value field in binary format. */
  upb_handlercache *encoder_handlercache;
  upb_stringsink stringsink;

  /* For decoding Any value field in json format. */
  upb_json_codecache *parser_codecache;
  upb_sink sink;
  /* NULL until json_parser_any_frame_set_payload_type() creates the payload
   * parser; json_parser_any_frame_has_type_url() tests exactly this. */
  upb_json_parser *parser;

  /* Mark the range of uninterpreted values in json input before type url. */
  const char *before_type_url_start;
  const char *before_type_url_end;

  /* Mark the range of uninterpreted values in json input after type url.
   * NULL until it is set (once) after the type url has been seen. */
  const char *after_type_url_start;
} upb_jsonparser_any_frame;
9133 
/* One entry of the parser's scope stack (see upb_json_parser.stack).
 * init_frame() resets every member except |sink|. */
typedef struct {
  /* Sink that receives the values parsed in this frame. */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* The table mapping json name to fielddef for this message. */
  const upb_strtable *name_table;

  /* We are in a repeated-field context. We need this flag to decide whether to
   * handle the array as a normal repeated field or a
   * google.protobuf.ListValue/google.protobuf.Value. */
  bool is_repeated;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;

  /* We are in an Any message context. This flag is set when parsing the Any
   * message and indicates to all field parsers (subobjects, strings, numbers,
   * and bools) that the parsed field should be serialized as binary data or
   * cached (type url not found yet). */
  bool is_any;

  /* Parsing state for the Any message (see upb_jsonparser_any_frame);
   * presumably only meaningful while |is_any| is set -- TODO confirm. */
  upb_jsonparser_any_frame *any_frame;

  /* True if the field to be parsed is unknown. */
  bool is_unknown_field;
} upb_jsonparser_frame;
9179 
/* Puts |frame| into its empty state: every pointer member becomes NULL and
 * every flag becomes false.  The |sink| member is deliberately left alone. */
static void init_frame(upb_jsonparser_frame *frame) {
  /* Pointers. */
  frame->m = NULL;
  frame->f = NULL;
  frame->name_table = NULL;
  frame->mapfield = NULL;
  frame->any_frame = NULL;

  /* Context flags. */
  frame->is_repeated = false;
  frame->is_map = false;
  frame->is_mapentry = false;
  frame->is_any = false;
  frame->is_unknown_field = false;
}
9192 
/* Complete state of one JSON parser instance. */
struct upb_json_parser {
  upb_arena *arena;
  const upb_json_parsermethod *method;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in.  |top| is the frame currently
   * in use; check_stack() refuses to go deeper once |top| + 1 reaches
   * |limit|. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  /* Where error messages are reported. */
  upb_status *status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl.
   * |accumulated| points either into the input or at |accumulate_buf|. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl.
   * |multipart_state| holds one of the MULTIPART_* values. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl.
   * NULL when no capture is in progress. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;

  /* For resolve type url in Any. */
  const upb_symtab *symtab;

  /* Whether to proceed if unknown field is met. */
  bool ignore_json_unknown;

  /* Cache for parsing timestamp due to base and zone are handled in different
   * handlers. */
  struct tm tm;
};
9239 
start_jsonparser_frame(upb_json_parser * p)9240 static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
9241   upb_jsonparser_frame *inner;
9242   inner = p->top + 1;
9243   init_frame(inner);
9244   return inner;
9245 }
9246 
/* Cache of parser methods, keyed by message definition. */
struct upb_json_codecache {
  upb_arena *arena;
  upb_inttable methods;   /* upb_msgdef* -> upb_json_parsermethod* */
};
9251 
/* Per-message parsing data stored in a upb_json_codecache. */
struct upb_json_parsermethod {
  /* The cache this method belongs to. */
  const upb_json_codecache *cache;
  upb_byteshandler input_handler_;

  /* Maps json_name -> fielddef */
  upb_strtable name_table;
};
9259 
/* Returns false from the enclosing function when |x| evaluates to false.
 *
 * The body is wrapped in do/while(0) so that the expansion is exactly one
 * statement: a bare "if" would silently capture the "else" of a surrounding
 * if/else that uses this macro. */
#define PARSER_CHECK_RETURN(x) \
  do {                         \
    if (!(x)) return false;    \
  } while (0)
9261 
json_parser_any_frame_new(upb_json_parser * p)9262 static upb_jsonparser_any_frame *json_parser_any_frame_new(
9263     upb_json_parser *p) {
9264   upb_jsonparser_any_frame *frame;
9265 
9266   frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame));
9267 
9268   frame->encoder_handlercache = upb_pb_encoder_newcache();
9269   frame->parser_codecache = upb_json_codecache_new();
9270   frame->parser = NULL;
9271   frame->before_type_url_start = NULL;
9272   frame->before_type_url_end = NULL;
9273   frame->after_type_url_start = NULL;
9274 
9275   upb_stringsink_init(&frame->stringsink);
9276 
9277   return frame;
9278 }
9279 
/* Binds |frame| to the message type carried by the Any value.
 *
 * Creates a binary encoder for |payload_type| that writes into the frame's
 * string sink, then creates a nested JSON parser for |payload_type| whose
 * output is fed to that encoder.  Both objects are allocated from the outer
 * parser's arena; the nested parser shares the outer parser's symtab, status
 * and ignore_json_unknown setting. */
static void json_parser_any_frame_set_payload_type(
    upb_json_parser *p,
    upb_jsonparser_any_frame *frame,
    const upb_msgdef *payload_type) {
  const upb_handlers *enc_handlers;
  upb_pb_encoder *enc;
  const upb_json_parsermethod *method;

  /* Binary encoder writing to the string sink. */
  enc_handlers = upb_handlercache_get(frame->encoder_handlercache,
                                      payload_type);
  enc = upb_pb_encoder_create(p->arena, enc_handlers, frame->stringsink.sink);

  /* Nested JSON parser feeding the encoder. */
  method = upb_json_codecache_get(frame->parser_codecache, payload_type);
  upb_sink_reset(&frame->sink, enc_handlers, enc);
  frame->parser = upb_json_parser_create(p->arena, method, p->symtab,
                                         frame->sink, p->status,
                                         p->ignore_json_unknown);
}
9299 
/* Releases the resources created by json_parser_any_frame_new(): both caches
 * and the string sink's buffer.  The frame structure itself is not freed
 * here (it was allocated from the parser's arena). */
static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
  upb_handlercache_free(frame->encoder_handlercache);
  upb_json_codecache_free(frame->parser_codecache);
  upb_stringsink_uninit(&frame->stringsink);
}
9305 
/* True once the frame has a payload parser, i.e. after
 * json_parser_any_frame_set_payload_type() has stored one. */
static bool json_parser_any_frame_has_type_url(
  upb_jsonparser_any_frame *frame) {
  if (frame->parser == NULL) {
    return false;
  }
  return true;
}
9310 
/* True if the range recorded before the type url is non-empty, i.e. its start
 * and end markers differ. */
static bool json_parser_any_frame_has_value_before_type_url(
  upb_jsonparser_any_frame *frame) {
  const char *range_start = frame->before_type_url_start;
  const char *range_end = frame->before_type_url_end;

  return range_start != range_end;
}
9315 
/* True if a start marker has been recorded for values following the type
 * url. */
static bool json_parser_any_frame_has_value_after_type_url(
  upb_jsonparser_any_frame *frame) {
  if (frame->after_type_url_start == NULL) {
    return false;
  }
  return true;
}
9320 
/* True if the frame recorded any value, either before or after the type
 * url. */
static bool json_parser_any_frame_has_value(
  upb_jsonparser_any_frame *frame) {
  if (json_parser_any_frame_has_value_before_type_url(frame)) {
    return true;
  }
  return json_parser_any_frame_has_value_after_type_url(frame);
}
9326 
/* Moves the end marker of the "before type url" range to |ptr|.  Does nothing
 * once the frame has a payload parser. */
static void json_parser_any_frame_set_before_type_url_end(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (frame->parser != NULL) {
    return;
  }
  frame->before_type_url_end = ptr;
}
9334 
/* Records |ptr| as the start of the values following the type url.  Only the
 * first call made after the type url is known has any effect. */
static void json_parser_any_frame_set_after_type_url_start_once(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (!json_parser_any_frame_has_type_url(frame)) {
    return;
  }
  if (frame->after_type_url_start != NULL) {
    return;
  }
  frame->after_type_url_start = ptr;
}
9343 
9344 /* Used to signal that a capture has been suspended. */
9345 static char suspend_capture;
9346 
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)9347 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9348                                              upb_handlertype_t type) {
9349   upb_selector_t sel;
9350   bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9351   UPB_ASSUME(ok);
9352   return sel;
9353 }
9354 
parser_getsel(upb_json_parser * p)9355 static upb_selector_t parser_getsel(upb_json_parser *p) {
9356   return getsel_for_handlertype(
9357       p, upb_handlers_getprimitivehandlertype(p->top->f));
9358 }
9359 
/* Returns true if one more frame can be used above the current top.
 * Otherwise sets the "Nesting too deep" error on p->status and returns
 * false. */
static bool check_stack(upb_json_parser *p) {
  if ((p->top + 1) != p->limit) {
    return true;
  }

  upb_status_seterrmsg(p->status, "Nesting too deep");
  return false;
}
9368 
/* Points frame->name_table at the name table of the parser method registered
 * for frame->m in the parser's code cache.  The lookup is assumed to
 * succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  const upb_json_codecache *codecache = p->method->cache;
  const upb_json_parsermethod *found_method;
  upb_value entry;
  bool found;

  found = upb_inttable_lookupptr(&codecache->methods, frame->m, &entry);
  UPB_ASSUME(found);

  found_method = upb_value_getconstptr(entry);
  frame->name_table = &found_method->name_table;
}
9381 
9382 /* There are GCC/Clang built-ins for overflow checking which we could start
9383  * using if there was any performance benefit to it. */
9384 
/* Overflow-checked addition.  Stores a + b in *c and returns true, or returns
 * false (leaving *c untouched) when the sum does not fit in size_t. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;

  if (b > headroom) {
    return false;
  }

  *c = a + b;
  return true;
}
9390 
/* Returns a * b, clamped to SIZE_MAX when the exact product does not fit in
 * size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* The product overflows exactly when b exceeds SIZE_MAX / a. */
  if (a != 0 && b > SIZE_MAX / a) {
    return SIZE_MAX;
  }
  return a * b;
}
9399 
9400 
9401 /* Base64 decoding ************************************************************/
9402 
9403 /* TODO(haberman): make this streaming. */
9404 
/* Decoding table indexed by byte value (256 entries).  Each entry is the
 * 6-bit value of that character in the base64 alphabet (A-Z, a-z, 0-9, '+',
 * '/'), or -1 for every other byte.  Note that the padding character '=' is
 * also -1 here; nonbase64() treats it separately. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
9439 
9440 /* Returns the table value sign-extended to 32 bits.  Knowing that the upper
9441  * bits will be 1 for unrecognized characters makes it easier to check for
9442  * this error condition later (see below). */
b64lookup(unsigned char ch)9443 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9444 
/* Returns true if |ch| is neither a base64 alphabet character nor the padding
 * character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
9448 
/* Decodes the |len| bytes of base64 text at |ptr| and pushes the decoded bytes
 * to the current frame's sink with selector |sel|.
 *
 * The input must consist of whole 4-character groups.  Only the final group
 * may be padded ("xx==" or "xxx="); a padded group followed by more input is
 * rejected rather than silently dropping the remaining data.
 *
 * Returns true on success.  On malformed input an error is set on p->status
 * and false is returned; groups decoded before the error have already been
 * pushed to the sink. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    /* Convert to unsigned before shifting: left-shifting the -1 returned for
     * an unrecognized character is undefined for a signed type.  The all-ones
     * pattern still sets the top bit of |val| after the shift. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; returns true if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3]) ) {
    upb_status_seterrf(p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  /* From here on the group contains '='.  Padding ends the data, so this must
   * be the last group of the input. */
  if (limit - ptr != 4) {
    goto badpadding;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    UPB_ASSERT(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  return false;
}
9529 
9530 
9531 /* Accumulate buffer **********************************************************/
9532 
9533 /* Functionality for accumulating a buffer.
9534  *
9535  * Some parts of the parser need an entire value as a contiguous string.  For
9536  * example, to look up a member name in a hash table, or to turn a string into
9537  * a number, the relevant library routines need the input string to be in
9538  * contiguous memory, even if the value spanned two or more buffers in the
9539  * input.  These routines handle that.
9540  *
9541  * In the common case we can just point to the input buffer to get this
9542  * contiguous string and avoid any actual copy.  So we optimistically begin
9543  * this way.  But there are a few cases where we must instead copy into a
9544  * separate buffer:
9545  *
9546  *   1. The string was not contiguous in the input (it spanned buffers).
9547  *
9548  *   2. The string included escape sequences that need to be interpreted to get
9549  *      the true value in a contiguous buffer. */
9550 
/* Debug check that nothing is currently accumulated: no data pointer and a
 * length of zero. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_ASSERT(p->accumulated == NULL);
  UPB_ASSERT(p->accumulated_len == 0);
}
9555 
/* Forgets the accumulated value.  The accumulate buffer itself (and its
 * capacity) is kept for reuse. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
9560 
9561 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)9562 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9563   void *mem;
9564   size_t old_size = p->accumulate_buf_size;
9565   size_t new_size = UPB_MAX(old_size, 128);
9566   while (new_size < need) {
9567     new_size = saturating_multiply(new_size, 2);
9568   }
9569 
9570   mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size);
9571   if (!mem) {
9572     upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
9573     return false;
9574   }
9575 
9576   p->accumulate_buf = mem;
9577   p->accumulate_buf_size = new_size;
9578   return true;
9579 }
9580 
9581 /* Logically appends the given data to the append buffer.
9582  * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9583  * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)9584 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9585                               bool can_alias) {
9586   size_t need;
9587 
9588   if (!p->accumulated && can_alias) {
9589     p->accumulated = buf;
9590     p->accumulated_len = len;
9591     return true;
9592   }
9593 
9594   if (!checked_add(p->accumulated_len, len, &need)) {
9595     upb_status_seterrmsg(p->status, "Integer overflow.");
9596     return false;
9597   }
9598 
9599   if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9600     return false;
9601   }
9602 
9603   if (p->accumulated != p->accumulate_buf) {
9604     memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9605     p->accumulated = p->accumulate_buf;
9606   }
9607 
9608   memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9609   p->accumulated_len += len;
9610   return true;
9611 }
9612 
9613 /* Returns a pointer to the data accumulated since the last accumulate_clear()
9614  * call, and writes the length to *len.  This with point either to the input
9615  * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)9616 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9617   UPB_ASSERT(p->accumulated);
9618   *len = p->accumulated_len;
9619   return p->accumulated;
9620 }
9621 
9622 
/* Multi-part text data *******************************************************/
9624 
9625 /* When we have text data in the input, it can often come in multiple segments.
9626  * For example, there may be some raw string data followed by an escape
9627  * sequence.  The two segments are processed with different logic.  Also buffer
9628  * seams in the input can cause multiple segments.
9629  *
9630  * As we see segments, there are two main cases for how we want to process them:
9631  *
9632  *  1. we want to push the captured input directly to string handlers.
9633  *
9634  *  2. we need to accumulate all the parts into a contiguous buffer for further
9635  *     processing (field name lookup, string->number conversion, etc). */
9636 
/* This is the set of states for p->multipart_state.  multipart_start() and
 * multipart_startaccum() leave MULTIPART_INACTIVE; multipart_end() returns to
 * it. */
enum {
  /* We are not currently processing multipart data. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
9650 
9651 /* Start a multi-part text value where we accumulate the data for processing at
9652  * the end. */
multipart_startaccum(upb_json_parser * p)9653 static void multipart_startaccum(upb_json_parser *p) {
9654   assert_accumulate_empty(p);
9655   UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
9656   p->multipart_state = MULTIPART_ACCUMULATE;
9657 }
9658 
9659 /* Start a multi-part text value where we immediately push text data to a string
9660  * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)9661 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
9662   assert_accumulate_empty(p);
9663   UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
9664   p->multipart_state = MULTIPART_PUSHEAGERLY;
9665   p->string_selector = sel;
9666 }
9667 
/* Handles one part of the current multi-part text value according to
 * p->multipart_state:
 *
 *   - MULTIPART_ACCUMULATE: the part is appended to the accumulated value.
 *   - MULTIPART_PUSHEAGERLY: the part is pushed to the current sink with the
 *     stored string selector; the buffer handle is passed along only when
 *     |can_alias| is true.
 *   - MULTIPART_INACTIVE: an internal error is reported.
 *
 * Returns false if an error was set on p->status, true otherwise. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  const int state = p->multipart_state;

  if (state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    return false;
  }

  if (state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
9691 
9692 /* Note: this invalidates the accumulate buffer!  Call only after reading its
9693  * contents. */
multipart_end(upb_json_parser * p)9694 static void multipart_end(upb_json_parser *p) {
9695   /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */
9696   p->multipart_state = MULTIPART_INACTIVE;
9697   accumulate_clear(p);
9698 }
9699 
9700 
9701 /* Input capture **************************************************************/
9702 
9703 /* Functionality for capturing a region of the input as text.  Gracefully
9704  * handles the case where a buffer seam occurs in the middle of the captured
9705  * region. */
9706 
/* Starts capturing input at |ptr|.  A multi-part value must already be active
 * and no other capture may be in progress. */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture == NULL);
  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);

  p->capture = ptr;
}
9712 
/* Finishes the capture in progress: hands the region from the capture start up
 * to |ptr| to multipart_text() (aliasing allowed).  On success the capture is
 * cleared and true is returned; on failure the capture is left in place and
 * false is returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture);

  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
9722 
9723 /* This is called at the end of each input buffer (ie. when we have hit a
9724  * buffer seam).  If we are in the middle of capturing the input, this
9725  * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)9726 static void capture_suspend(upb_json_parser *p, const char **ptr) {
9727   if (!p->capture) return;
9728 
9729   if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
9730     /* We use this as a signal that we were in the middle of capturing, and
9731      * that capturing should resume at the beginning of the next buffer.
9732      *
9733      * We can't use *ptr here, because we have no guarantee that this pointer
9734      * will be valid when we resume (if the underlying memory is freed, then
9735      * using the pointer at all, even to compare to NULL, is likely undefined
9736      * behavior). */
9737     p->capture = &suspend_capture;
9738   } else {
9739     /* Need to back up the pointer to the beginning of the capture, since
9740      * we were not able to actually preserve it. */
9741     *ptr = p->capture;
9742   }
9743 }
9744 
/* Restarts a suspended capture at |ptr|, the beginning of the new buffer.
 * Does nothing when no capture was in progress. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }

  UPB_ASSERT(p->capture == &suspend_capture);
  p->capture = ptr;
}
9751 
9752 
9753 /* Callbacks from the parser **************************************************/
9754 
9755 /* These are the functions called directly from the parser itself.
9756  * We define these in the same order as their declarations in the parser. */
9757 
/* Maps the character following a backslash in a JSON string to the character
 * it stands for.  Any character outside the handled set trips an assertion
 * and yields 'x'. */
static char escape_char(char in) {
  if (in == 'r') return '\r';
  if (in == 't') return '\t';
  if (in == 'n') return '\n';
  if (in == 'f') return '\f';
  if (in == 'b') return '\b';
  if (in == '/') return '/';
  if (in == '"') return '"';
  if (in == '\\') return '\\';

  UPB_ASSERT(0);
  return 'x';
}
9773 
/* Translates the escape character at |ptr| and emits the resulting single
 * byte as a (non-aliasable) part of the current multi-part text value. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(*ptr);

  return multipart_text(p, &unescaped, 1, false);
}
9778 
/* Begins a unicode escape: clears the value that hexdigit() builds up. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
9782 
/* Folds the hexadecimal digit at |ptr| into p->digit: the value so far is
 * shifted up by four bits and the digit's value is added.  The character is
 * expected to be 0-9, a-f or A-F. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char ch = *ptr;
  uint32_t nibble;

  if (ch >= '0' && ch <= '9') {
    nibble = (ch - '0');
  } else if (ch >= 'a' && ch <= 'f') {
    nibble = ((ch - 'a') + 10);
  } else {
    UPB_ASSERT(ch >= 'A' && ch <= 'F');
    nibble = ((ch - 'A') + 10);
  }

  p->digit = (p->digit << 4) + nibble;
}
9797 
/* Ends a unicode escape: encodes the code point collected in p->digit as
 * UTF-8 (one to three bytes) and emits it as a non-aliasable part of the
 * current multi-part text value. */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
  int nbytes;

  if (cp <= 0x7F) {
    utf8[0] = cp;
    nbytes = 1;
  } else if (cp <= 0x07FF) {
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    nbytes = 2;
  } else /* cp <= 0xFFFF */ {
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    nbytes = 3;
  }
  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
   * we have to wait for the next escape to get the full code point). */

  return multipart_text(p, utf8, nbytes, false);
}
9825 
/* Parser callback for the start of a run of plain text: begins an input
 * capture at |ptr|. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
9829 
/* Parser callback for the end of a run of plain text: ends the input capture
 * at |ptr| and returns whether the captured text could be processed. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
9833 
/* Parser callback for the start of a number at |ptr|.
 *
 * Opens whatever wrapper or google.protobuf.Value context the number needs:
 *   - at top level, the message must be a number wrapper or a Value,
 *     otherwise false is returned;
 *   - below top level, a subobject is started first when the field is a
 *     number wrapper or a Value field (false is returned if that fails).
 *
 * Then starts accumulating the number's text and begins capturing at |ptr|. */
static bool start_number(upb_json_parser *p, const char *ptr) {
  if (is_top_level(p)) {
    if (is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return false;
    } else {
      start_value_object(p, VALUE_NUMBERVALUE);
    }
  } else {
    bool is_wrapper = does_number_wrapper_start(p);

    if (is_wrapper || is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) {
        return false;
      }
      if (is_wrapper) {
        start_wrapper_object(p);
      } else {
        start_value_object(p, VALUE_NUMBERVALUE);
      }
    }
  }

  multipart_startaccum(p);
  capture_begin(p, ptr);
  return true;
}
9859 
9860 static bool parse_number(upb_json_parser *p, bool is_quoted);
9861 
/* Ends the capture of a number at |ptr| and parses the accumulated text as an
 * unquoted number.  When the current frame has no field, the text is simply
 * discarded and true is returned. */
static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  if (p->top->f != NULL) {
    return parse_number(p, false);
  }

  multipart_end(p);
  return true;
}
9874 
/* Parser callback for the end of a number at |ptr|.  Parses the number and
 * then closes the number-wrapper or google.protobuf.Value context it was
 * parsed in, if any, ending the enclosing subobject as well when not at top
 * level.  Returns false if the number could not be parsed. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!end_number_nontop(p, ptr)) {
    return false;
  }

  if (does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain number field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
9898 
9899 /* |buf| is NULL-terminated. |buf| itself will never include quotes;
9900  * |is_quoted| tells us whether this text originally appeared inside quotes. */
parse_number_from_buffer(upb_json_parser * p,const char * buf,bool is_quoted)9901 static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
9902                                      bool is_quoted) {
9903   size_t len = strlen(buf);
9904   const char *bufend = buf + len;
9905   char *end;
9906   upb_fieldtype_t type = upb_fielddef_type(p->top->f);
9907   double val;
9908   double dummy;
9909   double inf = UPB_INFINITY;
9910 
9911   errno = 0;
9912 
9913   if (len == 0 || buf[0] == ' ') {
9914     return false;
9915   }
9916 
9917   /* For integer types, first try parsing with integer-specific routines.
9918    * If these succeed, they will be more accurate for int64/uint64 than
9919    * strtod().
9920    */
9921   switch (type) {
9922     case UPB_TYPE_ENUM:
9923     case UPB_TYPE_INT32: {
9924       long val = strtol(buf, &end, 0);
9925       if (errno == ERANGE || end != bufend) {
9926         break;
9927       } else if (val > INT32_MAX || val < INT32_MIN) {
9928         return false;
9929       } else {
9930         upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val);
9931         return true;
9932       }
9933     }
9934     case UPB_TYPE_UINT32: {
9935       unsigned long val = strtoul(buf, &end, 0);
9936       if (end != bufend) {
9937         break;
9938       } else if (val > UINT32_MAX || errno == ERANGE) {
9939         return false;
9940       } else {
9941         upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val);
9942         return true;
9943       }
9944     }
9945     /* XXX: We can't handle [u]int64 properly on 32-bit machines because
9946      * strto[u]ll isn't in C89. */
9947     case UPB_TYPE_INT64: {
9948       long val = strtol(buf, &end, 0);
9949       if (errno == ERANGE || end != bufend) {
9950         break;
9951       } else {
9952         upb_sink_putint64(p->top->sink, parser_getsel(p), val);
9953         return true;
9954       }
9955     }
9956     case UPB_TYPE_UINT64: {
9957       unsigned long val = strtoul(p->accumulated, &end, 0);
9958       if (end != bufend) {
9959         break;
9960       } else if (errno == ERANGE) {
9961         return false;
9962       } else {
9963         upb_sink_putuint64(p->top->sink, parser_getsel(p), val);
9964         return true;
9965       }
9966     }
9967     default:
9968       break;
9969   }
9970 
9971   if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
9972     /* Quoted numbers for integer types are not allowed to be in double form. */
9973     return false;
9974   }
9975 
9976   if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
9977     /* C89 does not have an INFINITY macro. */
9978     val = inf;
9979   } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
9980     val = -inf;
9981   } else {
9982     val = strtod(buf, &end);
9983     if (errno == ERANGE || end != bufend) {
9984       return false;
9985     }
9986   }
9987 
9988   switch (type) {
9989 #define CASE(capitaltype, smalltype, ctype, min, max)                     \
9990     case UPB_TYPE_ ## capitaltype: {                                      \
9991       if (modf(val, &dummy) != 0 || val > max || val < min) {             \
9992         return false;                                                     \
9993       } else {                                                            \
9994         upb_sink_put ## smalltype(p->top->sink, parser_getsel(p),        \
9995                                   (ctype)val);                            \
9996         return true;                                                      \
9997       }                                                                   \
9998       break;                                                              \
9999     }
10000     case UPB_TYPE_ENUM:
10001     CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
10002     CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
10003     CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
10004     CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
10005 #undef CASE
10006 
10007     case UPB_TYPE_DOUBLE:
10008       upb_sink_putdouble(p->top->sink, parser_getsel(p), val);
10009       return true;
10010     case UPB_TYPE_FLOAT:
10011       if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
10012         return false;
10013       } else {
10014         upb_sink_putfloat(p->top->sink, parser_getsel(p), val);
10015         return true;
10016       }
10017     default:
10018       return false;
10019   }
10020 }
10021 
/* Converts the currently accumulated text to a number for the current field
 * and emits it.  |is_quoted| is forwarded to parse_number_from_buffer().
 * On failure an error is recorded in p->status.  The multipart text is ended
 * in both the success and the conversion-failure case. */
static bool parse_number(upb_json_parser *p, bool is_quoted) {
  size_t text_len;
  const char *text;
  bool parsed;

  /* strtol() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NUL-terminated
   * buffer. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  text = accumulate_getptr(p, &text_len);
  parsed = parse_number_from_buffer(p, text, is_quoted);

  if (!parsed) {
    /* Report while |text| is still the active accumulated buffer. */
    upb_status_seterrf(p->status, "error parsing number: %s", text);
  }

  multipart_end(p);
  return parsed;
}
10043 
/* Emits |val| for the current field.  Does nothing (and succeeds) when the
 * frame has no field.  Fails with an error in p->status when the current
 * field is not of bool type. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool ok;

  if (p->top->f == NULL) return true;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val);
    UPB_ASSERT(ok);
    return true;
  }

  upb_status_seterrf(p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
10063 
/* Handles a JSON true/false literal.  Opens a BoolValue wrapper or Value
 * object when the target is one of those well-known types, emits |val|, and
 * closes whatever was opened.  A bare bool at top level that is neither of
 * those well-known types is rejected. */
static bool end_bool(upb_json_parser *p, bool val) {
  if (!is_top_level(p)) {
    if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) {
      if (!start_subobject(p)) return false;
      start_wrapper_object(p);
    } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) return false;
      start_value_object(p, VALUE_BOOLVALUE);
    }
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    start_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    start_value_object(p, VALUE_BOOLVALUE);
  } else {
    return false;
  }

  /* Values of unknown fields are accepted but not emitted. */
  if (p->top->is_unknown_field) return true;

  if (!parser_putbool(p, val)) return false;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    return true;
  }

  /* Wrapper/Value nested in another message: also end its subobject. */
  if (!is_top_level(p)) end_subobject(p);
  return true;
}
10111 
/* Handles a JSON null literal.  null is only materialized when the target is
 * a well-known Value (as the top-level message or as a field); in that case
 * the Value's null_value member is filled by feeding the text "0" through the
 * number parser.  For any other target null is accepted and ignored.
 *
 * Returns false if the subobject cannot be started or if capturing/parsing
 * the synthetic "0" fails. */
static bool end_null(upb_json_parser *p) {
  const char *zero_ptr = "0";

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_NULLVALUE);
    } else {
      return true;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_NULLVALUE);
  } else {
    return true;
  }

  /* Fill null_value field. */
  multipart_startaccum(p);
  capture_begin(p, zero_ptr);
  /* Both steps can fail; do not report success with a value that was never
   * emitted. */
  if (!capture_end(p, zero_ptr + 1)) {
    return false;
  }
  if (!parse_number(p, false)) {
    return false;
  }

  end_value_object(p);
  if (!is_top_level(p)) {
    end_subobject(p);
  }

  return true;
}
10143 
/* Begins accumulating the text of a string value inside an Any frame.
 * Always succeeds. */
static bool start_any_stringval(upb_json_parser *p) {
  multipart_startaccum(p);

  return true;
}
10148 
/* Called at the opening of a JSON string value.  Opens any well-known-type
 * wrapper/object the string is destined for, then prepares text handling
 * according to the current field: string/bytes fields get their own parser
 * frame, other non-bool/non-message fields accumulate text in the current
 * frame, and bool/message fields are rejected with an error. */
static bool start_stringval(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;
  upb_fieldtype_t type;

  if (is_top_level(p)) {
    if (is_string_wrapper_object(p) || is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else {
    /* Classify the target field first; every recognized kind needs a
     * subobject frame before its specific start routine runs. */
    enum {
      STRVAL_KIND_NONE,
      STRVAL_KIND_WRAPPER,
      STRVAL_KIND_FIELDMASK,
      STRVAL_KIND_OBJECT,
      STRVAL_KIND_VALUE
    } kind;

    if (does_string_wrapper_start(p) || does_number_wrapper_start(p)) {
      kind = STRVAL_KIND_WRAPPER;
    } else if (does_fieldmask_start(p)) {
      kind = STRVAL_KIND_FIELDMASK;
    } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
      kind = STRVAL_KIND_OBJECT;
    } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      kind = STRVAL_KIND_VALUE;
    } else {
      kind = STRVAL_KIND_NONE;
    }

    if (kind != STRVAL_KIND_NONE) {
      if (!start_subobject(p)) return false;

      switch (kind) {
        case STRVAL_KIND_WRAPPER:
          start_wrapper_object(p);
          break;
        case STRVAL_KIND_FIELDMASK:
          start_fieldmask_object(p);
          return true;
        case STRVAL_KIND_OBJECT:
          start_object(p);
          break;
        default: /* STRVAL_KIND_VALUE */
          start_value_object(p, VALUE_STRINGVALUE);
          break;
      }
    }
  }

  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) return start_any_stringval(p);

  if (!upb_fielddef_isstring(p->top->f)) {
    type = upb_fielddef_type(p->top->f);
    if (type == UPB_TYPE_BOOL || type == UPB_TYPE_MESSAGE) {
      upb_status_seterrf(p->status,
                         "String specified for bool or submessage field: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }
    /* No need to push a frame -- numeric values in quotes remain in the
     * current parser frame.  These values must accumulate so we can convert
     * them all at once at the end. */
    multipart_startaccum(p);
    return true;
  }

  if (!check_stack(p)) return false;

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  p->top = inner;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
    /* For STRING fields we push data directly to the handlers as it is
     * parsed.  We don't do this yet for BYTES fields, because our base64
     * decoder is not streaming.
     *
     * TODO(haberman): make base64 decoding streaming also. */
    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  } else {
    multipart_startaccum(p);
  }
  return true;
}
10240 
/* Finishes a string value inside an Any frame: emits the accumulated text as
 * the type_url string and resolves the payload message type named after the
 * "type.googleapis.com/" prefix.
 *
 * Returns false (with an error in p->status) if the parser stack is
 * exhausted, the URL lacks the expected prefix or has nothing after it, or
 * the named type is not found in the symbol table. */
static bool end_any_stringval(upb_json_parser *p) {
  static const char type_url_prefix[] = "type.googleapis.com/";
  const size_t prefix_len = sizeof(type_url_prefix) - 1;
  size_t len;
  const char *buf = accumulate_getptr(p, &len);

  /* Set type_url */
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, buf, len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  multipart_end(p);

  /* Resolve type url.  The length is checked first: |buf| is length-delimited
   * rather than NUL-terminated, so strncmp() must not be allowed to look at
   * more than |len| bytes. */
  if (len > prefix_len && strncmp(buf, type_url_prefix, prefix_len) == 0) {
    const upb_msgdef *payload_type = NULL;
    buf += prefix_len;
    len -= prefix_len;

    payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
    if (payload_type == NULL) {
      upb_status_seterrf(
          p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
      return false;
    }

    json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);

    return true;
  } else {
    upb_status_seterrf(
        p->status, "Invalid type url: %.*s\n", (int)len, buf);
    return false;
  }
}
10282 
/* Finishes a JSON string value for the current field, without closing any
 * enclosing well-known wrapper (the caller does that).
 *
 * - Timestamp/Duration messages and frames without a field: the accumulated
 *   text is just released.
 * - Any frames: delegated to end_any_stringval().
 * - BYTES: accumulated text is pushed through the base64 decoder, then the
 *   string frame is closed like STRING.
 * - STRING: the string frame opened at the start of the value is closed.
 * - ENUM: the text is resolved as a symbolic enum name.
 * - Numeric types: the text is parsed as a quoted number.
 *
 * Returns false on failure; an error is recorded in p->status for unknown
 * enum names and for unexpected field types. */
static bool end_stringval_nontop(upb_json_parser *p) {
  bool ok = true;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
    multipart_end(p);
    return true;
  }

  if (p->top->f == NULL) {
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_stringval(p);
  }

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(p->top->sink, sel);
      /* Pop the string sub-frame. */
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(p->top->sink, sel, int_val);
      } else {
        /* The precision argument of "%.*s" must be an int, not a size_t. */
        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
      }

      break;
    }

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      ok = parse_number(p, true);
      break;

    default:
      UPB_ASSERT(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
10356 
/* Called at the close of a JSON string value.  Finishes the value itself and
 * then closes the well-known wrapper/object (if any) that was opened for it,
 * including its subobject frame when it is not the top-level message. */
static bool end_stringval(upb_json_parser *p) {
  /* FieldMask's stringvals have been ended when handling them. Only need to
   * close FieldMask here. */
  if (does_fieldmask_end(p)) {
    end_fieldmask_object(p);
    if (!is_top_level(p)) end_subobject(p);
    return true;
  }

  if (!end_stringval_nontop(p)) return false;

  if (does_string_wrapper_end(p) || does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
             is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
    end_object(p);
  } else {
    /* Ordinary string field: nothing else to close. */
    return true;
  }

  if (!is_top_level(p)) end_subobject(p);
  return true;
}
10401 
/* Marks |ptr| as the start of the captured duration text. */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10405 
/* Finishes the numeric part of a Duration string (captured up to |ptr|),
 * splits it at the decimal point, and emits the "seconds" (int64) and "nanos"
 * (int32) members of the current message.
 *
 * Returns false with an error in p->status if the text has more digits than
 * the fixed-size scratch buffers can hold, fails numeric conversion, or the
 * seconds value is outside [-315576000000, 315576000000].  Also returns false
 * if capture_end() fails. */
static bool end_duration_base(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char seconds_buf[14];
  char nanos_buf[12];
  char *end;
  int64_t seconds = 0;
  int32_t nanos = 0;
  double val = 0.0;
  const char *seconds_membername = "seconds";
  const char *nanos_membername = "nanos";
  size_t fraction_start;
  size_t fraction_len;
  bool negative;

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  memset(seconds_buf, 0, sizeof(seconds_buf));
  memset(nanos_buf, 0, sizeof(nanos_buf));

  /* Find out base end. The maximum duration is 315576000000, which cannot be
   * represented by double without losing precision. Thus, we need to handle
   * fraction and base separately. */
  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
       fraction_start++);
  fraction_len = len - fraction_start;

  /* Both parts are copied into fixed-size buffers that must keep a trailing
   * NUL (and, for nanos, a leading '0').  Reject anything that does not fit
   * instead of writing past the end of the arrays. */
  if (fraction_start > sizeof(seconds_buf) - 1 ||
      fraction_len > sizeof(nanos_buf) - 2) {
    upb_status_seterrf(p->status, "error parsing duration: too many digits");
    return false;
  }

  /* A leading '-' must also negate nanos when the integer part is zero. */
  negative = (len > 0 && buf[0] == '-');

  /* Parse base */
  memcpy(seconds_buf, buf, fraction_start);
  errno = 0; /* strtol() only sets errno on failure; clear stale values. */
  seconds = strtol(seconds_buf, &end, 10);
  if (errno == ERANGE || end != seconds_buf + fraction_start) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       seconds_buf);
    return false;
  }

  if (seconds > 315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                   "maximum acceptable value is "
                                   "315576000000");
    return false;
  }

  if (seconds < -315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                   "minimum acceptable value is "
                                   "-315576000000");
    return false;
  }

  /* Parse fraction: prefix a '0' so that ".5" becomes "0.5" (and an absent
   * fraction becomes plain "0"). */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf + fraction_start, fraction_len);
  errno = 0;
  val = strtod(nanos_buf, &end);
  if (errno == ERANGE || end != nanos_buf + fraction_len + 1) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       nanos_buf);
    return false;
  }

  nanos = val * 1000000000;
  if (seconds < 0 || negative) nanos = -nanos;

  /* Clean up buffer */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous arena */
  multipart_startaccum(p);

  return true;
}
10494 
/* Converts the currently accumulated timestamp component to an int with
 * atoi(), then restarts accumulation for the next component. */
static int parse_timestamp_number(upb_json_parser *p) {
  size_t text_len;
  const char *text;
  int parsed;

  /* atoi() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NUL-terminated
   * buffer. */
  multipart_text(p, "\0", 1, false);
  text = accumulate_getptr(p, &text_len);

  parsed = atoi(text);

  /* Release this component's text and begin a fresh accumulation. */
  multipart_end(p);
  multipart_startaccum(p);

  return parsed;
}
10511 
/* Marks |ptr| as the start of the timestamp's year digits. */
static void start_year(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10515 
/* Ends the year capture at |ptr| and stores it in p->tm as years since 1900.
 * Returns false if the capture fails. */
static bool end_year(upb_json_parser *p, const char *ptr) {
  if (capture_end(p, ptr)) {
    p->tm.tm_year = parse_timestamp_number(p) - 1900;
    return true;
  }
  return false;
}
10523 
/* Marks |ptr| as the start of the timestamp's month digits. */
static void start_month(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10527 
/* Ends the month capture at |ptr| and stores it in p->tm as a zero-based
 * month.  Returns false if the capture fails. */
static bool end_month(upb_json_parser *p, const char *ptr) {
  if (capture_end(p, ptr)) {
    p->tm.tm_mon = parse_timestamp_number(p) - 1;
    return true;
  }
  return false;
}
10535 
/* Marks |ptr| as the start of the timestamp's day-of-month digits. */
static void start_day(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10539 
/* Ends the day capture at |ptr| and stores it in p->tm.tm_mday.
 * Returns false if the capture fails. */
static bool end_day(upb_json_parser *p, const char *ptr) {
  if (capture_end(p, ptr)) {
    p->tm.tm_mday = parse_timestamp_number(p);
    return true;
  }
  return false;
}
10547 
/* Marks |ptr| as the start of the timestamp's hour digits. */
static void start_hour(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10551 
/* Ends the hour capture at |ptr| and stores it in p->tm.tm_hour.
 * Returns false if the capture fails. */
static bool end_hour(upb_json_parser *p, const char *ptr) {
  if (capture_end(p, ptr)) {
    p->tm.tm_hour = parse_timestamp_number(p);
    return true;
  }
  return false;
}
10559 
/* Marks |ptr| as the start of the timestamp's minute digits. */
static void start_minute(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10563 
/* Ends the minute capture at |ptr| and stores it in p->tm.tm_min.
 * Returns false if the capture fails. */
static bool end_minute(upb_json_parser *p, const char *ptr) {
  if (capture_end(p, ptr)) {
    p->tm.tm_min = parse_timestamp_number(p);
    return true;
  }
  return false;
}
10571 
/* Marks |ptr| as the start of the timestamp's second digits. */
static void start_second(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10575 
/* Ends the second capture at |ptr| and stores it in p->tm.tm_sec.
 * Returns false if the capture fails. */
static bool end_second(upb_json_parser *p, const char *ptr) {
  if (capture_end(p, ptr)) {
    p->tm.tm_sec = parse_timestamp_number(p);
    return true;
  }
  return false;
}
10583 
/* Resets the broken-down time in p->tm to all zeros before a new timestamp
 * is parsed into it. */
static void start_timestamp_base(upb_json_parser *p) {
  memset(&p->tm, 0, sizeof(p->tm));
}
10587 
/* Marks |ptr| as the start of the timestamp's fractional-seconds text. */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10591 
/* Finishes the fractional-seconds part of a timestamp (captured up to |ptr|),
 * converts it to nanoseconds and emits it as the "nanos" member of the
 * current message.
 *
 * Returns false if the capture fails, if the captured text is longer than 10
 * characters (presumably the '.' plus nine digits -- see the error message),
 * or if the text cannot be converted by strtod(). */
static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char nanos_buf[12];
  char *end;
  double val = 0.0;
  int32_t nanos;
  const char *nanos_membername = "nanos";

  memset(nanos_buf, 0, sizeof(nanos_buf));

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  /* Also guarantees the copy below fits: '0' + 10 chars + NUL == 12 bytes. */
  if (len > 10) {
    upb_status_seterrf(p->status,
        "error parsing timestamp: at most 9-digit fraction.");
    return false;
  }

  /* Parse nanos: prefix a '0' so the captured text forms a full number. */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf, len);
  /* strtod() only sets errno on failure; clear any stale ERANGE left by an
   * earlier call so that it is not mistaken for a failure here. */
  errno = 0;
  val = strtod(nanos_buf, &end);

  if (errno == ERANGE || end != nanos_buf + len + 1) {
    upb_status_seterrf(p->status, "error parsing timestamp nanos: %s",
                       nanos_buf);
    return false;
  }

  nanos = val * 1000000000;

  /* Clean up previous environment */
  multipart_end(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
10644 
/* Marks |ptr| as the start of the timestamp's zone designator text. */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10648 
/* Integer division of |n| by |d|, rounded up (for non-negative |n| and
 * positive |d|). */
static int div_round_up2(int n, int d) {
  return (n + d - 1) / d;
}

/* Returns the number of days between 1970-01-01 and the given proleptic
 * Gregorian date: epoch_days(1970, 1, 1) == 1970-01-01 == 0.
 *
 * |month| is 1-based.  Values outside 1..12 are normalized by carrying whole
 * years into |year| (so month 13 of year Y is January of Y+1 and month 0 is
 * December of Y-1) rather than indexing outside the month table. */
static int epoch_days(int year, int month, int day) {
  static const uint16_t month_yday[12] = {0,   31,  59,  90,  120, 151,
                                          181, 212, 243, 273, 304, 334};
  int month0 = month - 1; /* Zero-based month; may be out of range here. */
  int febs_since_0;
  int leap_days_since_0;
  int days_since_0;

  /* Fold out-of-range months into the year so that month0 ends in 0..11. */
  year += month0 / 12;
  month0 %= 12;
  if (month0 < 0) {
    month0 += 12;
    year--;
  }

  /* Number of Februaries that have fully elapsed since year 0. */
  febs_since_0 = month0 >= 2 ? year + 1 : year;
  leap_days_since_0 = div_round_up2(febs_since_0, 4) -
                      div_round_up2(febs_since_0, 100) +
                      div_round_up2(febs_since_0, 400);
  days_since_0 =
      365 * year + month_yday[month0] + (day - 1) + leap_days_since_0;

  /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD).
   * Since the "BC" system does not have a year zero, 1 BC == year zero. */
  return days_since_0 - 719528;
}
10668 
/* Converts the broken-down UTC time |tp| to seconds since the Unix epoch.
 * Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read. */
static int64_t upb_timegm(const struct tm *tp) {
  int64_t days = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday);
  int64_t hours = days * 24 + tp->tm_hour;
  int64_t minutes = hours * 60 + tp->tm_min;

  return minutes * 60 + tp->tm_sec;
}
10676 
/* Finishes the zone designator of a timestamp (captured up to |ptr|), applies
 * the hour offset to p->tm when the zone is not 'Z', converts p->tm to
 * seconds since the Unix epoch and emits it as the "seconds" member.
 *
 * Returns false if the capture fails, the offset cannot be scanned, or the
 * resulting time is earlier than 0001-01-01T00:00:00Z. */
static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
  const char *seconds_membername = "seconds";
  const char *zone;
  size_t zone_len;
  int64_t seconds;
  int offset_hours;

  if (!capture_end(p, ptr)) return false;

  zone = accumulate_getptr(p, &zone_len);

  if (zone[0] != 'Z') {
    /* zone[0] is the sign; the hour digits follow it. */
    if (sscanf(zone + 1, "%2d:00", &offset_hours) != 1) {
      upb_status_seterrf(p->status, "error parsing timestamp offset");
      return false;
    }

    /* A positive offset means local time is ahead of UTC, so subtract. */
    p->tm.tm_hour += (zone[0] == '+') ? -offset_hours : offset_hours;
  }

  /* Normalize tm */
  seconds = upb_timegm(&p->tm);

  /* Check timestamp boundary */
  if (seconds < -62135596800) {
    upb_status_seterrf(p->status, "error parsing timestamp: "
                                   "minimum acceptable value is "
                                   "0001-01-01T00:00:00Z");
    return false;
  }

  /* Clean up previous environment */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
10730 
/* Marks |ptr| as the start of a piece of FieldMask path text. */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10734 
/* Ends the capture of FieldMask path text at |ptr|; returns whatever
 * capture_end() reports. */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  return captured;
}
10738 
/* Begins one path of a FieldMask: pushes a string sub-frame for it and starts
 * accumulating its text.  Returns false if the parser stack is exhausted. */
static bool start_fieldmask_path(upb_json_parser *p) {
  upb_jsonparser_frame *frame;
  upb_selector_t startstr_sel;

  if (!check_stack(p)) {
    return false;
  }

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  frame = start_jsonparser_frame(p);
  startstr_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, startstr_sel, 0, &frame->sink);

  /* The sub-frame refers to the same message and field as its parent. */
  frame->m = p->top->m;
  frame->f = p->top->f;
  p->top = frame;

  multipart_startaccum(p);
  return true;
}
10757 
/* Pushes the |len| bytes at |ptr| to the current sink one character at a
 * time, converting lowerCamelCase to snake_case: every ASCII uppercase letter
 * except one in the first position is emitted as '_' followed by its
 * lowercase form.  Always returns true. */
static bool lower_camel_push(
    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
  size_t i;

  for (i = 0; i < len; i++) {
    const char *cur = ptr + i;
    bool is_upper = (*cur >= 'A' && *cur <= 'Z');

    if (is_upper && i != 0) {
      char lower = tolower(*cur);
      upb_sink_putstring(p->top->sink, sel, "_", 1, NULL);
      upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL);
    } else {
      upb_sink_putstring(p->top->sink, sel, cur, 1, NULL);
    }
  }

  return true;
}
10774 
/* Ends one FieldMask path: emits the accumulated path text converted to
 * snake_case, closes the string, and pops the string sub-frame. */
static bool end_fieldmask_path(upb_json_parser *p) {
  upb_selector_t string_sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_selector_t endstr_sel;
  bool pushed;

  pushed = lower_camel_push(p, string_sel, p->accumulated, p->accumulated_len);
  if (!pushed) return false;

  endstr_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(p->top->sink, endstr_sel);
  p->top--;

  multipart_end(p);
  return true;
}
10791 
/* Begins an object member: the frame must not have a field selected yet, and
 * accumulation of the member name starts. */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);

  multipart_startaccum(p);
}
10796 
10797 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10798  * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)10799 static bool parse_mapentry_key(upb_json_parser *p) {
10800 
10801   size_t len;
10802   const char *buf = accumulate_getptr(p, &len);
10803 
10804   /* Emit the key field. We do a bit of ad-hoc parsing here because the
10805    * parser state machine has already decided that this is a string field
10806    * name, and we are reinterpreting it as some arbitrary key type. In
10807    * particular, integer and bool keys are quoted, so we need to parse the
10808    * quoted string contents here. */
10809 
10810   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10811   if (p->top->f == NULL) {
10812     upb_status_seterrmsg(p->status, "mapentry message has no key");
10813     return false;
10814   }
10815   switch (upb_fielddef_type(p->top->f)) {
10816     case UPB_TYPE_INT32:
10817     case UPB_TYPE_INT64:
10818     case UPB_TYPE_UINT32:
10819     case UPB_TYPE_UINT64:
10820       /* Invoke end_number. The accum buffer has the number's text already. */
10821       if (!parse_number(p, true)) {
10822         return false;
10823       }
10824       break;
10825     case UPB_TYPE_BOOL:
10826       if (len == 4 && !strncmp(buf, "true", 4)) {
10827         if (!parser_putbool(p, true)) {
10828           return false;
10829         }
10830       } else if (len == 5 && !strncmp(buf, "false", 5)) {
10831         if (!parser_putbool(p, false)) {
10832           return false;
10833         }
10834       } else {
10835         upb_status_seterrmsg(p->status,
10836                              "Map bool key not 'true' or 'false'");
10837         return false;
10838       }
10839       multipart_end(p);
10840       break;
10841     case UPB_TYPE_STRING:
10842     case UPB_TYPE_BYTES: {
10843       upb_sink subsink;
10844       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10845       upb_sink_startstr(p->top->sink, sel, len, &subsink);
10846       sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10847       upb_sink_putstring(subsink, sel, buf, len, NULL);
10848       sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10849       upb_sink_endstr(subsink, sel);
10850       multipart_end(p);
10851       break;
10852     }
10853     default:
10854       upb_status_seterrmsg(p->status, "Invalid field type for map key");
10855       return false;
10856   }
10857 
10858   return true;
10859 }
10860 
10861 /* Helper: emit one map entry (as a submessage in the map field sequence). This
10862  * is invoked from end_membername(), at the end of the map entry's key string,
10863  * with the map key in the accumulate buffer. It parses the key from that
10864  * buffer, emits the handler calls to start the mapentry submessage (setting up
10865  * its subframe in the process), and sets up state in the subframe so that the
10866  * value parser (invoked next) will emit the mapentry's value field and then
10867  * end the mapentry message. */
10868 
handle_mapentry(upb_json_parser * p)10869 static bool handle_mapentry(upb_json_parser *p) {
10870   const upb_fielddef *mapfield;
10871   const upb_msgdef *mapentrymsg;
10872   upb_jsonparser_frame *inner;
10873   upb_selector_t sel;
10874 
10875   /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10876    * for the mapentry itself, and then set |f| in that frame so that the map
10877    * value field is parsed, and also set a flag to end the frame after the
10878    * map-entry value is parsed. */
10879   if (!check_stack(p)) return false;
10880 
10881   mapfield = p->top->mapfield;
10882   mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10883 
10884   inner = start_jsonparser_frame(p);
10885   p->top->f = mapfield;
10886   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10887   upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
10888   inner->m = mapentrymsg;
10889   inner->mapfield = mapfield;
10890 
10891   /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10892    * the key field value to the sink, and these handlers will pop the frame
10893    * if they see is_mapentry (when invoked by the parser state machine, they
10894    * would have just seen the map-entry value, not key). */
10895   inner->is_mapentry = false;
10896   p->top = inner;
10897 
10898   /* send STARTMSG in submsg frame. */
10899   upb_sink_startmsg(p->top->sink);
10900 
10901   parse_mapentry_key(p);
10902 
10903   /* Set up the value field to receive the map-entry value. */
10904   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10905   p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
10906   p->top->mapfield = mapfield;
10907   if (p->top->f == NULL) {
10908     upb_status_seterrmsg(p->status, "mapentry message has no value");
10909     return false;
10910   }
10911 
10912   return true;
10913 }
10914 
/* Called when a member name has been fully accumulated.  Decides what the
 * name refers to in the current frame:
 *   - no message definition in the frame: the member is marked unknown;
 *   - Any frame: delegated to end_any_membername();
 *   - map frame: the name is a map key, delegated to handle_mapentry();
 *   - otherwise: the name is looked up in the frame's name table.
 * Returns false only when the name is not found and unknown fields are not
 * being ignored (or when a delegate fails). */
static bool end_membername(upb_json_parser *p) {
  size_t len;
  const char *buf;
  upb_value v;

  UPB_ASSERT(!p->top->f);

  if (p->top->m == NULL) {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) return end_any_membername(p);
  if (p->top->is_map) return handle_mapentry(p);

  buf = accumulate_getptr(p, &len);

  if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
    /* Known field: select it for the value that follows. */
    p->top->f = upb_value_getconstptr(v);
    multipart_end(p);
    return true;
  }

  if (!p->ignore_json_unknown) {
    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
    return false;
  }

  /* Unknown field, but the parser was configured to skip those. */
  p->top->is_unknown_field = true;
  multipart_end(p);
  return true;
}
10948 
/* Member-name handler for frames flagged is_any.  Only the "@type" member is
 * mapped onto a real field ("type_url" in the frame's name table); every
 * other member is marked as an unknown field at this level.
 *
 * Returns false if the name table has no "type_url" entry; previously the
 * lookup result was ignored and an uninitialized value would have been read
 * in that case. */
static bool end_any_membername(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  upb_value v;

  if (len == 5 && strncmp(buf, "@type", len) == 0) {
    if (!upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v)) {
      upb_status_seterrmsg(p->status, "Any message has no type_url field");
      return false;
    }
    p->top->f = upb_value_getconstptr(v);
    multipart_end(p);
    return true;
  } else {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }
}
10965 
/* Finishes the current object member.  When the frame on top is a map-entry
 * frame, that frame is closed first (ENDMSG on the entry, pop, ENDSUBMSG on
 * the enclosing frame).  In all cases the resulting top frame has its
 * selected field and unknown-field flag cleared. */
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    upb_jsonparser_frame *entry = p->top;
    const upb_fielddef *mapfield;
    upb_selector_t sel;
    bool ok;

    UPB_ASSERT(entry > p->stack);

    /* ENDMSG goes to the map-entry submessage itself. */
    upb_sink_endmsg(entry->sink, p->status);
    mapfield = entry->mapfield;

    /* Pop, then ENDSUBMSG goes to the repeated-field-of-mapentries frame. */
    p->top = entry - 1;
    ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
    UPB_ASSUME(ok);
    upb_sink_endsubmsg(p->top->sink, entry->sink, sel);
  }

  p->top->is_unknown_field = false;
  p->top->f = NULL;
}
10988 
/* Member-start hook for Any frames: performs the normal start_member() work
 * and then passes |ptr| to the frame's any_frame via
 * json_parser_any_frame_set_after_type_url_start_once().
 * NOTE(review): the "_once" suffix suggests only the first call records the
 * position -- confirm against that helper. */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  start_member(p);
  json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
}
10993 
/* Member-end hook for Any frames: passes |ptr| to the frame's any_frame via
 * json_parser_any_frame_set_before_type_url_end(), then performs the normal
 * end_member() work. */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);
  end_member(p);
}
10998 
/* Pushes a new parser frame for a JSON object value.
 *
 *   - Unknown field: a fresh frame is pushed with no sink calls.
 *   - Map field: STARTSEQ is sent and the new frame is flagged is_map.
 *   - Submessage field: STARTSUBMSG is sent, the new frame gets its name
 *     table, and Any messages additionally get an any_frame.
 *   - Anything else: an error is recorded.
 *
 * Returns false on a failed stack check or when the selected field cannot
 * hold an object. */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;
  const upb_fielddef *f;

  if (p->top->is_unknown_field) {
    if (!check_stack(p)) return false;

    p->top = start_jsonparser_frame(p);
    return true;
  }

  f = p->top->f;

  if (upb_fielddef_ismap(f)) {
    /* Beginning of a map: new frame in a repeated-field context. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(f);
    inner->mapfield = f;
    inner->is_map = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_issubmsg(f)) {
    upb_status_seterrf(p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(f));
    return false;
  }

  /* Beginning of a subobject: new frame in the submsg context. */
  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
  inner->m = upb_fielddef_msgsubdef(f);
  set_name_table(p, inner);
  p->top = inner;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
    p->top->is_any = true;
    p->top->any_frame = json_parser_any_frame_new(p);
  } else {
    p->top->is_any = false;
    p->top->any_frame = NULL;
  }

  return true;
}
11055 
/* Entry point for a JSON object value that also handles the Struct and Value
 * well-known types, whose JSON form omits the wrapping "fields" /
 * "struct_value" members.  The missing wrapper levels are synthesized before
 * the final start_subobject() call.  A top-level object for any other message
 * type needs no new frame and simply succeeds. */
static bool start_subobject_full(upb_json_parser *p) {
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      /* Value -> struct_value -> fields */
      start_value_object(p, VALUE_STRUCTVALUE);
      if (!start_subobject(p)) return false;
      start_structvalue_object(p);
      return start_subobject(p);
    }

    if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
      /* Struct -> fields */
      start_structvalue_object(p);
      return start_subobject(p);
    }

    return true;
  }

  if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
    if (!start_subobject(p)) return false;
    start_structvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_STRUCTVALUE);
    if (!start_subobject(p)) return false;
    start_structvalue_object(p);
  }

  return start_subobject(p);
}
11079 
/* Pops the frame pushed by start_subobject() and notifies the parent sink:
 * ENDSEQ for a map frame, ENDSUBMSG for a frame that had a message
 * definition, and nothing for a frame without one (unknown field).
 * Does nothing at top level. */
static void end_subobject(upb_json_parser *p) {
  upb_selector_t sel;
  bool was_map;
  bool had_msgdef;

  if (is_top_level(p)) return;

  /* Read what we need from the frame before popping it. */
  was_map = p->top->is_map;
  had_msgdef = p->top->m != NULL;
  p->top--;

  if (was_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, sel);
  } else if (had_msgdef) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel);
  }
}
11100 
/* Counterpart of start_subobject_full(): ends the object and then unwinds any
 * Struct and/or Value wrapper levels that the current frame turns out to
 * belong to. */
static void end_subobject_full(upb_json_parser *p) {
  end_subobject(p);

  /* Unwind a synthesized Struct level, if we are now inside one. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
    end_structvalue_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }

  /* Unwind a synthesized Value level, if we are now inside one. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }
}
11118 
/* Handles the start of a JSON array.
 *
 * ListValue and Value well-known types are written in JSON as a bare array,
 * so their wrapper levels are synthesized first.  After that:
 *   - inside an unknown field, a frame flagged is_unknown_field is pushed;
 *   - otherwise the selected field must be repeated: STARTSEQ is sent and a
 *     frame flagged is_repeated is pushed.
 *
 * Returns false for a top-level array on a message that is neither Value nor
 * ListValue, for an array on a non-repeated field, or when a stack check
 * fails. */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_LISTVALUE);
      if (!start_subobject(p)) return false;
      start_listvalue_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      start_listvalue_object(p);
    } else {
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_LISTVALUE);
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  }

  if (p->top->is_unknown_field) {
    /* Every other frame push in this file is guarded by check_stack(); this
     * one must be too, otherwise nested arrays under an unknown field can
     * push frames without any depth check. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    inner->is_unknown_field = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(p->top->sink, sel, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_repeated = true;
  p->top = inner;

  return true;
}
11174 
/* Handles the end of a JSON array: pops the array frame and, unless the
 * parent is an unknown field, sends ENDSEQ and unwinds any ListValue and/or
 * Value wrapper levels the parent frame belongs to. */
static void end_array(upb_json_parser *p) {
  upb_selector_t sel;

  UPB_ASSERT(p->top > p->stack);
  p->top--;

  if (!p->top->is_unknown_field) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, sel);

    /* Unwind a synthesized ListValue level. */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      end_listvalue_object(p);
      if (!is_top_level(p)) end_subobject(p);
    }

    /* Unwind a synthesized Value level. */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
      if (!is_top_level(p)) end_subobject(p);
    }
  }
}
11203 
/* Sends STARTMSG to the current frame's sink.  Map frames and frames without
 * a message definition get no STARTMSG. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }
  upb_sink_startmsg(p->top->sink);
}
11209 
/* Sends ENDMSG to the current frame's sink.  Map frames and frames without a
 * message definition get no ENDMSG. */
static void end_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }
  upb_sink_endmsg(p->top->sink, p->status);
}
11215 
/* Object-start hook for Any frames: performs the normal start_object() work
 * and points both ends of the any_frame's "before type url" range at |ptr|,
 * i.e. the range starts out empty. */
static void start_any_object(upb_json_parser *p, const char *ptr) {
  start_object(p);
  p->top->any_frame->before_type_url_end = ptr;
  p->top->any_frame->before_type_url_start = ptr;
}
11221 
/* Object-end hook for Any frames.  |ptr| is the input position at which the
 * object ends.
 *
 * The member text recorded in the frame's any_frame (the range before the
 * type url member and the range after it) is replayed through the nested
 * parser any_frame->parser.  The bytes found in any_frame->stringsink are
 * then emitted as the string value of the "value" field, the object is
 * ended, and the any_frame is freed.
 *
 * Returns false when a value is present without a type url, when the recorded
 * text for a well-known payload contains no ':', when the nested parser
 * rejects its input, or when the stack check fails.  Note that the any_frame
 * is only freed on the success path. */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  /* End of the text replayed for the "after type url" range; includes the
   * byte at |ptr| unless narrowed below.
   * NOTE(review): presumably that byte is the closing '}' -- confirm. */
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;

  if (json_parser_any_frame_has_value(p->top->any_frame) &&
      !json_parser_any_frame_has_type_url(p->top->any_frame)) {
    upb_status_seterrmsg(p->status, "No valid type url");
    return false;
  }

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
          UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
      /* Skip ahead to just past the first ':' in the recorded range, so that
       * only the text after it is replayed. */
      p->top->any_frame->before_type_url_start =
          memchr(p->top->any_frame->before_type_url_start, ':',
                 p->top->any_frame->before_type_url_end -
                 p->top->any_frame->before_type_url_start);
      if (p->top->any_frame->before_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "invalid data for well known type.");
        return false;
      }
      p->top->any_frame->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
      /* Same skip for the range after the type url; the search runs up to
       * and including the byte at |ptr|. */
      p->top->any_frame->after_type_url_start =
          memchr(p->top->any_frame->after_type_url_start, ':',
                 (ptr + 1) -
                 p->top->any_frame->after_type_url_start);
      if (p->top->any_frame->after_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "Invalid data for well known type.");
        return false;
      }
      p->top->any_frame->after_type_url_start++;
      /* For a well-known payload the byte at |ptr| is not replayed. */
      packed_end = ptr;
    }
  }

  /* Replay the text recorded before the type url, or -- for an ordinary
   * (non-well-known) payload with nothing recorded there -- feed a literal
   * "{" to the nested parser instead. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->before_type_url_start,
               p->top->any_frame->before_type_url_end -
               p->top->any_frame->before_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
        return false;
      }
    }
  }

  /* When text exists on both sides of the type url, join the two replayed
   * pieces with a ",". */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
      json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  /* Replay the text recorded after the type url (up to packed_end), or feed
   * a literal "}" for an ordinary payload with nothing recorded there. */
  if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->after_type_url_start,
               packed_end - p->top->any_frame->after_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
        return false;
      }
    }
  }

  /* Tell the nested parser that its input is complete. */
  if (!end(p->top->any_frame->parser, NULL)) {
    return false;
  }

  /* Clear is_any so that end_membername() below performs a regular
   * name-table lookup instead of going through end_any_membername(). */
  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);
  end_membername(p);

  /* The frame slot above the top is used only to hold the string sink; it is
   * never made the current frame. */
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  /* Emit the contents of any_frame->stringsink as one STARTSTR / STRING /
   * ENDSTR run on the selected field. */
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame. */
  json_parser_any_frame_free(p->top->any_frame);

  return true;
}
11334 
is_string_wrapper(const upb_msgdef * m)11335 static bool is_string_wrapper(const upb_msgdef *m) {
11336   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11337   return type == UPB_WELLKNOWN_STRINGVALUE ||
11338          type == UPB_WELLKNOWN_BYTESVALUE;
11339 }
11340 
is_fieldmask(const upb_msgdef * m)11341 static bool is_fieldmask(const upb_msgdef *m) {
11342   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11343   return type == UPB_WELLKNOWN_FIELDMASK;
11344 }
11345 
/* Synthesizes the opening of a FieldMask message: starts the object, selects
 * its "paths" member as if that name had been read from the input, and opens
 * the array that will receive the values.  Results of end_membername() and
 * start_array() are not checked here. */
static void start_fieldmask_object(upb_json_parser *p) {
  const char *name = "paths";
  const char *name_end = name + strlen(name);

  start_object(p);

  /* Feed the member name through the regular member-name path. */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name_end);
  end_membername(p);

  start_array(p);
}
11359 
/* Closes what start_fieldmask_object() opened, in reverse order: the array,
 * then the synthesized member, then the object itself. */
static void end_fieldmask_object(upb_json_parser *p) {
  end_array(p);
  end_member(p);
  end_object(p);
}
11365 
/* Synthesizes the opening of a wrapper message: starts the object and selects
 * its "value" member as if that name had been read from the input, so the
 * value parsed next lands in that field.  The result of end_membername() is
 * not checked here. */
static void start_wrapper_object(upb_json_parser *p) {
  const char *name = "value";
  const char *name_end = name + strlen(name);

  start_object(p);

  /* Feed the member name through the regular member-name path. */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name_end);
  end_membername(p);
}
11377 
/* Closes what start_wrapper_object() opened: ends the synthesized member and
 * then the object. */
static void end_wrapper_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11382 
/* Synthesizes the opening of a Value message: starts the object and selects
 * the member that corresponds to |value_type| (one of the VALUE_* constants)
 * as if its name had been read from the input.  An unrecognized |value_type|
 * selects the empty name.  The result of end_membername() is not checked
 * here. */
static void start_value_object(upb_json_parser *p, int value_type) {
  const char *name;

  switch (value_type) {
    case VALUE_NULLVALUE:
      name = "null_value";
      break;
    case VALUE_NUMBERVALUE:
      name = "number_value";
      break;
    case VALUE_STRINGVALUE:
      name = "string_value";
      break;
    case VALUE_BOOLVALUE:
      name = "bool_value";
      break;
    case VALUE_STRUCTVALUE:
      name = "struct_value";
      break;
    case VALUE_LISTVALUE:
      name = "list_value";
      break;
    default:
      name = "";
      break;
  }

  start_object(p);

  /* Feed the member name through the regular member-name path. */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name + strlen(name));
  end_membername(p);
}
11421 
/* Closes what start_value_object() opened: ends the synthesized member and
 * then the object. */
static void end_value_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11426 
/* Synthesizes the opening of a ListValue message: starts the object and
 * selects its "values" member as if that name had been read from the input.
 * The result of end_membername() is not checked here. */
static void start_listvalue_object(upb_json_parser *p) {
  const char *name = "values";
  const size_t name_len = strlen(name);

  start_object(p);

  /* Feed the member name through the regular member-name path. */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name + name_len);
  end_membername(p);
}
11438 
/* Closes what start_listvalue_object() opened: ends the synthesized member
 * and then the object. */
static void end_listvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11443 
/* Synthesizes the opening of a Struct message: starts the object and selects
 * its "fields" member as if that name had been read from the input.  The
 * result of end_membername() is not checked here. */
static void start_structvalue_object(upb_json_parser *p) {
  const char *name = "fields";
  const size_t name_len = strlen(name);

  start_object(p);

  /* Feed the member name through the regular member-name path. */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name + name_len);
  end_membername(p);
}
11455 
/* Closes what start_structvalue_object() opened: ends the synthesized member
 * and then the object. */
static void end_structvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
11460 
/* True when the parser is on the bottom stack frame with no field selected
 * and is not inside an unknown field. */
static bool is_top_level(upb_json_parser *p) {
  if (p->top != p->stack) return false;
  if (p->top->f != NULL) return false;
  return !p->top->is_unknown_field;
}
11464 
/* True when the current frame has a message definition and that message is
 * the well-known type |type|. */
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_wellknowntype(m) == type;
}
11468 
/* True when the current frame has a submessage field selected and that
 * field's message type is the well-known type |type|. */
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == type;
}
11475 
/* True when the current frame has a submessage field selected whose message
 * type is a number wrapper. */
static bool does_number_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(f));
}
11481 
/* True when the current frame's message is a number wrapper. */
static bool does_number_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_isnumberwrapper(m);
}
11485 
/* True when the current frame's message is a number wrapper. */
static bool is_number_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_isnumberwrapper(m);
}
11489 
/* True when the current frame has a submessage field selected whose message
 * type is a string wrapper (see is_string_wrapper()). */
static bool does_string_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return is_string_wrapper(upb_fielddef_msgsubdef(f));
}
11495 
/* True when the current frame's message is a string wrapper (see
 * is_string_wrapper()). */
static bool does_string_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_string_wrapper(m);
}
11499 
/* True when the current frame's message is a string wrapper (see
 * is_string_wrapper()). */
static bool is_string_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_string_wrapper(m);
}
11503 
/* True when the current frame has a submessage field selected whose message
 * type is FieldMask. */
static bool does_fieldmask_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return is_fieldmask(upb_fielddef_msgsubdef(f));
}
11509 
/* True when the current frame's message is FieldMask. */
static bool does_fieldmask_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_fieldmask(m);
}
11513 
/* Evaluates |x| and jumps to the `error` label of the enclosing function when
 * the result is false.  The expansion is a bare `if` statement (no
 * do { } while (0) wrapper), so it must not be used as the body of an
 * unbraced if/else. */
#define CHECK_RETURN_TOP(x) if (!(x)) goto error
11515 
11516 
11517 /* The actual parser **********************************************************/
11518 
11519 /* What follows is the Ragel parser itself.  The language is specified in Ragel
11520  * and the actions call our C functions above.
11521  *
11522  * Ragel has an extensive set of functionality, and we use only a small part of
11523  * it.  There are many action types but we only use a few:
11524  *
11525  *   ">" -- transition into a machine
11526  *   "%" -- transition out of a machine
11527  *   "@" -- transition into a final state of a machine.
11528  *
11529  * "@" transitions are tricky because a machine can transition into a final
11530  * state repeatedly.  But in some cases we know this can't happen, for example
11531  * a string which is delimited by a final '"' can only transition into its
11532  * final state once, when the closing '"' is seen. */
11533 
11534 
11535 #line 2780 "upb/json/parser.rl"
11536 
11537 
11538 
11539 #line 2583 "upb/json/parser.c"
11540 static const char _json_actions[] = {
11541 	0, 1, 0, 1, 1, 1, 3, 1,
11542 	4, 1, 6, 1, 7, 1, 8, 1,
11543 	9, 1, 11, 1, 12, 1, 13, 1,
11544 	14, 1, 15, 1, 16, 1, 17, 1,
11545 	18, 1, 19, 1, 20, 1, 22, 1,
11546 	23, 1, 24, 1, 35, 1, 37, 1,
11547 	39, 1, 40, 1, 42, 1, 43, 1,
11548 	44, 1, 46, 1, 48, 1, 49, 1,
11549 	50, 1, 51, 1, 53, 1, 54, 2,
11550 	4, 9, 2, 5, 6, 2, 7, 3,
11551 	2, 7, 9, 2, 21, 26, 2, 25,
11552 	10, 2, 27, 28, 2, 29, 30, 2,
11553 	32, 34, 2, 33, 31, 2, 38, 36,
11554 	2, 40, 42, 2, 45, 2, 2, 46,
11555 	54, 2, 47, 36, 2, 49, 54, 2,
11556 	50, 54, 2, 51, 54, 2, 52, 41,
11557 	2, 53, 54, 3, 32, 34, 35, 4,
11558 	21, 26, 27, 28
11559 };
11560 
11561 static const short _json_key_offsets[] = {
11562 	0, 0, 12, 13, 18, 23, 28, 29,
11563 	30, 31, 32, 33, 34, 35, 36, 37,
11564 	38, 43, 44, 48, 53, 58, 63, 67,
11565 	71, 74, 77, 79, 83, 87, 89, 91,
11566 	96, 98, 100, 109, 115, 121, 127, 133,
11567 	135, 139, 142, 144, 146, 149, 150, 154,
11568 	156, 158, 160, 162, 163, 165, 167, 168,
11569 	170, 172, 173, 175, 177, 178, 180, 182,
11570 	183, 185, 187, 191, 193, 195, 196, 197,
11571 	198, 199, 201, 206, 208, 210, 212, 221,
11572 	222, 222, 222, 227, 232, 237, 238, 239,
11573 	240, 241, 241, 242, 243, 244, 244, 245,
11574 	246, 247, 247, 252, 253, 257, 262, 267,
11575 	272, 276, 276, 279, 282, 285, 288, 291,
11576 	294, 294, 294, 294, 294, 294
11577 };
11578 
11579 static const char _json_trans_keys[] = {
11580 	32, 34, 45, 91, 102, 110, 116, 123,
11581 	9, 13, 48, 57, 34, 32, 93, 125,
11582 	9, 13, 32, 44, 93, 9, 13, 32,
11583 	93, 125, 9, 13, 97, 108, 115, 101,
11584 	117, 108, 108, 114, 117, 101, 32, 34,
11585 	125, 9, 13, 34, 32, 58, 9, 13,
11586 	32, 93, 125, 9, 13, 32, 44, 125,
11587 	9, 13, 32, 44, 125, 9, 13, 32,
11588 	34, 9, 13, 45, 48, 49, 57, 48,
11589 	49, 57, 46, 69, 101, 48, 57, 69,
11590 	101, 48, 57, 43, 45, 48, 57, 48,
11591 	57, 48, 57, 46, 69, 101, 48, 57,
11592 	34, 92, 34, 92, 34, 47, 92, 98,
11593 	102, 110, 114, 116, 117, 48, 57, 65,
11594 	70, 97, 102, 48, 57, 65, 70, 97,
11595 	102, 48, 57, 65, 70, 97, 102, 48,
11596 	57, 65, 70, 97, 102, 34, 92, 45,
11597 	48, 49, 57, 48, 49, 57, 46, 115,
11598 	48, 57, 115, 48, 57, 34, 46, 115,
11599 	48, 57, 48, 57, 48, 57, 48, 57,
11600 	48, 57, 45, 48, 57, 48, 57, 45,
11601 	48, 57, 48, 57, 84, 48, 57, 48,
11602 	57, 58, 48, 57, 48, 57, 58, 48,
11603 	57, 48, 57, 43, 45, 46, 90, 48,
11604 	57, 48, 57, 58, 48, 48, 34, 48,
11605 	57, 43, 45, 90, 48, 57, 34, 44,
11606 	34, 44, 34, 44, 34, 45, 91, 102,
11607 	110, 116, 123, 48, 57, 34, 32, 93,
11608 	125, 9, 13, 32, 44, 93, 9, 13,
11609 	32, 93, 125, 9, 13, 97, 108, 115,
11610 	101, 117, 108, 108, 114, 117, 101, 32,
11611 	34, 125, 9, 13, 34, 32, 58, 9,
11612 	13, 32, 93, 125, 9, 13, 32, 44,
11613 	125, 9, 13, 32, 44, 125, 9, 13,
11614 	32, 34, 9, 13, 32, 9, 13, 32,
11615 	9, 13, 32, 9, 13, 32, 9, 13,
11616 	32, 9, 13, 32, 9, 13, 0
11617 };
11618 
11619 static const char _json_single_lengths[] = {
11620 	0, 8, 1, 3, 3, 3, 1, 1,
11621 	1, 1, 1, 1, 1, 1, 1, 1,
11622 	3, 1, 2, 3, 3, 3, 2, 2,
11623 	1, 3, 0, 2, 2, 0, 0, 3,
11624 	2, 2, 9, 0, 0, 0, 0, 2,
11625 	2, 1, 2, 0, 1, 1, 2, 0,
11626 	0, 0, 0, 1, 0, 0, 1, 0,
11627 	0, 1, 0, 0, 1, 0, 0, 1,
11628 	0, 0, 4, 0, 0, 1, 1, 1,
11629 	1, 0, 3, 2, 2, 2, 7, 1,
11630 	0, 0, 3, 3, 3, 1, 1, 1,
11631 	1, 0, 1, 1, 1, 0, 1, 1,
11632 	1, 0, 3, 1, 2, 3, 3, 3,
11633 	2, 0, 1, 1, 1, 1, 1, 1,
11634 	0, 0, 0, 0, 0, 0
11635 };
11636 
11637 static const char _json_range_lengths[] = {
11638 	0, 2, 0, 1, 1, 1, 0, 0,
11639 	0, 0, 0, 0, 0, 0, 0, 0,
11640 	1, 0, 1, 1, 1, 1, 1, 1,
11641 	1, 0, 1, 1, 1, 1, 1, 1,
11642 	0, 0, 0, 3, 3, 3, 3, 0,
11643 	1, 1, 0, 1, 1, 0, 1, 1,
11644 	1, 1, 1, 0, 1, 1, 0, 1,
11645 	1, 0, 1, 1, 0, 1, 1, 0,
11646 	1, 1, 0, 1, 1, 0, 0, 0,
11647 	0, 1, 1, 0, 0, 0, 1, 0,
11648 	0, 0, 1, 1, 1, 0, 0, 0,
11649 	0, 0, 0, 0, 0, 0, 0, 0,
11650 	0, 0, 1, 0, 1, 1, 1, 1,
11651 	1, 0, 1, 1, 1, 1, 1, 1,
11652 	0, 0, 0, 0, 0, 0
11653 };
11654 
11655 static const short _json_index_offsets[] = {
11656 	0, 0, 11, 13, 18, 23, 28, 30,
11657 	32, 34, 36, 38, 40, 42, 44, 46,
11658 	48, 53, 55, 59, 64, 69, 74, 78,
11659 	82, 85, 89, 91, 95, 99, 101, 103,
11660 	108, 111, 114, 124, 128, 132, 136, 140,
11661 	143, 147, 150, 153, 155, 158, 160, 164,
11662 	166, 168, 170, 172, 174, 176, 178, 180,
11663 	182, 184, 186, 188, 190, 192, 194, 196,
11664 	198, 200, 202, 207, 209, 211, 213, 215,
11665 	217, 219, 221, 226, 229, 232, 235, 244,
11666 	246, 247, 248, 253, 258, 263, 265, 267,
11667 	269, 271, 272, 274, 276, 278, 279, 281,
11668 	283, 285, 286, 291, 293, 297, 302, 307,
11669 	312, 316, 317, 320, 323, 326, 329, 332,
11670 	335, 336, 337, 338, 339, 340
11671 };
11672 
/* Ragel-generated table.  parse() looks up the position it computed from
 * _json_index_offsets (plus the matched key offset) in this table; the value
 * found is the transition number used to index _json_trans_targs and
 * _json_trans_actions.  The identifier is referenced by this exact spelling in
 * parse().  Generated data: do not edit by hand. */
static const unsigned char _json_indicies[] = {
	0, 2, 3, 4, 5, 6, 7, 8,
	0, 3, 1, 9, 1, 11, 12, 1,
	11, 10, 13, 14, 12, 13, 1, 14,
	1, 1, 14, 10, 15, 1, 16, 1,
	17, 1, 18, 1, 19, 1, 20, 1,
	21, 1, 22, 1, 23, 1, 24, 1,
	25, 26, 27, 25, 1, 28, 1, 29,
	30, 29, 1, 30, 1, 1, 30, 31,
	32, 33, 34, 32, 1, 35, 36, 27,
	35, 1, 36, 26, 36, 1, 37, 38,
	39, 1, 38, 39, 1, 41, 42, 42,
	40, 43, 1, 42, 42, 43, 40, 44,
	44, 45, 1, 45, 1, 45, 40, 41,
	42, 42, 39, 40, 47, 48, 46, 50,
	51, 49, 52, 52, 52, 52, 52, 52,
	52, 52, 53, 1, 54, 54, 54, 1,
	55, 55, 55, 1, 56, 56, 56, 1,
	57, 57, 57, 1, 59, 60, 58, 61,
	62, 63, 1, 64, 65, 1, 66, 67,
	1, 68, 1, 67, 68, 1, 69, 1,
	66, 67, 65, 1, 70, 1, 71, 1,
	72, 1, 73, 1, 74, 1, 75, 1,
	76, 1, 77, 1, 78, 1, 79, 1,
	80, 1, 81, 1, 82, 1, 83, 1,
	84, 1, 85, 1, 86, 1, 87, 1,
	88, 1, 89, 89, 90, 91, 1, 92,
	1, 93, 1, 94, 1, 95, 1, 96,
	1, 97, 1, 98, 1, 99, 99, 100,
	98, 1, 102, 1, 101, 104, 105, 103,
	1, 1, 101, 106, 107, 108, 109, 110,
	111, 112, 107, 1, 113, 1, 114, 115,
	117, 118, 1, 117, 116, 119, 120, 118,
	119, 1, 120, 1, 1, 120, 116, 121,
	1, 122, 1, 123, 1, 124, 1, 125,
	126, 1, 127, 1, 128, 1, 129, 130,
	1, 131, 1, 132, 1, 133, 134, 135,
	136, 134, 1, 137, 1, 138, 139, 138,
	1, 139, 1, 1, 139, 140, 141, 142,
	143, 141, 1, 144, 145, 136, 144, 1,
	145, 135, 145, 1, 146, 147, 147, 1,
	148, 148, 1, 149, 149, 1, 150, 150,
	1, 151, 151, 1, 152, 152, 1, 1,
	1, 1, 1, 1, 1, 0
};
11718 
/* Ragel-generated table, indexed by transition number (a value taken from
 * _json_indicies).  parse() assigns the entry to `cs`, i.e. it is the state
 * entered when the transition is taken.  Generated data: do not edit by
 * hand. */
static const char _json_trans_targs[] = {
	1, 0, 2, 107, 3, 6, 10, 13,
	16, 106, 4, 3, 106, 4, 5, 7,
	8, 9, 108, 11, 12, 109, 14, 15,
	110, 16, 17, 111, 18, 18, 19, 20,
	21, 22, 111, 21, 22, 24, 25, 31,
	112, 26, 28, 27, 29, 30, 33, 113,
	34, 33, 113, 34, 32, 35, 36, 37,
	38, 39, 33, 113, 34, 41, 42, 46,
	42, 46, 43, 45, 44, 114, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57,
	58, 59, 60, 61, 62, 63, 64, 65,
	66, 67, 73, 72, 68, 69, 70, 71,
	72, 115, 74, 67, 72, 76, 116, 76,
	116, 77, 79, 81, 82, 85, 90, 94,
	98, 80, 117, 117, 83, 82, 80, 83,
	84, 86, 87, 88, 89, 117, 91, 92,
	93, 117, 95, 96, 97, 117, 98, 99,
	105, 100, 100, 101, 102, 103, 104, 105,
	103, 104, 117, 106, 106, 106, 106, 106,
	106
};
11741 
/* Ragel-generated table, indexed by transition number.  An entry of 0 means
 * the transition runs no actions; any other entry is an offset into
 * _json_actions, where parse() reads an action count followed by that many
 * action ids.  Generated data: do not edit by hand. */
static const unsigned char _json_trans_actions[] = {
	0, 0, 113, 107, 53, 0, 0, 0,
	125, 59, 45, 0, 55, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 101, 51, 47, 0, 0, 45,
	49, 49, 104, 0, 0, 0, 0, 0,
	3, 0, 0, 0, 0, 0, 5, 15,
	0, 0, 71, 7, 13, 0, 74, 9,
	9, 9, 77, 80, 11, 37, 37, 37,
	0, 0, 0, 39, 0, 41, 86, 0,
	0, 0, 17, 19, 0, 21, 23, 0,
	25, 27, 0, 29, 31, 0, 33, 35,
	0, 135, 83, 135, 0, 0, 0, 0,
	0, 92, 0, 89, 89, 98, 43, 0,
	131, 95, 113, 107, 53, 0, 0, 0,
	125, 59, 69, 110, 45, 0, 55, 0,
	0, 0, 0, 0, 0, 119, 0, 0,
	0, 122, 0, 0, 0, 116, 0, 101,
	51, 47, 0, 0, 45, 49, 49, 104,
	0, 0, 128, 0, 57, 63, 65, 61,
	67
};
11764 
/* Ragel-generated table, indexed by the current state `cs`.  When parse()
 * reaches end of input (p == eof) it uses the entry as an offset into
 * _json_actions and runs the action list found there.  Generated data: do not
 * edit by hand. */
static const unsigned char _json_eof_actions[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 0, 1, 0, 0, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 57, 63, 65, 61, 67,
	0, 0, 0, 0, 0, 0
};
11782 
/* State the machine starts in; json_parser_reset() assigns it to `cs`. */
static const int json_start = 1;

/* Entry states of the sub-machines.  The generated code in parse() does not
 * reference these names; it hard-codes the same numbers (23, 32, 40, 47, 75
 * and 78) when it pushes the current state and jumps into a sub-machine.
 * end() marks them UPB_UNUSED to silence unused-constant warnings. */
static const int json_en_number_machine = 23;
static const int json_en_string_machine = 32;
static const int json_en_duration_machine = 40;
static const int json_en_timestamp_machine = 47;
static const int json_en_fieldmask_machine = 75;
static const int json_en_value_machine = 78;
static const int json_en_main = 1;
11792 
11793 
11794 #line 2783 "upb/json/parser.rl"
11795 
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)11796 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11797              const upb_bufhandle *handle) {
11798   upb_json_parser *parser = closure;
11799 
11800   /* Variables used by Ragel's generated code. */
11801   int cs = parser->current_state;
11802   int *stack = parser->parser_stack;
11803   int top = parser->parser_top;
11804 
11805   const char *p = buf;
11806   const char *pe = buf + size;
11807   const char *eof = &eof_ch;
11808 
11809   parser->handle = handle;
11810 
11811   UPB_UNUSED(hd);
11812   UPB_UNUSED(handle);
11813 
11814   capture_resume(parser, buf);
11815 
11816 
11817 #line 2861 "upb/json/parser.c"
11818 	{
11819 	int _klen;
11820 	unsigned int _trans;
11821 	const char *_acts;
11822 	unsigned int _nacts;
11823 	const char *_keys;
11824 
11825 	if ( p == pe )
11826 		goto _test_eof;
11827 	if ( cs == 0 )
11828 		goto _out;
11829 _resume:
11830 	_keys = _json_trans_keys + _json_key_offsets[cs];
11831 	_trans = _json_index_offsets[cs];
11832 
11833 	_klen = _json_single_lengths[cs];
11834 	if ( _klen > 0 ) {
11835 		const char *_lower = _keys;
11836 		const char *_mid;
11837 		const char *_upper = _keys + _klen - 1;
11838 		while (1) {
11839 			if ( _upper < _lower )
11840 				break;
11841 
11842 			_mid = _lower + ((_upper-_lower) >> 1);
11843 			if ( (*p) < *_mid )
11844 				_upper = _mid - 1;
11845 			else if ( (*p) > *_mid )
11846 				_lower = _mid + 1;
11847 			else {
11848 				_trans += (unsigned int)(_mid - _keys);
11849 				goto _match;
11850 			}
11851 		}
11852 		_keys += _klen;
11853 		_trans += _klen;
11854 	}
11855 
11856 	_klen = _json_range_lengths[cs];
11857 	if ( _klen > 0 ) {
11858 		const char *_lower = _keys;
11859 		const char *_mid;
11860 		const char *_upper = _keys + (_klen<<1) - 2;
11861 		while (1) {
11862 			if ( _upper < _lower )
11863 				break;
11864 
11865 			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
11866 			if ( (*p) < _mid[0] )
11867 				_upper = _mid - 2;
11868 			else if ( (*p) > _mid[1] )
11869 				_lower = _mid + 2;
11870 			else {
11871 				_trans += (unsigned int)((_mid - _keys)>>1);
11872 				goto _match;
11873 			}
11874 		}
11875 		_trans += _klen;
11876 	}
11877 
11878 _match:
11879 	_trans = _json_indicies[_trans];
11880 	cs = _json_trans_targs[_trans];
11881 
11882 	if ( _json_trans_actions[_trans] == 0 )
11883 		goto _again;
11884 
11885 	_acts = _json_actions + _json_trans_actions[_trans];
11886 	_nacts = (unsigned int) *_acts++;
11887 	while ( _nacts-- > 0 )
11888 	{
11889 		switch ( *_acts++ )
11890 		{
11891 	case 1:
11892 #line 2588 "upb/json/parser.rl"
11893 	{ p--; {cs = stack[--top]; goto _again;} }
11894 	break;
11895 	case 2:
11896 #line 2590 "upb/json/parser.rl"
11897 	{ p--; {stack[top++] = cs; cs = 23;goto _again;} }
11898 	break;
11899 	case 3:
11900 #line 2594 "upb/json/parser.rl"
11901 	{ start_text(parser, p); }
11902 	break;
11903 	case 4:
11904 #line 2595 "upb/json/parser.rl"
11905 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
11906 	break;
11907 	case 5:
11908 #line 2601 "upb/json/parser.rl"
11909 	{ start_hex(parser); }
11910 	break;
11911 	case 6:
11912 #line 2602 "upb/json/parser.rl"
11913 	{ hexdigit(parser, p); }
11914 	break;
11915 	case 7:
11916 #line 2603 "upb/json/parser.rl"
11917 	{ CHECK_RETURN_TOP(end_hex(parser)); }
11918 	break;
11919 	case 8:
11920 #line 2609 "upb/json/parser.rl"
11921 	{ CHECK_RETURN_TOP(escape(parser, p)); }
11922 	break;
11923 	case 9:
11924 #line 2615 "upb/json/parser.rl"
11925 	{ p--; {cs = stack[--top]; goto _again;} }
11926 	break;
11927 	case 10:
11928 #line 2620 "upb/json/parser.rl"
11929 	{ start_year(parser, p); }
11930 	break;
11931 	case 11:
11932 #line 2621 "upb/json/parser.rl"
11933 	{ CHECK_RETURN_TOP(end_year(parser, p)); }
11934 	break;
11935 	case 12:
11936 #line 2625 "upb/json/parser.rl"
11937 	{ start_month(parser, p); }
11938 	break;
11939 	case 13:
11940 #line 2626 "upb/json/parser.rl"
11941 	{ CHECK_RETURN_TOP(end_month(parser, p)); }
11942 	break;
11943 	case 14:
11944 #line 2630 "upb/json/parser.rl"
11945 	{ start_day(parser, p); }
11946 	break;
11947 	case 15:
11948 #line 2631 "upb/json/parser.rl"
11949 	{ CHECK_RETURN_TOP(end_day(parser, p)); }
11950 	break;
11951 	case 16:
11952 #line 2635 "upb/json/parser.rl"
11953 	{ start_hour(parser, p); }
11954 	break;
11955 	case 17:
11956 #line 2636 "upb/json/parser.rl"
11957 	{ CHECK_RETURN_TOP(end_hour(parser, p)); }
11958 	break;
11959 	case 18:
11960 #line 2640 "upb/json/parser.rl"
11961 	{ start_minute(parser, p); }
11962 	break;
11963 	case 19:
11964 #line 2641 "upb/json/parser.rl"
11965 	{ CHECK_RETURN_TOP(end_minute(parser, p)); }
11966 	break;
11967 	case 20:
11968 #line 2645 "upb/json/parser.rl"
11969 	{ start_second(parser, p); }
11970 	break;
11971 	case 21:
11972 #line 2646 "upb/json/parser.rl"
11973 	{ CHECK_RETURN_TOP(end_second(parser, p)); }
11974 	break;
11975 	case 22:
11976 #line 2651 "upb/json/parser.rl"
11977 	{ start_duration_base(parser, p); }
11978 	break;
11979 	case 23:
11980 #line 2652 "upb/json/parser.rl"
11981 	{ CHECK_RETURN_TOP(end_duration_base(parser, p)); }
11982 	break;
11983 	case 24:
11984 #line 2654 "upb/json/parser.rl"
11985 	{ p--; {cs = stack[--top]; goto _again;} }
11986 	break;
11987 	case 25:
11988 #line 2659 "upb/json/parser.rl"
11989 	{ start_timestamp_base(parser); }
11990 	break;
11991 	case 26:
11992 #line 2661 "upb/json/parser.rl"
11993 	{ start_timestamp_fraction(parser, p); }
11994 	break;
11995 	case 27:
11996 #line 2662 "upb/json/parser.rl"
11997 	{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
11998 	break;
11999 	case 28:
12000 #line 2664 "upb/json/parser.rl"
12001 	{ start_timestamp_zone(parser, p); }
12002 	break;
12003 	case 29:
12004 #line 2665 "upb/json/parser.rl"
12005 	{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
12006 	break;
12007 	case 30:
12008 #line 2667 "upb/json/parser.rl"
12009 	{ p--; {cs = stack[--top]; goto _again;} }
12010 	break;
12011 	case 31:
12012 #line 2672 "upb/json/parser.rl"
12013 	{ start_fieldmask_path_text(parser, p); }
12014 	break;
12015 	case 32:
12016 #line 2673 "upb/json/parser.rl"
12017 	{ end_fieldmask_path_text(parser, p); }
12018 	break;
12019 	case 33:
12020 #line 2678 "upb/json/parser.rl"
12021 	{ start_fieldmask_path(parser); }
12022 	break;
12023 	case 34:
12024 #line 2679 "upb/json/parser.rl"
12025 	{ end_fieldmask_path(parser); }
12026 	break;
12027 	case 35:
12028 #line 2685 "upb/json/parser.rl"
12029 	{ p--; {cs = stack[--top]; goto _again;} }
12030 	break;
12031 	case 36:
12032 #line 2690 "upb/json/parser.rl"
12033 	{
12034         if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
12035           {stack[top++] = cs; cs = 47;goto _again;}
12036         } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
12037           {stack[top++] = cs; cs = 40;goto _again;}
12038         } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
12039           {stack[top++] = cs; cs = 75;goto _again;}
12040         } else {
12041           {stack[top++] = cs; cs = 32;goto _again;}
12042         }
12043       }
12044 	break;
12045 	case 37:
12046 #line 2703 "upb/json/parser.rl"
12047 	{ p--; {stack[top++] = cs; cs = 78;goto _again;} }
12048 	break;
12049 	case 38:
12050 #line 2708 "upb/json/parser.rl"
12051 	{
12052         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12053           start_any_member(parser, p);
12054         } else {
12055           start_member(parser);
12056         }
12057       }
12058 	break;
12059 	case 39:
12060 #line 2715 "upb/json/parser.rl"
12061 	{ CHECK_RETURN_TOP(end_membername(parser)); }
12062 	break;
12063 	case 40:
12064 #line 2718 "upb/json/parser.rl"
12065 	{
12066         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12067           end_any_member(parser, p);
12068         } else {
12069           end_member(parser);
12070         }
12071       }
12072 	break;
12073 	case 41:
12074 #line 2729 "upb/json/parser.rl"
12075 	{
12076         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12077           start_any_object(parser, p);
12078         } else {
12079           start_object(parser);
12080         }
12081       }
12082 	break;
12083 	case 42:
12084 #line 2738 "upb/json/parser.rl"
12085 	{
12086         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12087           CHECK_RETURN_TOP(end_any_object(parser, p));
12088         } else {
12089           end_object(parser);
12090         }
12091       }
12092 	break;
12093 	case 43:
12094 #line 2750 "upb/json/parser.rl"
12095 	{ CHECK_RETURN_TOP(start_array(parser)); }
12096 	break;
12097 	case 44:
12098 #line 2754 "upb/json/parser.rl"
12099 	{ end_array(parser); }
12100 	break;
12101 	case 45:
12102 #line 2759 "upb/json/parser.rl"
12103 	{ CHECK_RETURN_TOP(start_number(parser, p)); }
12104 	break;
12105 	case 46:
12106 #line 2760 "upb/json/parser.rl"
12107 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
12108 	break;
12109 	case 47:
12110 #line 2762 "upb/json/parser.rl"
12111 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
12112 	break;
12113 	case 48:
12114 #line 2763 "upb/json/parser.rl"
12115 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
12116 	break;
12117 	case 49:
12118 #line 2765 "upb/json/parser.rl"
12119 	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
12120 	break;
12121 	case 50:
12122 #line 2767 "upb/json/parser.rl"
12123 	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
12124 	break;
12125 	case 51:
12126 #line 2769 "upb/json/parser.rl"
12127 	{ CHECK_RETURN_TOP(end_null(parser)); }
12128 	break;
12129 	case 52:
12130 #line 2771 "upb/json/parser.rl"
12131 	{ CHECK_RETURN_TOP(start_subobject_full(parser)); }
12132 	break;
12133 	case 53:
12134 #line 2772 "upb/json/parser.rl"
12135 	{ end_subobject_full(parser); }
12136 	break;
12137 	case 54:
12138 #line 2777 "upb/json/parser.rl"
12139 	{ p--; {cs = stack[--top]; goto _again;} }
12140 	break;
12141 #line 3185 "upb/json/parser.c"
12142 		}
12143 	}
12144 
12145 _again:
12146 	if ( cs == 0 )
12147 		goto _out;
12148 	if ( ++p != pe )
12149 		goto _resume;
12150 	_test_eof: {}
12151 	if ( p == eof )
12152 	{
12153 	const char *__acts = _json_actions + _json_eof_actions[cs];
12154 	unsigned int __nacts = (unsigned int) *__acts++;
12155 	while ( __nacts-- > 0 ) {
12156 		switch ( *__acts++ ) {
12157 	case 0:
12158 #line 2586 "upb/json/parser.rl"
12159 	{ p--; {cs = stack[--top]; 	if ( p == pe )
12160 		goto _test_eof;
12161 goto _again;} }
12162 	break;
12163 	case 46:
12164 #line 2760 "upb/json/parser.rl"
12165 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
12166 	break;
12167 	case 49:
12168 #line 2765 "upb/json/parser.rl"
12169 	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
12170 	break;
12171 	case 50:
12172 #line 2767 "upb/json/parser.rl"
12173 	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
12174 	break;
12175 	case 51:
12176 #line 2769 "upb/json/parser.rl"
12177 	{ CHECK_RETURN_TOP(end_null(parser)); }
12178 	break;
12179 	case 53:
12180 #line 2772 "upb/json/parser.rl"
12181 	{ end_subobject_full(parser); }
12182 	break;
12183 #line 3227 "upb/json/parser.c"
12184 		}
12185 	}
12186 	}
12187 
12188 	_out: {}
12189 	}
12190 
12191 #line 2805 "upb/json/parser.rl"
12192 
12193   if (p != pe) {
12194     upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p);
12195   } else {
12196     capture_suspend(parser, &p);
12197   }
12198 
12199 error:
12200   /* Save parsing state back to parser. */
12201   parser->current_state = cs;
12202   parser->parser_top = top;
12203 
12204   return p - buf;
12205 }
12206 
end(void * closure,const void * hd)12207 static bool end(void *closure, const void *hd) {
12208   upb_json_parser *parser = closure;
12209 
12210   /* Prevent compile warning on unused static constants. */
12211   UPB_UNUSED(json_start);
12212   UPB_UNUSED(json_en_duration_machine);
12213   UPB_UNUSED(json_en_fieldmask_machine);
12214   UPB_UNUSED(json_en_number_machine);
12215   UPB_UNUSED(json_en_string_machine);
12216   UPB_UNUSED(json_en_timestamp_machine);
12217   UPB_UNUSED(json_en_value_machine);
12218   UPB_UNUSED(json_en_main);
12219 
12220   parse(parser, hd, &eof_ch, 0, NULL);
12221 
12222   return parser->current_state >= 106;
12223 }
12224 
/* Puts the parser back into its initial condition: frame stack rewound to its
 * first frame, state machine at its start state with an empty call stack, and
 * no capture or accumulated text pending. */
static void json_parser_reset(upb_json_parser *p) {
  int cs;
  int top;

  /* Rewind the frame stack and reinitialize the bottom frame. */
  p->top = p->stack;
  init_frame(p->top);

  /* Ragel-generated initialization of the state machine variables. */

#line 3278 "upb/json/parser.c"
	{
	cs = json_start;
	top = 0;
	}

#line 2847 "upb/json/parser.rl"
  p->parser_top = top;
  p->current_state = cs;
  accumulate_clear(p);
  p->capture = NULL;
  p->accumulated = NULL;
  p->multipart_state = MULTIPART_INACTIVE;
}
12248 
parsermethod_new(upb_json_codecache * c,const upb_msgdef * md)12249 static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
12250                                                const upb_msgdef *md) {
12251   upb_msg_field_iter i;
12252   upb_alloc *alloc = upb_arena_alloc(c->arena);
12253 
12254   upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m));
12255 
12256   m->cache = c;
12257 
12258   upb_byteshandler_init(&m->input_handler_);
12259   upb_byteshandler_setstring(&m->input_handler_, parse, m);
12260   upb_byteshandler_setendstr(&m->input_handler_, end, m);
12261 
12262   upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
12263 
12264   /* Build name_table */
12265 
12266   for(upb_msg_field_begin(&i, md);
12267       !upb_msg_field_done(&i);
12268       upb_msg_field_next(&i)) {
12269     const upb_fielddef *f = upb_msg_iter_field(&i);
12270     upb_value v = upb_value_constptr(f);
12271     const char *name;
12272 
12273     /* Add an entry for the JSON name. */
12274     name = upb_fielddef_jsonname(f);
12275     upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
12276 
12277     if (strcmp(name, upb_fielddef_name(f)) != 0) {
12278       /* Since the JSON name is different from the regular field name, add an
12279        * entry for the raw name (compliant proto3 JSON parsers must accept
12280        * both). */
12281       const char *name = upb_fielddef_name(f);
12282       upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
12283     }
12284   }
12285 
12286   return m;
12287 }
12288 
12289 /* Public API *****************************************************************/
12290 
upb_json_parser_create(upb_arena * arena,const upb_json_parsermethod * method,const upb_symtab * symtab,upb_sink output,upb_status * status,bool ignore_json_unknown)12291 upb_json_parser *upb_json_parser_create(upb_arena *arena,
12292                                         const upb_json_parsermethod *method,
12293                                         const upb_symtab* symtab,
12294                                         upb_sink output,
12295                                         upb_status *status,
12296                                         bool ignore_json_unknown) {
12297 #ifndef NDEBUG
12298   const size_t size_before = upb_arena_bytesallocated(arena);
12299 #endif
12300   upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser));
12301   if (!p) return false;
12302 
12303   p->arena = arena;
12304   p->method = method;
12305   p->status = status;
12306   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
12307   p->accumulate_buf = NULL;
12308   p->accumulate_buf_size = 0;
12309   upb_bytessink_reset(&p->input_, &method->input_handler_, p);
12310 
12311   json_parser_reset(p);
12312   p->top->sink = output;
12313   p->top->m = upb_handlers_msgdef(output.handlers);
12314   if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
12315     p->top->is_any = true;
12316     p->top->any_frame = json_parser_any_frame_new(p);
12317   } else {
12318     p->top->is_any = false;
12319     p->top->any_frame = NULL;
12320   }
12321   set_name_table(p, p->top);
12322   p->symtab = symtab;
12323 
12324   p->ignore_json_unknown = ignore_json_unknown;
12325 
12326   /* If this fails, uncomment and increase the value in parser.h. */
12327   /* fprintf(stderr, "%zd\n", upb_arena_bytesallocated(arena) - size_before); */
12328   UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
12329                       UPB_JSON_PARSER_SIZE);
12330   return p;
12331 }
12332 
upb_json_parser_input(upb_json_parser * p)12333 upb_bytessink upb_json_parser_input(upb_json_parser *p) {
12334   return p->input_;
12335 }
12336 
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)12337 const upb_byteshandler *upb_json_parsermethod_inputhandler(
12338     const upb_json_parsermethod *m) {
12339   return &m->input_handler_;
12340 }
12341 
upb_json_codecache_new(void)12342 upb_json_codecache *upb_json_codecache_new(void) {
12343   upb_alloc *alloc;
12344   upb_json_codecache *c;
12345 
12346   c = upb_gmalloc(sizeof(*c));
12347 
12348   c->arena = upb_arena_new();
12349   alloc = upb_arena_alloc(c->arena);
12350 
12351   upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc);
12352 
12353   return c;
12354 }
12355 
/* Destroys a cache: frees its arena and then the cache object itself.
 * Passing NULL is a no-op, mirroring free(). */
void upb_json_codecache_free(upb_json_codecache *c) {
  if (!c) return;
  upb_arena_free(c->arena);
  upb_gfree(c);
}
12360 
upb_json_codecache_get(upb_json_codecache * c,const upb_msgdef * md)12361 const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c,
12362                                                     const upb_msgdef *md) {
12363   upb_json_parsermethod *m;
12364   upb_value v;
12365   upb_msg_field_iter i;
12366   upb_alloc *alloc = upb_arena_alloc(c->arena);
12367 
12368   if (upb_inttable_lookupptr(&c->methods, md, &v)) {
12369     return upb_value_getconstptr(v);
12370   }
12371 
12372   m = parsermethod_new(c, md);
12373   v = upb_value_constptr(m);
12374 
12375   if (!m) return NULL;
12376   if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL;
12377 
12378   /* Populate parser methods for all submessages, so the name tables will
12379    * be available during parsing. */
12380   for(upb_msg_field_begin(&i, md);
12381       !upb_msg_field_done(&i);
12382       upb_msg_field_next(&i)) {
12383     upb_fielddef *f = upb_msg_iter_field(&i);
12384 
12385     if (upb_fielddef_issubmsg(f)) {
12386       const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
12387       const upb_json_parsermethod *sub_method =
12388           upb_json_codecache_get(c, subdef);
12389 
12390       if (!sub_method) return NULL;
12391     }
12392   }
12393 
12394   return m;
12395 }
12396 /*
12397 ** This currently uses snprintf() to format primitives, and could be optimized
12398 ** further.
12399 */
12400 
12401 
12402 #include <ctype.h>
12403 #include <inttypes.h>
12404 #include <stdint.h>
12405 #include <string.h>
12406 #include <time.h>
12407 
12408 
/* State of a JSON printer.  The printing helpers in this file write their
 * output through output_/subc_ and use depth_/first_elem_ to decide when a
 * separating comma is needed. */
struct upb_json_printer {
  upb_sink input_;
  /* BytesSink closure; passed to upb_bytessink_putbuf() together with
   * output_ on every write. */
  void *subc_;
  upb_bytessink output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output. It is also the index into first_elem_ below. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];

  /* To print timestamp, printer needs to cache its seconds and nanos values
   * and convert them when ending timestamp message. See comments of
   * printer_sethandlers_timestamp for more detail. */
  int64_t seconds;
  int32_t nanos;
};
12435 
/* StringPiece; a pointer plus a length.
 *
 * As built by newstrpc()/newstrpc_str(), ptr is a heap copy made with
 * upb_gstrdup() and len is its strlen() (the terminating NUL is not counted).
 * freestrpc() releases both the copy and the struct. */
typedef struct {
  char *ptr;
  size_t len;
} strpc;
12441 
freestrpc(void * ptr)12442 void freestrpc(void *ptr) {
12443   strpc *pc = ptr;
12444   upb_gfree(pc->ptr);
12445   upb_gfree(pc);
12446 }
12447 
/* Printer-cache options.
 * NOTE(review): preserve_fieldnames presumably carries the same meaning as the
 * flag of that name taken by newstrpc() (use the proto field name instead of
 * the JSON name) - confirm where this struct is used. */
typedef struct {
  bool preserve_fieldnames;
} upb_json_printercache;
12451 
12452 /* Convert fielddef name to JSON name and return as a string piece. */
newstrpc(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames)12453 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
12454                 bool preserve_fieldnames) {
12455   /* TODO(haberman): handle malloc failure. */
12456   strpc *ret = upb_gmalloc(sizeof(*ret));
12457   if (preserve_fieldnames) {
12458     ret->ptr = upb_gstrdup(upb_fielddef_name(f));
12459     ret->len = strlen(ret->ptr);
12460   } else {
12461     ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f));
12462     ret->len = strlen(ret->ptr);
12463   }
12464 
12465   upb_handlers_addcleanup(h, ret, freestrpc);
12466   return ret;
12467 }
12468 
12469 /* Convert a null-terminated const char* to a string piece. */
newstrpc_str(upb_handlers * h,const char * str)12470 strpc *newstrpc_str(upb_handlers *h, const char * str) {
12471   strpc * ret = upb_gmalloc(sizeof(*ret));
12472   ret->ptr = upb_gstrdup(str);
12473   ret->len = strlen(str);
12474   upb_handlers_addcleanup(h, ret, freestrpc);
12475   return ret;
12476 }
12477 
12478 /* ------------ JSON string printing: values, maps, arrays ------------------ */
12479 
/* Writes `len` bytes from `buf` to the printer's output sink.  The sink is
 * expected to accept the whole buffer; a short write trips the assertion. */
static void print_data(
    upb_json_printer *p, const char *buf, size_t len) {
  size_t written;

  /* TODO: Will need to change if we support pushback from the sink. */
  written = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT(written == len);
}
12486 
/* Emits the separator that precedes an element at the current depth: nothing
 * for the first element of a frame, "," for every later one.  Either way the
 * frame is marked as having seen its first element. */
static void print_comma(upb_json_printer *p) {
  bool *is_first = &p->first_elem_[p->depth_];

  if (*is_first) {
    *is_first = false;
    return;
  }
  print_data(p, ",", 1);
}
12493 
12494 /* Helpers that print properly formatted elements to the JSON output stream. */
12495 
/* Used for escaping control chars in strings: is_json_escaped() treats every
 * byte below this value (0x20, the space character) as needing an escape. */
static const char kControlCharLimit = 0x20;
12498 
is_json_escaped(char c)12499 UPB_INLINE bool is_json_escaped(char c) {
12500   /* See RFC 4627. */
12501   unsigned char uc = (unsigned char)c;
12502   return uc < kControlCharLimit || uc == '"' || uc == '\\';
12503 }
12504 
json_nice_escape(char c)12505 UPB_INLINE const char* json_nice_escape(char c) {
12506   switch (c) {
12507     case '"':  return "\\\"";
12508     case '\\': return "\\\\";
12509     case '\b': return "\\b";
12510     case '\f': return "\\f";
12511     case '\n': return "\\n";
12512     case '\r': return "\\r";
12513     case '\t': return "\\t";
12514     default:   return NULL;
12515   }
12516 }
12517 
12518 /* Write a properly escaped string chunk. The surrounding quotes are *not*
12519  * printed; this is so that the caller has the option of emitting the string
12520  * content in chunks. */
putstring(upb_json_printer * p,const char * buf,size_t len)12521 static void putstring(upb_json_printer *p, const char *buf, size_t len) {
12522   const char* unescaped_run = NULL;
12523   unsigned int i;
12524   for (i = 0; i < len; i++) {
12525     char c = buf[i];
12526     /* Handle escaping. */
12527     if (is_json_escaped(c)) {
12528       /* Use a "nice" escape, like \n, if one exists for this character. */
12529       const char* escape = json_nice_escape(c);
12530       /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
12531        * escape. */
12532       char escape_buf[8];
12533       if (!escape) {
12534         unsigned char byte = (unsigned char)c;
12535         _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
12536         escape = escape_buf;
12537       }
12538 
12539       /* N.B. that we assume that the input encoding is equal to the output
12540        * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
12541        * can simply pass the bytes through. */
12542 
12543       /* If there's a current run of unescaped chars, print that run first. */
12544       if (unescaped_run) {
12545         print_data(p, unescaped_run, &buf[i] - unescaped_run);
12546         unescaped_run = NULL;
12547       }
12548       /* Then print the escape code. */
12549       print_data(p, escape, strlen(escape));
12550     } else {
12551       /* Add to the current unescaped run of characters. */
12552       if (unescaped_run == NULL) {
12553         unescaped_run = &buf[i];
12554       }
12555     }
12556   }
12557 
12558   /* If the string ended in a run of unescaped characters, print that last run. */
12559   if (unescaped_run) {
12560     print_data(p, unescaped_run, &buf[len] - unescaped_run);
12561   }
12562 }
12563 
/* Makes the enclosing function return -1 when condition `x` is false.  The
 * fmt_* helpers below return size_t, so callers see the failure as (size_t)-1.
 * The expansion is a bare `if` statement that already ends in ';'. */
#define CHKLENGTH(x) if (!(x)) return -1;
12565 
/* Helpers that format floating point values according to our custom formats.
 * Right now we use %.8g and %.17g for float/double, respectively, to match
 * proto2::util::JsonFormat's defaults.  May want to change this later. */

/* Text emitted for the infinities.  The double quotes are part of the
 * literals, so the values are written as JSON strings. */
const char neginf[] = "\"-Infinity\"";
const char inf[] = "\"Infinity\"";
12572 
fmt_double(double val,char * buf,size_t length)12573 static size_t fmt_double(double val, char* buf, size_t length) {
12574   if (val == UPB_INFINITY) {
12575     CHKLENGTH(length >= strlen(inf));
12576     strcpy(buf, inf);
12577     return strlen(inf);
12578   } else if (val == -UPB_INFINITY) {
12579     CHKLENGTH(length >= strlen(neginf));
12580     strcpy(buf, neginf);
12581     return strlen(neginf);
12582   } else {
12583     size_t n = _upb_snprintf(buf, length, "%.17g", val);
12584     CHKLENGTH(n > 0 && n < length);
12585     return n;
12586   }
12587 }
12588 
fmt_float(float val,char * buf,size_t length)12589 static size_t fmt_float(float val, char* buf, size_t length) {
12590   size_t n = _upb_snprintf(buf, length, "%.8g", val);
12591   CHKLENGTH(n > 0 && n < length);
12592   return n;
12593 }
12594 
/* Writes "true" or "false" into `buf` (capacity `length`).  Returns the number
 * of characters written, or (size_t)-1 if the text does not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  size_t written = _upb_snprintf(buf, length, "%s", text);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12600 
/* Writes `val` in decimal, unquoted, into `buf` (capacity `length`).  Returns
 * the number of characters written, or (size_t)-1 if the text does not fit. */
static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%" PRId64, val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12606 
/* Writes `val` in decimal, unquoted, into `buf` (capacity `length`).  Returns
 * the number of characters written, or (size_t)-1 if the text does not fit. */
static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%" PRIu64, val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12612 
/* Writes `val` in decimal, wrapped in double quotes, into `buf` (capacity
 * `length`).  Returns the number of characters written, or (size_t)-1 if the
 * text does not fit. */
static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12618 
/* Writes `val` in decimal, wrapped in double quotes, into `buf` (capacity
 * `length`).  Returns the number of characters written, or (size_t)-1 if the
 * text does not fit. */
static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12624 
12625 /* Print a map key given a field name. Called by scalar field handlers and by
12626  * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)12627 static bool putkey(void *closure, const void *handler_data) {
12628   upb_json_printer *p = closure;
12629   const strpc *key = handler_data;
12630   print_comma(p);
12631   print_data(p, "\"", 1);
12632   putstring(p, key->ptr, key->len);
12633   print_data(p, "\":", 2);
12634   return true;
12635 }
12636 
/* CHKFMT: return false from the enclosing handler when a fmt_* helper
 * reported failure, i.e. returned (size_t)-1.
 * CHK: return false from the enclosing handler when `val` is false.
 * Both expand to a bare `if` statement that already ends in ';'. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val)    if (!(val)) return false;
12639 
/* Generates three value handlers for `type`, all formatting through
 * `fmt_func` into a 64-byte stack buffer:
 *   put<type>       prints the bare value; returns false if formatting fails.
 *   scalar_<type>   prints the key (via putkey) and then the value.
 *   repeated_<type> prints a separating comma when needed, then the value. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
12663 
/* TYPE_HANDLERS_MAPKEY(type, fmt_func) defines putmapkey_<type>, which prints
 * a map key of `type` as `"<formatted value>":`. The value is formatted by
 * fmt_func into a 64-byte stack buffer. As in TYPE_HANDLERS, a formatting
 * failure ((size_t)-1) makes the handler return false before anything is
 * printed, rather than passing the failure code to print_data as a length. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, "\"", 1);                                                  \
    print_data(p, data, length);                                             \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
12676 
/* Value handlers. 32-bit integers are widened and printed as bare numbers;
 * 64-bit integers go through the *_as_string formatters, which wrap the
 * digits in double quotes. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64_as_number)
TYPE_HANDLERS(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS(int64_t,  fmt_int64_as_string)
TYPE_HANDLERS(uint64_t, fmt_uint64_as_string)

/* double and float are not allowed to be map keys. */
/* Map keys use the *_as_number formatters even for 64-bit types because
 * putmapkey_<type> adds the surrounding quotes itself. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number)

#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
12694 
/* Handler data attached to enum field handlers (filled in by set_enum_hd). */
typedef struct {
  /* Key data handed to putkey() by scalar_enum; set from newstrpc(). */
  void *keyname;
  /* Enum definition used to map numeric values to symbolic names. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
12699 
scalar_enum(void * closure,const void * handler_data,int32_t val)12700 static bool scalar_enum(void *closure, const void *handler_data,
12701                         int32_t val) {
12702   const EnumHandlerData *hd = handler_data;
12703   upb_json_printer *p = closure;
12704   const char *symbolic_name;
12705 
12706   CHK(putkey(closure, hd->keyname));
12707 
12708   symbolic_name = upb_enumdef_iton(hd->enumdef, val);
12709   if (symbolic_name) {
12710     print_data(p, "\"", 1);
12711     putstring(p, symbolic_name, strlen(symbolic_name));
12712     print_data(p, "\"", 1);
12713   } else {
12714     putint32_t(closure, NULL, val);
12715   }
12716 
12717   return true;
12718 }
12719 
/* Prints enum value `val` of `def` as its quoted symbolic name. When the
 * enumdef has no name for `val`, prints the number via putint32_t instead. */
static void print_enum_symbolic_name(upb_json_printer *p,
                                     const upb_enumdef *def,
                                     int32_t val) {
  const char *name = upb_enumdef_iton(def, val);

  if (name == NULL) {
    putint32_t(p, NULL, val);
    return;
  }

  print_data(p, "\"", 1);
  putstring(p, name, strlen(name));
  print_data(p, "\"", 1);
}
12732 
repeated_enum(void * closure,const void * handler_data,int32_t val)12733 static bool repeated_enum(void *closure, const void *handler_data,
12734                           int32_t val) {
12735   const EnumHandlerData *hd = handler_data;
12736   upb_json_printer *p = closure;
12737   print_comma(p);
12738 
12739   print_enum_symbolic_name(p, hd->enumdef, val);
12740 
12741   return true;
12742 }
12743 
mapvalue_enum(void * closure,const void * handler_data,int32_t val)12744 static bool mapvalue_enum(void *closure, const void *handler_data,
12745                           int32_t val) {
12746   const EnumHandlerData *hd = handler_data;
12747   upb_json_printer *p = closure;
12748 
12749   print_enum_symbolic_name(p, hd->enumdef, val);
12750 
12751   return true;
12752 }
12753 
scalar_startsubmsg(void * closure,const void * handler_data)12754 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
12755   return putkey(closure, handler_data) ? closure : UPB_BREAK;
12756 }
12757 
repeated_startsubmsg(void * closure,const void * handler_data)12758 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
12759   upb_json_printer *p = closure;
12760   UPB_UNUSED(handler_data);
12761   print_comma(p);
12762   return closure;
12763 }
12764 
/* Opens a JSON object: moves one nesting level deeper, flags that level as
 * still waiting for its first element, and prints "{".
 * NOTE(review): depth_ is not bounds-checked against first_elem_ here. */
static void start_frame(upb_json_printer *p) {
  p->first_elem_[++p->depth_] = true;
  print_data(p, "{", 1);
}
12770 
/* Closes a JSON object: prints "}" and then moves one nesting level up. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  --p->depth_;
}
12775 
printer_startmsg(void * closure,const void * handler_data)12776 static bool printer_startmsg(void *closure, const void *handler_data) {
12777   upb_json_printer *p = closure;
12778   UPB_UNUSED(handler_data);
12779   if (p->depth_ == 0) {
12780     upb_bytessink_start(p->output_, 0, &p->subc_);
12781   }
12782   start_frame(p);
12783   return true;
12784 }
12785 
/* endmsg handler for ordinary messages: closes the object frame and, once the
 * depth is back to 0, ends the output bytes sink. Always returns true; the
 * status argument is not used. */
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  end_frame(printer);
  if (printer->depth_ == 0) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
12796 
startseq(void * closure,const void * handler_data)12797 static void *startseq(void *closure, const void *handler_data) {
12798   upb_json_printer *p = closure;
12799   CHK(putkey(closure, handler_data));
12800   p->depth_++;
12801   p->first_elem_[p->depth_] = true;
12802   print_data(p, "[", 1);
12803   return closure;
12804 }
12805 
endseq(void * closure,const void * handler_data)12806 static bool endseq(void *closure, const void *handler_data) {
12807   upb_json_printer *p = closure;
12808   UPB_UNUSED(handler_data);
12809   print_data(p, "]", 1);
12810   p->depth_--;
12811   return true;
12812 }
12813 
startmap(void * closure,const void * handler_data)12814 static void *startmap(void *closure, const void *handler_data) {
12815   upb_json_printer *p = closure;
12816   CHK(putkey(closure, handler_data));
12817   p->depth_++;
12818   p->first_elem_[p->depth_] = true;
12819   print_data(p, "{", 1);
12820   return closure;
12821 }
12822 
endmap(void * closure,const void * handler_data)12823 static bool endmap(void *closure, const void *handler_data) {
12824   upb_json_printer *p = closure;
12825   UPB_UNUSED(handler_data);
12826   print_data(p, "}", 1);
12827   p->depth_--;
12828   return true;
12829 }
12830 
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12831 static size_t putstr(void *closure, const void *handler_data, const char *str,
12832                      size_t len, const upb_bufhandle *handle) {
12833   upb_json_printer *p = closure;
12834   UPB_UNUSED(handler_data);
12835   UPB_UNUSED(handle);
12836   putstring(p, str, len);
12837   return len;
12838 }
12839 
12840 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12841 static size_t putbytes(void *closure, const void *handler_data, const char *str,
12842                        size_t len, const upb_bufhandle *handle) {
12843   upb_json_printer *p = closure;
12844 
12845   /* This is the regular base64, not the "web-safe" version. */
12846   static const char base64[] =
12847       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12848 
12849   /* Base64-encode. */
12850   char data[16000];
12851   const char *limit = data + sizeof(data);
12852   const unsigned char *from = (const unsigned char*)str;
12853   char *to = data;
12854   size_t remaining = len;
12855   size_t bytes;
12856 
12857   UPB_UNUSED(handler_data);
12858   UPB_UNUSED(handle);
12859 
12860   print_data(p, "\"", 1);
12861 
12862   while (remaining > 2) {
12863     if (limit - to < 4) {
12864       bytes = to - data;
12865       putstring(p, data, bytes);
12866       to = data;
12867     }
12868 
12869     to[0] = base64[from[0] >> 2];
12870     to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12871     to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
12872     to[3] = base64[from[2] & 0x3f];
12873 
12874     remaining -= 3;
12875     to += 4;
12876     from += 3;
12877   }
12878 
12879   switch (remaining) {
12880     case 2:
12881       to[0] = base64[from[0] >> 2];
12882       to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12883       to[2] = base64[(from[1] & 0xf) << 2];
12884       to[3] = '=';
12885       to += 4;
12886       from += 2;
12887       break;
12888     case 1:
12889       to[0] = base64[from[0] >> 2];
12890       to[1] = base64[((from[0] & 0x3) << 4)];
12891       to[2] = '=';
12892       to[3] = '=';
12893       to += 4;
12894       from += 1;
12895       break;
12896   }
12897 
12898   bytes = to - data;
12899   putstring(p, data, bytes);
12900   print_data(p, "\"", 1);
12901   return len;
12902 }
12903 
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)12904 static void *scalar_startstr(void *closure, const void *handler_data,
12905                              size_t size_hint) {
12906   upb_json_printer *p = closure;
12907   UPB_UNUSED(handler_data);
12908   UPB_UNUSED(size_hint);
12909   CHK(putkey(closure, handler_data));
12910   print_data(p, "\"", 1);
12911   return p;
12912 }
12913 
/* String-chunk handler for a singular string field: delegates to putstr().
 * Returns `len`; a zero result from putstr yields 0, which only happens when
 * `len` is 0. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  if (putstr(closure, handler_data, str, len, handle) == 0) {
    return 0;
  }
  return len;
}
12920 
scalar_endstr(void * closure,const void * handler_data)12921 static bool scalar_endstr(void *closure, const void *handler_data) {
12922   upb_json_printer *p = closure;
12923   UPB_UNUSED(handler_data);
12924   print_data(p, "\"", 1);
12925   return true;
12926 }
12927 
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)12928 static void *repeated_startstr(void *closure, const void *handler_data,
12929                                size_t size_hint) {
12930   upb_json_printer *p = closure;
12931   UPB_UNUSED(handler_data);
12932   UPB_UNUSED(size_hint);
12933   print_comma(p);
12934   print_data(p, "\"", 1);
12935   return p;
12936 }
12937 
/* String-chunk handler for an element of a repeated string field: delegates
 * to putstr(). Returns `len`; a zero result from putstr yields 0, which only
 * happens when `len` is 0. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  if (putstr(closure, handler_data, str, len, handle) == 0) {
    return 0;
  }
  return len;
}
12944 
repeated_endstr(void * closure,const void * handler_data)12945 static bool repeated_endstr(void *closure, const void *handler_data) {
12946   upb_json_printer *p = closure;
12947   UPB_UNUSED(handler_data);
12948   print_data(p, "\"", 1);
12949   return true;
12950 }
12951 
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)12952 static void *mapkeyval_startstr(void *closure, const void *handler_data,
12953                                 size_t size_hint) {
12954   upb_json_printer *p = closure;
12955   UPB_UNUSED(handler_data);
12956   UPB_UNUSED(size_hint);
12957   print_data(p, "\"", 1);
12958   return p;
12959 }
12960 
/* String-chunk handler for a string map key: delegates to putstr(). Returns
 * `len`; a zero result from putstr yields 0, which only happens when `len`
 * is 0. */
static size_t mapkey_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  if (putstr(closure, handler_data, str, len, handle) == 0) {
    return 0;
  }
  return len;
}
12967 
mapkey_endstr(void * closure,const void * handler_data)12968 static bool mapkey_endstr(void *closure, const void *handler_data) {
12969   upb_json_printer *p = closure;
12970   UPB_UNUSED(handler_data);
12971   print_data(p, "\":", 2);
12972   return true;
12973 }
12974 
mapvalue_endstr(void * closure,const void * handler_data)12975 static bool mapvalue_endstr(void *closure, const void *handler_data) {
12976   upb_json_printer *p = closure;
12977   UPB_UNUSED(handler_data);
12978   print_data(p, "\"", 1);
12979   return true;
12980 }
12981 
/* Handler for a singular bytes field: prints the field key, then the value as
 * quoted base64 via putbytes(). Returns `len`, or 0 if putkey fails or
 * putbytes reports 0 (which it does only for an empty value). */
static size_t scalar_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  if (!putkey(closure, handler_data)) {
    return 0;
  }
  if (putbytes(closure, handler_data, str, len, handle) == 0) {
    return 0;
  }
  return len;
}
12989 
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12990 static size_t repeated_bytes(void *closure, const void *handler_data,
12991                              const char *str, size_t len,
12992                              const upb_bufhandle *handle) {
12993   upb_json_printer *p = closure;
12994   print_comma(p);
12995   CHK(putbytes(closure, handler_data, str, len, handle));
12996   return len;
12997 }
12998 
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12999 static size_t mapkey_bytes(void *closure, const void *handler_data,
13000                            const char *str, size_t len,
13001                            const upb_bufhandle *handle) {
13002   upb_json_printer *p = closure;
13003   CHK(putbytes(closure, handler_data, str, len, handle));
13004   print_data(p, ":", 1);
13005   return len;
13006 }
13007 
/* Allocates an EnumHandlerData for enum field `f`, fills in its enumdef and
 * key name, registers it with `h` so upb_gfree releases it at cleanup, and
 * stores it as the handler data of `attr`.
 * NOTE(review): the upb_gmalloc result is used without a NULL check. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        bool preserve_fieldnames,
                        upb_handlerattr *attr) {
  EnumHandlerData *enum_hd = upb_gmalloc(sizeof(*enum_hd));

  enum_hd->enumdef = upb_fielddef_enumsubdef(f);
  enum_hd->keyname = newstrpc(h, f, preserve_fieldnames);

  upb_handlers_addcleanup(h, enum_hd, upb_gfree);
  attr->handler_data = enum_hd;
}
13018 
13019 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
13020  * in a map).
13021  *
13022  * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
13023  * key or value cases properly. The right way to do this is to allocate a
13024  * temporary structure at the start of a mapentry submessage, store key and
13025  * value data in it as key and value handlers are called, and then print the
13026  * key/value pair once at the end of the submessage. If we don't do this, we
13027  * should at least detect the case and throw an error. However, so far all of
13028  * our sources that emit mapentry messages do so canonically (with one key
13029  * field, and then one value field), so this is not a pressing concern at the
13030  * moment. */
printer_sethandlers_mapentry(const void * closure,bool preserve_fieldnames,upb_handlers * h)13031 void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
13032                                   upb_handlers *h) {
13033   const upb_msgdef *md = upb_handlers_msgdef(h);
13034 
13035   /* A mapentry message is printed simply as '"key": value'. Rather than
13036    * special-case key and value for every type below, we just handle both
13037    * fields explicitly here. */
13038   const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
13039   const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
13040 
13041   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13042 
13043   UPB_UNUSED(closure);
13044 
13045   switch (upb_fielddef_type(key_field)) {
13046     case UPB_TYPE_INT32:
13047       upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
13048       break;
13049     case UPB_TYPE_INT64:
13050       upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
13051       break;
13052     case UPB_TYPE_UINT32:
13053       upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
13054       break;
13055     case UPB_TYPE_UINT64:
13056       upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
13057       break;
13058     case UPB_TYPE_BOOL:
13059       upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
13060       break;
13061     case UPB_TYPE_STRING:
13062       upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
13063       upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
13064       upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
13065       break;
13066     case UPB_TYPE_BYTES:
13067       upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
13068       break;
13069     default:
13070       UPB_ASSERT(false);
13071       break;
13072   }
13073 
13074   switch (upb_fielddef_type(value_field)) {
13075     case UPB_TYPE_INT32:
13076       upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
13077       break;
13078     case UPB_TYPE_INT64:
13079       upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
13080       break;
13081     case UPB_TYPE_UINT32:
13082       upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
13083       break;
13084     case UPB_TYPE_UINT64:
13085       upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
13086       break;
13087     case UPB_TYPE_BOOL:
13088       upb_handlers_setbool(h, value_field, putbool, &empty_attr);
13089       break;
13090     case UPB_TYPE_FLOAT:
13091       upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
13092       break;
13093     case UPB_TYPE_DOUBLE:
13094       upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
13095       break;
13096     case UPB_TYPE_STRING:
13097       upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
13098       upb_handlers_setstring(h, value_field, putstr, &empty_attr);
13099       upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
13100       break;
13101     case UPB_TYPE_BYTES:
13102       upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
13103       break;
13104     case UPB_TYPE_ENUM: {
13105       upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
13106       set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
13107       upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
13108       break;
13109     }
13110     case UPB_TYPE_MESSAGE:
13111       /* No handler necessary -- the submsg handlers will print the message
13112        * as appropriate. */
13113       break;
13114   }
13115 }
13116 
putseconds(void * closure,const void * handler_data,int64_t seconds)13117 static bool putseconds(void *closure, const void *handler_data,
13118                        int64_t seconds) {
13119   upb_json_printer *p = closure;
13120   p->seconds = seconds;
13121   UPB_UNUSED(handler_data);
13122   return true;
13123 }
13124 
putnanos(void * closure,const void * handler_data,int32_t nanos)13125 static bool putnanos(void *closure, const void *handler_data,
13126                      int32_t nanos) {
13127   upb_json_printer *p = closure;
13128   p->nanos = nanos;
13129   UPB_UNUSED(handler_data);
13130   return true;
13131 }
13132 
scalar_startstr_nokey(void * closure,const void * handler_data,size_t size_hint)13133 static void *scalar_startstr_nokey(void *closure, const void *handler_data,
13134                                    size_t size_hint) {
13135   upb_json_printer *p = closure;
13136   UPB_UNUSED(handler_data);
13137   UPB_UNUSED(size_hint);
13138   print_data(p, "\"", 1);
13139   return p;
13140 }
13141 
putstr_nokey(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)13142 static size_t putstr_nokey(void *closure, const void *handler_data,
13143                            const char *str, size_t len,
13144                            const upb_bufhandle *handle) {
13145   upb_json_printer *p = closure;
13146   UPB_UNUSED(handler_data);
13147   UPB_UNUSED(handle);
13148   print_data(p, "\"", 1);
13149   putstring(p, str, len);
13150   print_data(p, "\"", 1);
13151   return len + 2;
13152 }
13153 
startseq_nokey(void * closure,const void * handler_data)13154 static void *startseq_nokey(void *closure, const void *handler_data) {
13155   upb_json_printer *p = closure;
13156   UPB_UNUSED(handler_data);
13157   p->depth_++;
13158   p->first_elem_[p->depth_] = true;
13159   print_data(p, "[", 1);
13160   return closure;
13161 }
13162 
startseq_fieldmask(void * closure,const void * handler_data)13163 static void *startseq_fieldmask(void *closure, const void *handler_data) {
13164   upb_json_printer *p = closure;
13165   UPB_UNUSED(handler_data);
13166   p->depth_++;
13167   p->first_elem_[p->depth_] = true;
13168   return closure;
13169 }
13170 
endseq_fieldmask(void * closure,const void * handler_data)13171 static bool endseq_fieldmask(void *closure, const void *handler_data) {
13172   upb_json_printer *p = closure;
13173   UPB_UNUSED(handler_data);
13174   p->depth_--;
13175   return true;
13176 }
13177 
repeated_startstr_fieldmask(void * closure,const void * handler_data,size_t size_hint)13178 static void *repeated_startstr_fieldmask(
13179     void *closure, const void *handler_data,
13180     size_t size_hint) {
13181   upb_json_printer *p = closure;
13182   UPB_UNUSED(handler_data);
13183   UPB_UNUSED(size_hint);
13184   print_comma(p);
13185   return p;
13186 }
13187 
/* String-chunk handler for one fieldmask path. Underscores are dropped, and a
 * lowercase ASCII letter directly after one or more underscores is
 * upper-cased; every other byte is passed through unchanged, one byte at a
 * time, via putstr().
 *
 * Returns the number of bytes emitted, which is smaller than `len` whenever
 * the input contains underscores. Returns 0 if putstr reports 0. */
static size_t repeated_str_fieldmask(
    void *closure, const void *handler_data,
    const char *str, size_t len,
    const upb_bufhandle *handle) {
  size_t emitted = 0;
  bool capitalize_next = false;
  size_t i;

  for (i = 0; i < len; i++) {
    char c = str[i];

    if (c == '_') {
      capitalize_next = true;
      continue;
    }
    if (capitalize_next && c >= 'a' && c <= 'z') {
      c = toupper(c);
    }
    if (putstr(closure, handler_data, &c, 1, handle) == 0) {
      return 0;
    }
    capitalize_next = false;
    emitted++;
  }
  return emitted;
}
13211 
startmap_nokey(void * closure,const void * handler_data)13212 static void *startmap_nokey(void *closure, const void *handler_data) {
13213   upb_json_printer *p = closure;
13214   UPB_UNUSED(handler_data);
13215   p->depth_++;
13216   p->first_elem_[p->depth_] = true;
13217   print_data(p, "{", 1);
13218   return closure;
13219 }
13220 
putnull(void * closure,const void * handler_data,int32_t null)13221 static bool putnull(void *closure, const void *handler_data,
13222                     int32_t null) {
13223   upb_json_printer *p = closure;
13224   print_data(p, "null", 4);
13225   UPB_UNUSED(handler_data);
13226   UPB_UNUSED(null);
13227   return true;
13228 }
13229 
printer_startdurationmsg(void * closure,const void * handler_data)13230 static bool printer_startdurationmsg(void *closure, const void *handler_data) {
13231   upb_json_printer *p = closure;
13232   UPB_UNUSED(handler_data);
13233   if (p->depth_ == 0) {
13234     upb_bytessink_start(p->output_, 0, &p->subc_);
13235   }
13236   return true;
13237 }
13238 
13239 #define UPB_DURATION_MAX_JSON_LEN 23
13240 #define UPB_DURATION_MAX_NANO_LEN 9
13241 
printer_enddurationmsg(void * closure,const void * handler_data,upb_status * s)13242 static bool printer_enddurationmsg(void *closure, const void *handler_data,
13243                                    upb_status *s) {
13244   upb_json_printer *p = closure;
13245   char buffer[UPB_DURATION_MAX_JSON_LEN];
13246   size_t base_len;
13247   size_t curr;
13248   size_t i;
13249 
13250   memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN);
13251 
13252   if (p->seconds < -315576000000) {
13253     upb_status_seterrf(s, "error parsing duration: "
13254                           "minimum acceptable value is "
13255                           "-315576000000");
13256     return false;
13257   }
13258 
13259   if (p->seconds > 315576000000) {
13260     upb_status_seterrf(s, "error serializing duration: "
13261                           "maximum acceptable value is "
13262                           "315576000000");
13263     return false;
13264   }
13265 
13266   _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
13267   base_len = strlen(buffer);
13268 
13269   if (p->nanos != 0) {
13270     char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
13271     _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13272                   p->nanos / 1000000000.0);
13273     /* Remove trailing 0. */
13274     for (i = UPB_DURATION_MAX_NANO_LEN + 2;
13275          nanos_buffer[i] == '0'; i--) {
13276       nanos_buffer[i] = 0;
13277     }
13278     strcpy(buffer + base_len, nanos_buffer + 1);
13279   }
13280 
13281   curr = strlen(buffer);
13282   strcpy(buffer + curr, "s");
13283 
13284   p->seconds = 0;
13285   p->nanos = 0;
13286 
13287   print_data(p, "\"", 1);
13288   print_data(p, buffer, strlen(buffer));
13289   print_data(p, "\"", 1);
13290 
13291   if (p->depth_ == 0) {
13292     upb_bytessink_end(p->output_);
13293   }
13294 
13295   UPB_UNUSED(handler_data);
13296   return true;
13297 }
13298 
printer_starttimestampmsg(void * closure,const void * handler_data)13299 static bool printer_starttimestampmsg(void *closure, const void *handler_data) {
13300   upb_json_printer *p = closure;
13301   UPB_UNUSED(handler_data);
13302   if (p->depth_ == 0) {
13303     upb_bytessink_start(p->output_, 0, &p->subc_);
13304   }
13305   return true;
13306 }
13307 
13308 #define UPB_TIMESTAMP_MAX_JSON_LEN 31
13309 #define UPB_TIMESTAMP_BEFORE_NANO_LEN 19
13310 #define UPB_TIMESTAMP_MAX_NANO_LEN 9
13311 
printer_endtimestampmsg(void * closure,const void * handler_data,upb_status * s)13312 static bool printer_endtimestampmsg(void *closure, const void *handler_data,
13313                                     upb_status *s) {
13314   upb_json_printer *p = closure;
13315   char buffer[UPB_TIMESTAMP_MAX_JSON_LEN];
13316   time_t time = p->seconds;
13317   size_t curr;
13318   size_t i;
13319   size_t year_length =
13320       strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time));
13321 
13322   if (p->seconds < -62135596800) {
13323     upb_status_seterrf(s, "error parsing timestamp: "
13324                           "minimum acceptable value is "
13325                           "0001-01-01T00:00:00Z");
13326     return false;
13327   }
13328 
13329   if (p->seconds > 253402300799) {
13330     upb_status_seterrf(s, "error parsing timestamp: "
13331                           "maximum acceptable value is "
13332                           "9999-12-31T23:59:59Z");
13333     return false;
13334   }
13335 
13336   /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */
13337   for (i = 0; i < 4 - year_length; i++) {
13338     buffer[i] = '0';
13339   }
13340 
13341   strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN,
13342            "%Y-%m-%dT%H:%M:%S", gmtime(&time));
13343   if (p->nanos != 0) {
13344     char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
13345     _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13346                   p->nanos / 1000000000.0);
13347     /* Remove trailing 0. */
13348     for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
13349          nanos_buffer[i] == '0'; i--) {
13350       nanos_buffer[i] = 0;
13351     }
13352     strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1);
13353   }
13354 
13355   curr = strlen(buffer);
13356   strcpy(buffer + curr, "Z");
13357 
13358   p->seconds = 0;
13359   p->nanos = 0;
13360 
13361   print_data(p, "\"", 1);
13362   print_data(p, buffer, strlen(buffer));
13363   print_data(p, "\"", 1);
13364 
13365   if (p->depth_ == 0) {
13366     upb_bytessink_end(p->output_);
13367   }
13368 
13369   UPB_UNUSED(handler_data);
13370   UPB_UNUSED(s);
13371   return true;
13372 }
13373 
printer_startmsg_noframe(void * closure,const void * handler_data)13374 static bool printer_startmsg_noframe(void *closure, const void *handler_data) {
13375   upb_json_printer *p = closure;
13376   UPB_UNUSED(handler_data);
13377   if (p->depth_ == 0) {
13378     upb_bytessink_start(p->output_, 0, &p->subc_);
13379   }
13380   return true;
13381 }
13382 
/* endmsg handler for messages printed without surrounding braces: ends the
 * output bytes sink when the message is at depth 0. Always returns true; the
 * status argument is not used. */
static bool printer_endmsg_noframe(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  if (printer->depth_ == 0) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
13393 
printer_startmsg_fieldmask(void * closure,const void * handler_data)13394 static bool printer_startmsg_fieldmask(
13395     void *closure, const void *handler_data) {
13396   upb_json_printer *p = closure;
13397   UPB_UNUSED(handler_data);
13398   if (p->depth_ == 0) {
13399     upb_bytessink_start(p->output_, 0, &p->subc_);
13400   }
13401   print_data(p, "\"", 1);
13402   return true;
13403 }
13404 
/* endmsg handler for fieldmask messages: prints the closing quote and ends
 * the output bytes sink when the message is at depth 0. Always returns true;
 * the status argument is not used. */
static bool printer_endmsg_fieldmask(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  print_data(printer, "\"", 1);
  if (printer->depth_ == 0) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
13416 
scalar_startstr_onlykey(void * closure,const void * handler_data,size_t size_hint)13417 static void *scalar_startstr_onlykey(
13418     void *closure, const void *handler_data, size_t size_hint) {
13419   upb_json_printer *p = closure;
13420   UPB_UNUSED(size_hint);
13421   CHK(putkey(closure, handler_data));
13422   return p;
13423 }
13424 
13425 /* Set up handlers for an Any submessage. */
printer_sethandlers_any(const void * closure,upb_handlers * h)13426 void printer_sethandlers_any(const void *closure, upb_handlers *h) {
13427   const upb_msgdef *md = upb_handlers_msgdef(h);
13428 
13429   const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE);
13430   const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE);
13431 
13432   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13433 
13434   /* type_url's json name is "@type" */
13435   upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT;
13436   upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT;
13437   strpc *type_url_json_name = newstrpc_str(h, "@type");
13438   strpc *value_json_name = newstrpc_str(h, "value");
13439 
13440   type_name_attr.handler_data = type_url_json_name;
13441   value_name_attr.handler_data = value_json_name;
13442 
13443   /* Set up handlers. */
13444   upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
13445   upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
13446 
13447   upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr);
13448   upb_handlers_setstring(h, type_field, scalar_str, &empty_attr);
13449   upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr);
13450 
13451   /* This is not the full and correct JSON encoding for the Any value field. It
13452    * requires further processing by the wrapper code based on the type URL.
13453    */
13454   upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey,
13455                            &value_name_attr);
13456 
13457   UPB_UNUSED(closure);
13458 }
13459 
13460 /* Set up handlers for a fieldmask submessage. */
printer_sethandlers_fieldmask(const void * closure,upb_handlers * h)13461 void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) {
13462   const upb_msgdef *md = upb_handlers_msgdef(h);
13463   const upb_fielddef* f = upb_msgdef_itof(md, 1);
13464 
13465   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13466 
13467   upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr);
13468   upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr);
13469 
13470   upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr);
13471   upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr);
13472 
13473   upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr);
13474   upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr);
13475 
13476   UPB_UNUSED(closure);
13477 }
13478 
13479 /* Set up handlers for a duration submessage. */
printer_sethandlers_duration(const void * closure,upb_handlers * h)13480 void printer_sethandlers_duration(const void *closure, upb_handlers *h) {
13481   const upb_msgdef *md = upb_handlers_msgdef(h);
13482 
13483   const upb_fielddef* seconds_field =
13484       upb_msgdef_itof(md, UPB_DURATION_SECONDS);
13485   const upb_fielddef* nanos_field =
13486       upb_msgdef_itof(md, UPB_DURATION_NANOS);
13487 
13488   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13489 
13490   upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr);
13491   upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13492   upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13493   upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr);
13494 
13495   UPB_UNUSED(closure);
13496 }
13497 
13498 /* Set up handlers for a timestamp submessage. Instead of printing fields
13499  * separately, the json representation of timestamp follows RFC 3339 */
printer_sethandlers_timestamp(const void * closure,upb_handlers * h)13500 void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) {
13501   const upb_msgdef *md = upb_handlers_msgdef(h);
13502 
13503   const upb_fielddef* seconds_field =
13504       upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS);
13505   const upb_fielddef* nanos_field =
13506       upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS);
13507 
13508   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13509 
13510   upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr);
13511   upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13512   upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13513   upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr);
13514 
13515   UPB_UNUSED(closure);
13516 }
13517 
/* Set up handlers for a Value message.
 *
 * The message itself uses the frameless start/end handlers.  Each field is
 * then given a key-less handler chosen by its type: enum fields print via
 * putnull, doubles via putdouble, strings via the scalar string handlers
 * without a key, and bools via putbool.  Message-typed fields get no handler
 * here.  Any other field type trips an assertion. */
void printer_sethandlers_value(const void *closure, upb_handlers *h) {
  upb_handlerattr no_data = UPB_HANDLERATTR_INIT;
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  upb_msg_field_iter iter;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &no_data);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &no_data);

  for (upb_msg_field_begin(&iter, msgdef);
       !upb_msg_field_done(&iter);
       upb_msg_field_next(&iter)) {
    const upb_fielddef *field = upb_msg_iter_field(&iter);

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, putnull, &no_data);
        break;

      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, putdouble, &no_data);
        break;

      case UPB_TYPE_STRING:
        upb_handlers_setstartstr(h, field, scalar_startstr_nokey, &no_data);
        upb_handlers_setstring(h, field, scalar_str, &no_data);
        upb_handlers_setendstr(h, field, scalar_endstr, &no_data);
        break;

      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, putbool, &no_data);
        break;

      case UPB_TYPE_MESSAGE:
        /* Nothing is registered for message-typed fields. */
        break;

      default:
        UPB_ASSERT(false);
        break;
    }
  }
}
13556 
13557 #define WRAPPER_SETHANDLERS(wrapper, type, putmethod)                      \
13558 void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \
13559   const upb_msgdef *md = upb_handlers_msgdef(h);                           \
13560   const upb_fielddef* f = upb_msgdef_itof(md, 1);                          \
13561   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;                \
13562   upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);      \
13563   upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);          \
13564   upb_handlers_set##type(h, f, putmethod, &empty_attr);                    \
13565   UPB_UNUSED(closure);                                                     \
13566 }
13567 
WRAPPER_SETHANDLERS(doublevalue,double,putdouble)13568 WRAPPER_SETHANDLERS(doublevalue, double, putdouble)
13569 WRAPPER_SETHANDLERS(floatvalue,  float,  putfloat)
13570 WRAPPER_SETHANDLERS(int64value,  int64,  putint64_t)
13571 WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t)
13572 WRAPPER_SETHANDLERS(int32value,  int32,  putint32_t)
13573 WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t)
13574 WRAPPER_SETHANDLERS(boolvalue,   bool,   putbool)
13575 WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey)
13576 WRAPPER_SETHANDLERS(bytesvalue,  string, putbytes)
13577 
13578 #undef WRAPPER_SETHANDLERS
13579 
13580 void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) {
13581   const upb_msgdef *md = upb_handlers_msgdef(h);
13582   const upb_fielddef* f = upb_msgdef_itof(md, 1);
13583 
13584   upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13585 
13586   upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr);
13587   upb_handlers_setendseq(h, f, endseq, &empty_attr);
13588 
13589   upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13590   upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13591 
13592   upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
13593 
13594   UPB_UNUSED(closure);
13595 }
13596 
/* Set up handlers for a Struct message.
 *
 * The message uses the frameless start/end handlers.  The field returned by
 * upb_msgdef_itof(md, 1) is printed as a key-less map whose entries are
 * submessages started by repeated_startsubmsg. */
void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) {
  upb_handlerattr no_data = UPB_HANDLERATTR_INIT;
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  const upb_fielddef *fields_f = upb_msgdef_itof(msgdef, 1);

  UPB_UNUSED(closure);

  /* Map framing, without a leading key. */
  upb_handlers_setstartseq(h, fields_f, startmap_nokey, &no_data);
  upb_handlers_setendseq(h, fields_f, endmap, &no_data);

  /* Message framing. */
  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &no_data);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &no_data);

  /* Entry handler. */
  upb_handlers_setstartsubmsg(h, fields_f, repeated_startsubmsg, &no_data);
}
13613 
/* Registers the JSON printer's handlers on `h`.
 *
 * `closure` is read as a upb_json_printercache; its preserve_fieldnames flag
 * is forwarded to the helpers that build key names.
 *
 * Dispatch order:
 *   1. Map-entry messages are delegated to printer_sethandlers_mapentry().
 *   2. Well-known types are delegated to their dedicated
 *      printer_sethandlers_<type>() function.
 *   3. Every other message gets the generic start/end message handlers plus
 *      per-field handlers chosen by field type and by whether the field is a
 *      map, a sequence, or a scalar. */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  const upb_json_printercache *cache = closure;
  const bool preserve_fieldnames = cache->preserve_fieldnames;
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
  upb_msg_field_iter iter;

  /* mapentry messages are sufficiently different that we handle them
   * separately. */
  if (upb_msgdef_mapentry(md)) {
    printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
    return;
  }

  switch (upb_msgdef_wellknowntype(md)) {
    case UPB_WELLKNOWN_UNSPECIFIED:
      /* Not a well-known type: fall out to the generic setup below. */
      break;

    case UPB_WELLKNOWN_ANY:
      printer_sethandlers_any(closure, h);
      return;
    case UPB_WELLKNOWN_FIELDMASK:
      printer_sethandlers_fieldmask(closure, h);
      return;
    case UPB_WELLKNOWN_DURATION:
      printer_sethandlers_duration(closure, h);
      return;
    case UPB_WELLKNOWN_TIMESTAMP:
      printer_sethandlers_timestamp(closure, h);
      return;
    case UPB_WELLKNOWN_VALUE:
      printer_sethandlers_value(closure, h);
      return;
    case UPB_WELLKNOWN_LISTVALUE:
      printer_sethandlers_listvalue(closure, h);
      return;
    case UPB_WELLKNOWN_STRUCT:
      printer_sethandlers_structvalue(closure, h);
      return;

    /* Wrapper types. */
    case UPB_WELLKNOWN_DOUBLEVALUE:
      printer_sethandlers_doublevalue(closure, h);
      return;
    case UPB_WELLKNOWN_FLOATVALUE:
      printer_sethandlers_floatvalue(closure, h);
      return;
    case UPB_WELLKNOWN_INT64VALUE:
      printer_sethandlers_int64value(closure, h);
      return;
    case UPB_WELLKNOWN_UINT64VALUE:
      printer_sethandlers_uint64value(closure, h);
      return;
    case UPB_WELLKNOWN_INT32VALUE:
      printer_sethandlers_int32value(closure, h);
      return;
    case UPB_WELLKNOWN_UINT32VALUE:
      printer_sethandlers_uint32value(closure, h);
      return;
    case UPB_WELLKNOWN_BOOLVALUE:
      printer_sethandlers_boolvalue(closure, h);
      return;
    case UPB_WELLKNOWN_STRINGVALUE:
      printer_sethandlers_stringvalue(closure, h);
      return;
    case UPB_WELLKNOWN_BYTESVALUE:
      printer_sethandlers_bytesvalue(closure, h);
      return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

/* One switch case for a primitive field type.  Sequence elements use the
 * repeated_<ctype> handler with no handler data; scalars use the
 * scalar_<ctype> handler with the field's name attribute.  Relies on `h`,
 * `f`, `is_seq`, `name_attr` and `empty_attr` from the enclosing loop. */
#define PRIMITIVE_CASE(fieldtype, setter, ctype)                     \
  case fieldtype:                                                    \
    if (is_seq) {                                                    \
      upb_handlers_set##setter(h, f, repeated_##ctype, &empty_attr); \
    } else {                                                         \
      upb_handlers_set##setter(h, f, scalar_##ctype, &name_attr);    \
    }                                                                \
    break

  for (upb_msg_field_begin(&iter, md);
       !upb_msg_field_done(&iter);
       upb_msg_field_next(&iter)) {
    const upb_fielddef *f = upb_msg_iter_field(&iter);
    const bool is_seq = upb_fielddef_isseq(f);
    upb_handlerattr name_attr = UPB_HANDLERATTR_INIT;

    name_attr.handler_data = newstrpc(h, f, preserve_fieldnames);

    /* Container framing: the map check takes precedence over the sequence
     * check. */
    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (is_seq) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      PRIMITIVE_CASE(UPB_TYPE_FLOAT, float, float);
      PRIMITIVE_CASE(UPB_TYPE_DOUBLE, double, double);
      PRIMITIVE_CASE(UPB_TYPE_BOOL, bool, bool);
      PRIMITIVE_CASE(UPB_TYPE_INT32, int32, int32_t);
      PRIMITIVE_CASE(UPB_TYPE_UINT32, uint32, uint32_t);
      PRIMITIVE_CASE(UPB_TYPE_INT64, int64, int64_t);
      PRIMITIVE_CASE(UPB_TYPE_UINT64, uint64, uint64_t);

      case UPB_TYPE_ENUM: {
        /* For now, we always emit symbolic names for enums. We may want an
         * option later to control this behavior, but we will wait for a real
         * need first. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
        set_enum_hd(h, f, preserve_fieldnames, &enum_attr);

        if (is_seq) {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        }
        break;
      }

      case UPB_TYPE_STRING:
        if (is_seq) {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        } else {
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        }
        break;

      case UPB_TYPE_BYTES:
        /* XXX: this doesn't support strings that span buffers yet. The base64
         * encoder will need to be made resumable for this to work properly. */
        if (is_seq) {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        } else {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        }
        break;

      case UPB_TYPE_MESSAGE:
        if (is_seq) {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        }
        break;
    }
  }

#undef PRIMITIVE_CASE
}
13753 
/* Resets the printer's nesting depth to zero. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
13757 
13758 
13759 /* Public API *****************************************************************/
13760 
upb_json_printer_create(upb_arena * a,const upb_handlers * h,upb_bytessink output)13761 upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h,
13762                                           upb_bytessink output) {
13763 #ifndef NDEBUG
13764   size_t size_before = upb_arena_bytesallocated(a);
13765 #endif
13766 
13767   upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer));
13768   if (!p) return NULL;
13769 
13770   p->output_ = output;
13771   json_printer_reset(p);
13772   upb_sink_reset(&p->input_, h, p);
13773   p->seconds = 0;
13774   p->nanos = 0;
13775 
13776   /* If this fails, increase the value in printer.h. */
13777   UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
13778                       UPB_JSON_PRINTER_SIZE);
13779   return p;
13780 }
13781 
upb_json_printer_input(upb_json_printer * p)13782 upb_sink upb_json_printer_input(upb_json_printer *p) {
13783   return p->input_;
13784 }
13785 
upb_json_printer_newcache(bool preserve_proto_fieldnames)13786 upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) {
13787   upb_json_printercache *cache = upb_gmalloc(sizeof(*cache));
13788   upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache);
13789 
13790   cache->preserve_fieldnames = preserve_proto_fieldnames;
13791   upb_handlercache_addcleanup(ret, cache, upb_gfree);
13792 
13793   return ret;
13794 }
13795 /* See port_def.inc.  This should #undef all macros #defined there. */
13796 
13797 #undef UPB_MAPTYPE_STRING
13798 #undef UPB_SIZE
13799 #undef UPB_PTR_AT
13800 #undef UPB_READ_ONEOF
13801 #undef UPB_WRITE_ONEOF
13802 #undef UPB_INLINE
13803 #undef UPB_FORCEINLINE
13804 #undef UPB_NOINLINE
13805 #undef UPB_NORETURN
13806 #undef UPB_MAX
13807 #undef UPB_MIN
13808 #undef UPB_UNUSED
13809 #undef UPB_ASSUME
13810 #undef UPB_ASSERT
13811 #undef UPB_ASSERT_DEBUGVAR
13812 #undef UPB_UNREACHABLE
13813 #undef UPB_INFINITY
13814 #undef UPB_MSVC_VSNPRINTF
13815 #undef _upb_snprintf
13816 #undef _upb_vsnprintf
13817 #undef _upb_va_copy
13818