1 // Amalgamated source file
2 #include "upb.h"
3
4
5 #include <ctype.h>
6 #include <stdlib.h>
7 #include <string.h>
8
/* Length-prefixed string, used in this file to hold string-valued field
 * defaults.  The one-element array marks where the character data begins;
 * newstr() allocates sizeof(str_t) + len bytes so that "len" data bytes plus
 * a terminating NUL fit. */
typedef struct {
  size_t len;   /* Number of data bytes, not counting the terminating NUL. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
13
newstr(const char * data,size_t len)14 static str_t *newstr(const char *data, size_t len) {
15 str_t *ret = upb_gmalloc(sizeof(*ret) + len);
16 if (!ret) return NULL;
17 ret->len = len;
18 memcpy(ret->str, data, len);
19 ret->str[len] = '\0';
20 return ret;
21 }
22
/* Releases a string with upb_gfree(); the counterpart of newstr(). */
static void freestr(str_t *s) {
  upb_gfree(s);
}
24
25 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
/* Returns true when "c" lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return c <= high;
}
29
/* Returns true for 'A'-'Z', 'a'-'z' and the underscore. */
static bool upb_isletter(char c) {
  if (c == '_') {
    return true;
  }
  if (upb_isbetween(c, 'A', 'Z')) {
    return true;
  }
  return upb_isbetween(c, 'a', 'z');
}
33
/* Returns true for anything upb_isletter() accepts, plus '0'-'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) {
    return true;
  }
  return upb_isletter(c);
}
37
/* Checks that the "len" bytes at "str" form a valid identifier.
 *
 * Every component must start with a letter or underscore and continue with
 * letters, digits or underscores.  When "full" is true, components may be
 * separated by '.'; otherwise any '.' is rejected.  Empty names, empty
 * components and a trailing '.' are rejected too.
 *
 * Returns true if the name is valid.  On every failure an error is recorded
 * in "s".  The error messages print "str" with %s, so "str" must be
 * NUL-terminated even though only the first "len" bytes are examined. */
static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
  bool start = true;  /* True while the next char must begin a component. */
  size_t i;

  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s, "invalid name: path components must start with a letter (%s)",
            str);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
                           str);
        return false;
      }
    }
  }

  if (start) {
    /* Either the name was empty or it ended in '.'.  Report it, so that a
     * false return is always accompanied by an error in the status. */
    upb_status_seterrf(s, "invalid name: empty name or trailing '.' (%s)",
                       str);
    return false;
  }

  return true;
}
67
upb_isoneof(const upb_refcounted * def)68 static bool upb_isoneof(const upb_refcounted *def) {
69 return def->vtbl == &upb_oneofdef_vtbl;
70 }
71
upb_isfield(const upb_refcounted * def)72 static bool upb_isfield(const upb_refcounted *def) {
73 return def->vtbl == &upb_fielddef_vtbl;
74 }
75
upb_trygetoneof(const upb_refcounted * def)76 static const upb_oneofdef *upb_trygetoneof(const upb_refcounted *def) {
77 return upb_isoneof(def) ? (const upb_oneofdef*)def : NULL;
78 }
79
upb_trygetfield(const upb_refcounted * def)80 static const upb_fielddef *upb_trygetfield(const upb_refcounted *def) {
81 return upb_isfield(def) ? (const upb_fielddef*)def : NULL;
82 }
83
84
85 /* upb_def ********************************************************************/
86
upb_def_type(const upb_def * d)87 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
88
upb_def_fullname(const upb_def * d)89 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
90
upb_def_name(const upb_def * d)91 const char *upb_def_name(const upb_def *d) {
92 const char *p;
93
94 if (d->fullname == NULL) {
95 return NULL;
96 } else if ((p = strrchr(d->fullname, '.')) == NULL) {
97 /* No '.' in the name, return the full string. */
98 return d->fullname;
99 } else {
100 /* Return one past the last '.'. */
101 return p + 1;
102 }
103 }
104
/* Replaces the def's full name with a private copy of "fullname".
 *
 * The def must not be frozen (asserted).  "fullname" must pass upb_isident()
 * in "full" mode.  Returns false, leaving the old name untouched, if the name
 * is rejected or the copy cannot be allocated (in which case an out-of-memory
 * error is set on "s"). */
bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
  const char *copy;

  assert(!upb_def_isfrozen(def));

  if (!upb_isident(fullname, strlen(fullname), true, s)) {
    return false;
  }

  copy = upb_gstrdup(fullname);
  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Only drop the previous name once the replacement is secured. */
  upb_gfree((void*)def->fullname);
  def->fullname = copy;
  return true;
}
121
upb_def_file(const upb_def * d)122 const upb_filedef *upb_def_file(const upb_def *d) { return d->file; }
123
upb_def_dup(const upb_def * def,const void * o)124 upb_def *upb_def_dup(const upb_def *def, const void *o) {
125 switch (def->type) {
126 case UPB_DEF_MSG:
127 return upb_msgdef_upcast_mutable(
128 upb_msgdef_dup(upb_downcast_msgdef(def), o));
129 case UPB_DEF_FIELD:
130 return upb_fielddef_upcast_mutable(
131 upb_fielddef_dup(upb_downcast_fielddef(def), o));
132 case UPB_DEF_ENUM:
133 return upb_enumdef_upcast_mutable(
134 upb_enumdef_dup(upb_downcast_enumdef(def), o));
135 default: assert(false); return NULL;
136 }
137 }
138
/* Initializes the common part of a def: sets up the refcounted base with
 * "vtbl" and "owner", records "type", and clears the name, file and
 * came_from_user fields.  Returns false if the refcounted base could not be
 * initialized. */
static bool upb_def_init(upb_def *def, upb_deftype_t type,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
  if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) {
    return false;
  }

  def->fullname = NULL;
  def->file = NULL;
  def->came_from_user = false;
  def->type = type;
  return true;
}
149
/* Frees the def's full-name string (the only storage the common def part
 * releases here). */
static void upb_def_uninit(upb_def *def) {
  void *name = (void*)def->fullname;
  upb_gfree(name);
}
153
/* Returns the message's full name for use in error messages, substituting
 * "(anonymous)" when it has none. */
static const char *msgdef_name(const upb_msgdef *m) {
  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));
  if (fullname == NULL) {
    return "(anonymous)";
  }
  return fullname;
}
158
/* Validates one field before it is frozen.  Returns true if the field is
 * acceptable; otherwise records an error in "s" and returns false.
 *
 * Checks, in order:
 *   - name and number are set, and the type has been initialized;
 *   - "lazy" is only used on fields whose descriptor type is MESSAGE;
 *   - a field that takes a subdef has it resolved (not symbolic), non-NULL,
 *     and either already frozen or marked came_from_user;
 *   - an enum field's default is resolvable both as a name and as a number;
 *   - a mapentry submessage is only referenced from a repeated field.
 *
 * For enum fields, the effective numeric default is also stored back into the
 * field itself. */
static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
  if (!upb_fielddef_name(f) || !upb_fielddef_number(f)) {
    upb_status_seterrmsg(s, "fielddef must have name and number set");
    return false;
  }

  if (!f->type_is_set_) {
    upb_status_seterrmsg(s, "fielddef type was not initialized");
    return false;
  }

  if (upb_fielddef_lazy(f) &&
      upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
    upb_status_seterrmsg(s,
                         "only length-delimited submessage fields may be lazy");
    return false;
  }

  if (upb_fielddef_hassubdef(f)) {
    const upb_def *target;

    if (f->subdef_is_symbolic) {
      upb_status_seterrf(s, "field '%s.%s' has not been resolved",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    target = upb_fielddef_subdef(f);
    if (!target) {
      upb_status_seterrf(s, "field %s.%s is missing required subdef",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    /* The subdef must be frozen already or be part of the batch that is
     * being frozen right now. */
    if (!(upb_def_isfrozen(target) || target->came_from_user)) {
      upb_status_seterrf(s,
                         "subdef of field %s.%s is not frozen or being frozen",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }
  }

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    bool name_known = upb_fielddef_enumhasdefaultstr(f);
    bool number_known = upb_fielddef_enumhasdefaultint32(f);

    /* Previously verified by upb_validate_enumdef(). */
    assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);

    /* The enumdef exists and has at least one member, so at least one of the
     * two lookups must have succeeded: an unset default falls back to the
     * enum's own default, and a user-set default is visible in whichever
     * form (int32 or string) the user supplied. */
    assert(name_known || number_known);

    if (!name_known) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%d) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultint32(f));
      return false;
    }

    if (!number_known) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%s) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultstr(f, NULL));
      return false;
    }

    /* Store the effective numeric default in the field itself, in case it
     * was only being obtained "by reference" from the enumdef. */
    upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
  }

  /* MapEntry submessages may only appear as repeated fields, never as
   * optional/required (singular) fields. */
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) {
    if (upb_fielddef_msgsubdef(f) != NULL) {
      const upb_msgdef *submsg = upb_fielddef_msgsubdef(f);
      if (upb_msgdef_mapentry(submsg) && !upb_fielddef_isseq(f)) {
        upb_status_seterrf(s,
                           "Field %s refers to mapentry message but is not "
                           "a repeated field",
                           upb_fielddef_name(f) ? upb_fielddef_name(f) :
                           "(unnamed)");
        return false;
      }
    }
  }

  return true;
}
253
/* An enum is valid for freezing only if it has at least one value.  Sets an
 * error on "s" and returns false otherwise. */
static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
  if (upb_enumdef_numvals(e) != 0) {
    return true;
  }

  upb_status_seterrf(s, "enum %s has no members (must have at least one)",
                     upb_enumdef_fullname(e));
  return false;
}
263
264 /* All submessage fields are lower than all other fields.
265 * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)266 uint32_t field_rank(const upb_fielddef *f) {
267 uint32_t ret = upb_fielddef_number(f);
268 const uint32_t high_bit = 1 << 30;
269 assert(ret < high_bit);
270 if (!upb_fielddef_issubmsg(f))
271 ret |= high_bit;
272 return ret;
273 }
274
cmp_fields(const void * p1,const void * p2)275 int cmp_fields(const void *p1, const void *p2) {
276 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
277 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
278 return field_rank(f1) - field_rank(f2);
279 }
280
/* Validates every field of "m" and assigns each field its index_ and
 * selector_base, also filling in m->submsg_field_count and
 * m->selector_count.
 *
 * Returns false (with an error in "s") if the scratch array cannot be
 * allocated or any field fails upb_validate_field().  In builds without
 * NDEBUG, additionally checks that all selectors of the message are
 * distinct. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
   * lowest indexes, but we do not publicly guarantee this. */
  upb_msg_field_iter j;
  int i;
  uint32_t selector;
  int n = upb_msgdef_numfields(m);
  upb_fielddef **fields;

  /* A message without fields only has the static selectors. */
  if (n == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return true;
  }

  /* Scratch array of field pointers, freed on every exit path below. */
  fields = upb_gmalloc(n * sizeof(*fields));
  if (!fields) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Collect the fields, validating each one and counting submessage
   * fields. */
  m->submsg_field_count = 0;
  for(i = 0, upb_msg_field_begin(&j, m);
      !upb_msg_field_done(&j);
      upb_msg_field_next(&j), i++) {
    upb_fielddef *f = upb_msg_iter_field(&j);
    assert(f->msg.def == m);
    if (!upb_validate_field(f, s)) {
      upb_gfree(fields);
      return false;
    }
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    fields[i] = f;
  }

  /* cmp_fields orders by field_rank(): submessage fields first, then by
   * field number. */
  qsort(fields, n, sizeof(*fields), cmp_fields);

  /* Field selectors start after the static selectors plus one slot per
   * submessage field; each field then claims
   * upb_handlers_selectorcount(f) consecutive selectors. */
  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (i = 0; i < n; i++) {
    upb_fielddef *f = fields[i];
    f->index_ = i;
    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
    selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = selector;

#ifndef NDEBUG
  {
    /* Verify that all selectors for the message are distinct. */
    /* TRY inserts the selector for one handler type, if the field has one.
     * Note that the macro body already ends in a semicolon. */
#define TRY(type) \
    if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);

    upb_inttable t;
    upb_value v;
    upb_selector_t sel;

    upb_inttable_init(&t, UPB_CTYPE_BOOL);
    v = upb_value_bool(true);
    upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
    upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
    for(upb_msg_field_begin(&j, m);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {
      upb_fielddef *f = upb_msg_iter_field(&j);
      /* These calls will assert-fail in upb_table if the value already
       * exists. */
      TRY(UPB_HANDLER_INT32);
      TRY(UPB_HANDLER_INT64)
      TRY(UPB_HANDLER_UINT32)
      TRY(UPB_HANDLER_UINT64)
      TRY(UPB_HANDLER_FLOAT)
      TRY(UPB_HANDLER_DOUBLE)
      TRY(UPB_HANDLER_BOOL)
      TRY(UPB_HANDLER_STARTSTR)
      TRY(UPB_HANDLER_STRING)
      TRY(UPB_HANDLER_ENDSTR)
      TRY(UPB_HANDLER_STARTSUBMSG)
      TRY(UPB_HANDLER_ENDSUBMSG)
      TRY(UPB_HANDLER_STARTSEQ)
      TRY(UPB_HANDLER_ENDSEQ)
    }
    upb_inttable_uninit(&t);
  }
#undef TRY
#endif

  upb_gfree(fields);
  return true;
}
372
/* Validates the "n" defs in "defs" as a batch that is about to be frozen.
 *
 * Returns true if every def is valid.  On failure, records an error in "s",
 * clears came_from_user on every def of the batch and returns false.
 *
 * Side effects on success: came_from_user stays set on every def of the
 * batch, message fields have their indexes/selectors assigned, and the
 * msgdef/enumdef integer tables are compacted. */
bool _upb_def_validate(upb_def *const*defs, size_t n, upb_status *s) {
  size_t i;

  /* First perform validation, in two passes so we can check that we have a
   * transitive closure without needing to search. */
  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];
    if (upb_def_isfrozen(def)) {
      /* Could relax this requirement if it's annoying. */
      upb_status_seterrmsg(s, "def is already frozen");
      goto err;
    } else if (def->type == UPB_DEF_FIELD) {
      upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
      goto err;
    } else {
      /* Set now to detect transitive closure in the second pass.  This must
       * cover enums as well as messages: upb_validate_field() only accepts
       * an unfrozen subdef when it is marked came_from_user, and enum fields
       * have an enumdef as their subdef. */
      def->came_from_user = true;

      if (def->type == UPB_DEF_ENUM &&
          !upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
        goto err;
      }
    }
  }

  /* Second pass of validation.  Also assign selector bases and indexes, and
   * compact tables. */
  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];
    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
    upb_enumdef *e = upb_dyncast_enumdef_mutable(def);
    if (m) {
      upb_inttable_compact(&m->itof);
      if (!assign_msg_indices(m, s)) {
        goto err;
      }
    } else if (e) {
      upb_inttable_compact(&e->iton);
    }
  }

  return true;

err:
  /* Undo the batch marking so a later attempt starts from a clean state. */
  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];
    def->came_from_user = false;
  }
  assert(!(s && upb_ok(s)));
  return false;
}
423
/* Validates the "n" defs in "defs" with _upb_def_validate() and, if that
 * succeeds, freezes them through upb_refcounted_freeze().  Returns false if
 * either step fails. */
bool upb_def_freeze(upb_def *const* defs, size_t n, upb_status *s) {
  /* Def graph contains FieldDefs between each MessageDef, so double the
   * limit. */
  const size_t maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;

  if (_upb_def_validate(defs, n, s)) {
    /* Validation all passed; freeze the objects. */
    return upb_refcounted_freeze((upb_refcounted *const*)defs, n, s, maxdepth);
  }

  return false;
}
437
438
439 /* upb_enumdef ****************************************************************/
440
/* Destructor for enumdefs: frees the name copies held as values of the
 * number->name table, tears down both tables and the common def part, then
 * frees the enumdef itself. */
static void upb_enumdef_free(upb_refcounted *r) {
  upb_enumdef *e = (upb_enumdef*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &e->iton);
  while (!upb_inttable_done(&it)) {
    /* To clean up the upb_gstrdup() from upb_enumdef_addval(). */
    upb_gfree(upb_value_getcstr(upb_inttable_iter_value(&it)));
    upb_inttable_next(&it);
  }

  upb_strtable_uninit(&e->ntoi);
  upb_inttable_uninit(&e->iton);
  upb_def_uninit(upb_enumdef_upcast_mutable(e));
  upb_gfree(e);
}
454
/* Refcounting vtable for enumdefs.  The first entry is left NULL; the second
 * is upb_enumdef_free, the enumdef destructor. */
const struct upb_refcounted_vtbl upb_enumdef_vtbl = {NULL, &upb_enumdef_free};
456
upb_enumdef_new(const void * owner)457 upb_enumdef *upb_enumdef_new(const void *owner) {
458 upb_enumdef *e = upb_gmalloc(sizeof(*e));
459 if (!e) return NULL;
460
461 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM,
462 &upb_enumdef_vtbl, owner)) {
463 goto err2;
464 }
465
466 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
467 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
468 return e;
469
470 err1:
471 upb_strtable_uninit(&e->ntoi);
472 err2:
473 upb_gfree(e);
474 return NULL;
475 }
476
upb_enumdef_dup(const upb_enumdef * e,const void * owner)477 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
478 upb_enum_iter i;
479 upb_enumdef *new_e = upb_enumdef_new(owner);
480 if (!new_e) return NULL;
481 for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
482 bool success = upb_enumdef_addval(
483 new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
484 if (!success) {
485 upb_enumdef_unref(new_e, owner);
486 return NULL;
487 }
488 }
489 return new_e;
490 }
491
/* Freezes a single enumdef by passing it to upb_def_freeze() as a batch of
 * one. */
bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
  upb_def *defs[1];
  defs[0] = upb_enumdef_upcast_mutable(e);
  return upb_def_freeze(defs, 1, status);
}
496
upb_enumdef_fullname(const upb_enumdef * e)497 const char *upb_enumdef_fullname(const upb_enumdef *e) {
498 return upb_def_fullname(upb_enumdef_upcast(e));
499 }
500
upb_enumdef_name(const upb_enumdef * e)501 const char *upb_enumdef_name(const upb_enumdef *e) {
502 return upb_def_name(upb_enumdef_upcast(e));
503 }
504
/* Sets the enum's full name; forwards to upb_def_setfullname(). */
bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
                             upb_status *s) {
  upb_def *def = upb_enumdef_upcast_mutable(e);
  return upb_def_setfullname(def, fullname, s);
}
509
/* Adds the value "name" = "num" to the enum.
 *
 * "name" must be a valid single-component identifier that is not yet defined
 * in the enum.  Several names may map to the same number; the number->name
 * table keeps only the first name registered for a number.  The first value
 * ever added also becomes the enum's default.
 *
 * Returns false, with an error in "status", if the name is invalid or a
 * duplicate, or if memory runs out.  On an out-of-memory failure while
 * updating the number->name table, the name->number entry added by this call
 * is removed again. */
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
                        upb_status *status) {
  if (!upb_isident(name, strlen(name), false, status)) {
    return false;
  }

  if (upb_enumdef_ntoiz(e, name, NULL)) {
    upb_status_seterrf(status, "name '%s' is already defined", name);
    return false;
  }

  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
    upb_status_seterrmsg(status, "out of memory");
    return false;
  }

  if (!upb_inttable_lookup(&e->iton, num, NULL)) {
    char *name2 = upb_gstrdup(name);
    if (!name2 || !upb_inttable_insert(&e->iton, num, upb_value_cstr(name2))) {
      /* The table did not take ownership of the copy, so release it here
       * (a no-op when the copy itself failed and name2 is NULL). */
      upb_gfree(name2);
      upb_status_seterrmsg(status, "out of memory");
      upb_strtable_remove(&e->ntoi, name, NULL);
      return false;
    }
  }

  if (upb_enumdef_numvals(e) == 1) {
    bool ok = upb_enumdef_setdefault(e, num, NULL);
    UPB_ASSERT_VAR(ok, ok);
  }

  return true;
}
544
upb_enumdef_default(const upb_enumdef * e)545 int32_t upb_enumdef_default(const upb_enumdef *e) {
546 assert(upb_enumdef_iton(e, e->defaultval));
547 return e->defaultval;
548 }
549
/* Makes "val" the enum's default.  The enum must not be frozen (asserted)
 * and "val" must already be one of its numbers; otherwise an error is set on
 * "s" and false is returned. */
bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
  assert(!upb_enumdef_isfrozen(e));

  if (upb_enumdef_iton(e, val)) {
    e->defaultval = val;
    return true;
  }

  upb_status_seterrf(s, "number '%d' is not in the enum.", val);
  return false;
}
559
upb_enumdef_numvals(const upb_enumdef * e)560 int upb_enumdef_numvals(const upb_enumdef *e) {
561 return upb_strtable_count(&e->ntoi);
562 }
563
/* Positions "i" at the first value of "e".  Iteration walks the
 * name->number table, so every name is visited even when several names share
 * one number. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *names = &e->ntoi;
  upb_strtable_begin(i, names);
}
568
/* Advances the iterator to the next enum value. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once the iterator has moved past the last enum value. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
571
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)572 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
573 size_t len, int32_t *num) {
574 upb_value v;
575 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
576 return false;
577 }
578 if (num) *num = upb_value_getint32(v);
579 return true;
580 }
581
upb_enumdef_iton(const upb_enumdef * def,int32_t num)582 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
583 upb_value v;
584 return upb_inttable_lookup32(&def->iton, num, &v) ?
585 upb_value_getcstr(v) : NULL;
586 }
587
/* Returns the name of the enum value the iterator currently points at (the
 * key of the underlying string-table entry). */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *key = upb_strtable_iter_key(iter);
  return key;
}
591
/* Returns the number of the enum value the iterator currently points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  upb_value entry = upb_strtable_iter_value(iter);
  return upb_value_getint32(entry);
}
595
596
597 /* upb_fielddef ***************************************************************/
598
/* Forward declaration; the definition appears further down in this file. */
static void upb_fielddef_init_default(upb_fielddef *f);
600
/* Frees the field's string default, if it has one.  The default is only
 * examined once the type has been set. */
static void upb_fielddef_uninit_default(upb_fielddef *f) {
  if (!f->type_is_set_) return;
  if (!f->default_is_string) return;
  if (f->defaultval.bytes) {
    freestr(f->defaultval.bytes);
  }
}
605
upb_fielddef_fullname(const upb_fielddef * e)606 const char *upb_fielddef_fullname(const upb_fielddef *e) {
607 return upb_def_fullname(upb_fielddef_upcast(e));
608 }
609
/* Refcounted "visit" callback for fielddefs: reports each object the field
 * references -- its containing message, its containing oneof and its
 * resolved subdef -- to "visit", skipping those that are absent. */
static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_fielddef *f = (const upb_fielddef*)r;
  const upb_msgdef *msg;
  const upb_oneofdef *oneof;
  const upb_def *sub;

  msg = upb_fielddef_containingtype(f);
  if (msg) {
    visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
  }

  oneof = upb_fielddef_containingoneof(f);
  if (oneof) {
    visit(r, upb_oneofdef_upcast(upb_fielddef_containingoneof(f)), closure);
  }

  sub = upb_fielddef_subdef(f);
  if (sub) {
    visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
  }
}
623
/* Destructor for fielddefs: releases the string default, any symbolic names
 * the field still owns, the common def part, and finally the field itself. */
static void freefield(upb_refcounted *r) {
  upb_fielddef *f = (upb_fielddef*)r;
  upb_fielddef_uninit_default(f);
  if (f->subdef_is_symbolic)
    upb_gfree(f->sub.name);
  /* A symbolic containing-type name is a private copy made by
   * upb_fielddef_setcontainingtypename(); release it too, otherwise it leaks
   * when the field is destroyed while still holding it. */
  if (f->msg_is_symbolic)
    upb_gfree(f->msg.name);
  upb_def_uninit(upb_fielddef_upcast_mutable(f));
  upb_gfree(f);
}
632
enumdefaultstr(const upb_fielddef * f)633 static const char *enumdefaultstr(const upb_fielddef *f) {
634 const upb_enumdef *e;
635 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
636 e = upb_fielddef_enumsubdef(f);
637 if (f->default_is_string && f->defaultval.bytes) {
638 /* Default was explicitly set as a string. */
639 str_t *s = f->defaultval.bytes;
640 return s->str;
641 } else if (e) {
642 if (!f->default_is_string) {
643 /* Default was explicitly set as an integer; look it up in enumdef. */
644 const char *name = upb_enumdef_iton(e, f->defaultval.sint);
645 if (name) {
646 return name;
647 }
648 } else {
649 /* Default is completely unset; pull enumdef default. */
650 if (upb_enumdef_numvals(e) > 0) {
651 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
652 assert(name);
653 return name;
654 }
655 }
656 }
657 return NULL;
658 }
659
enumdefaultint32(const upb_fielddef * f,int32_t * val)660 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
661 const upb_enumdef *e;
662 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
663 e = upb_fielddef_enumsubdef(f);
664 if (!f->default_is_string) {
665 /* Default was explicitly set as an integer. */
666 *val = f->defaultval.sint;
667 return true;
668 } else if (e) {
669 if (f->defaultval.bytes) {
670 /* Default was explicitly set as a str; try to lookup corresponding int. */
671 str_t *s = f->defaultval.bytes;
672 if (upb_enumdef_ntoiz(e, s->str, val)) {
673 return true;
674 }
675 } else {
676 /* Default is unset; try to pull in enumdef default. */
677 if (upb_enumdef_numvals(e) > 0) {
678 *val = upb_enumdef_default(e);
679 return true;
680 }
681 }
682 }
683 return false;
684 }
685
/* Refcounting vtable for fielddefs: visitfield reports the objects a field
 * references, freefield destroys the field. */
const struct upb_refcounted_vtbl upb_fielddef_vtbl = {visitfield, freefield};
687
upb_fielddef_new(const void * o)688 upb_fielddef *upb_fielddef_new(const void *o) {
689 upb_fielddef *f = upb_gmalloc(sizeof(*f));
690 if (!f) return NULL;
691 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD,
692 &upb_fielddef_vtbl, o)) {
693 upb_gfree(f);
694 return NULL;
695 }
696 f->msg.def = NULL;
697 f->sub.def = NULL;
698 f->oneof = NULL;
699 f->subdef_is_symbolic = false;
700 f->msg_is_symbolic = false;
701 f->label_ = UPB_LABEL_OPTIONAL;
702 f->type_ = UPB_TYPE_INT32;
703 f->number_ = 0;
704 f->type_is_set_ = false;
705 f->tagdelim = false;
706 f->is_extension_ = false;
707 f->lazy_ = false;
708 f->packed_ = true;
709
710 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
711 * with all integer types and is in some since more "default" since the most
712 * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
713 *
714 * Other options to consider:
715 * - there is no default; users must set this manually (like type).
716 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
717 * be an optimal default for signed integers. */
718 f->intfmt = UPB_INTFMT_VARIABLE;
719 return f;
720 }
721
upb_fielddef_dup(const upb_fielddef * f,const void * owner)722 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
723 const char *srcname;
724 upb_fielddef *newf = upb_fielddef_new(owner);
725 if (!newf) return NULL;
726 upb_fielddef_settype(newf, upb_fielddef_type(f));
727 upb_fielddef_setlabel(newf, upb_fielddef_label(f));
728 upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
729 upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
730 if (f->default_is_string && f->defaultval.bytes) {
731 str_t *s = f->defaultval.bytes;
732 upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
733 } else {
734 newf->default_is_string = f->default_is_string;
735 newf->defaultval = f->defaultval;
736 }
737
738 if (f->subdef_is_symbolic) {
739 srcname = f->sub.name; /* Might be NULL. */
740 } else {
741 srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
742 }
743 if (srcname) {
744 char *newname = upb_gmalloc(strlen(f->sub.def->fullname) + 2);
745 if (!newname) {
746 upb_fielddef_unref(newf, owner);
747 return NULL;
748 }
749 strcpy(newname, ".");
750 strcat(newname, f->sub.def->fullname);
751 upb_fielddef_setsubdefname(newf, newname, NULL);
752 upb_gfree(newname);
753 }
754
755 return newf;
756 }
757
upb_fielddef_typeisset(const upb_fielddef * f)758 bool upb_fielddef_typeisset(const upb_fielddef *f) {
759 return f->type_is_set_;
760 }
761
upb_fielddef_type(const upb_fielddef * f)762 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
763 assert(f->type_is_set_);
764 return f->type_;
765 }
766
upb_fielddef_index(const upb_fielddef * f)767 uint32_t upb_fielddef_index(const upb_fielddef *f) {
768 return f->index_;
769 }
770
upb_fielddef_label(const upb_fielddef * f)771 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
772 return f->label_;
773 }
774
upb_fielddef_intfmt(const upb_fielddef * f)775 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
776 return f->intfmt;
777 }
778
upb_fielddef_istagdelim(const upb_fielddef * f)779 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
780 return f->tagdelim;
781 }
782
upb_fielddef_number(const upb_fielddef * f)783 uint32_t upb_fielddef_number(const upb_fielddef *f) {
784 return f->number_;
785 }
786
upb_fielddef_isextension(const upb_fielddef * f)787 bool upb_fielddef_isextension(const upb_fielddef *f) {
788 return f->is_extension_;
789 }
790
upb_fielddef_lazy(const upb_fielddef * f)791 bool upb_fielddef_lazy(const upb_fielddef *f) {
792 return f->lazy_;
793 }
794
upb_fielddef_packed(const upb_fielddef * f)795 bool upb_fielddef_packed(const upb_fielddef *f) {
796 return f->packed_;
797 }
798
upb_fielddef_name(const upb_fielddef * f)799 const char *upb_fielddef_name(const upb_fielddef *f) {
800 return upb_def_fullname(upb_fielddef_upcast(f));
801 }
802
/* Writes the JSON name of field "f" into "buf", which has room for "len"
 * bytes.
 *
 * The JSON name is the field name with all underscores removed and each
 * character that follows an underscore upper-cased.  Upper-casing is done
 * for ASCII 'a'-'z' only, independent of the current locale.
 *
 * The output is truncated to fit; whenever len > 0 it is NUL-terminated.
 * Returns the number of bytes the complete result needs, including the
 * terminating NUL, so a return value greater than "len" means the output was
 * truncated.  If the field has no name, an empty string is written and 0 is
 * returned. */
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
  const char *name = upb_fielddef_name(f);
  size_t src, dst = 0;
  bool ucase_next = false;

  /* Counts one output byte and stores it if it still fits; the last byte of
   * the buffer is always reserved for the terminator. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) buf[dst - 1] = (byte); \
    else if (dst == len) buf[dst - 1] = '\0'; \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    char c = name[src];

    if (c == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() from <ctype.h> is locale-dependent, which we don't
       * want. */
      if (c >= 'a' && c <= 'z') {
        c = (char)(c - 'a' + 'A');
      }
      ucase_next = false;
    }

    WRITE(c);
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
841
upb_fielddef_containingtype(const upb_fielddef * f)842 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
843 return f->msg_is_symbolic ? NULL : f->msg.def;
844 }
845
upb_fielddef_containingoneof(const upb_fielddef * f)846 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
847 return f->oneof;
848 }
849
upb_fielddef_containingtype_mutable(upb_fielddef * f)850 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
851 return (upb_msgdef*)upb_fielddef_containingtype(f);
852 }
853
/* Returns the symbolic name of the containing type, or NULL when the
 * containing type is not symbolic. */
const char *upb_fielddef_containingtypename(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return NULL;
  }
  return f->msg.name;
}
857
/* Frees the symbolic containing-type name, if the field holds one.  The
 * msg_is_symbolic flag is left for the caller to update. */
static void release_containingtype(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return;
  }
  upb_gfree(f->msg.name);
}
861
/* Records the containing type of "f" symbolically, as a private copy of
 * "name", replacing any previous symbolic name.
 *
 * The field must not be frozen (asserted).  Fails with an error on "s" if
 * the field already has a resolved containing message, or if the name cannot
 * be copied. */
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
                                        upb_status *s) {
  char *owned_name;
  assert(!upb_fielddef_isfrozen(f));

  if (upb_fielddef_containingtype(f) != NULL) {
    upb_status_seterrmsg(s, "field has already been added to a message.");
    return false;
  }

  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */
  owned_name = upb_gstrdup(name);
  if (owned_name == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Drop the old name only after the new copy is secured. */
  release_containingtype(f);
  f->msg_is_symbolic = true;
  f->msg.name = owned_name;
  return true;
}
884
/* Sets the field's name.  Refused, with an error on "s", once the field
 * belongs to a message or a oneof; otherwise forwards to
 * upb_def_setfullname(). */
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
  bool attached = upb_fielddef_containingtype(f) ||
                  upb_fielddef_containingoneof(f);

  if (!attached) {
    return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
  }

  upb_status_seterrmsg(s, "Already added to message or oneof");
  return false;
}
892
/* Debug-only check that the field's type is set and equals "type".  The
 * UPB_UNUSED() lines mark both parameters as used for builds in which the
 * assert compiles away. */
static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
  UPB_UNUSED(type);
  UPB_UNUSED(f);
  assert(f->type_is_set_ && upb_fielddef_type(f) == type);
}
898
upb_fielddef_defaultint64(const upb_fielddef * f)899 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
900 chkdefaulttype(f, UPB_TYPE_INT64);
901 return f->defaultval.sint;
902 }
903
upb_fielddef_defaultint32(const upb_fielddef * f)904 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
905 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
906 int32_t val;
907 bool ok = enumdefaultint32(f, &val);
908 UPB_ASSERT_VAR(ok, ok);
909 return val;
910 } else {
911 chkdefaulttype(f, UPB_TYPE_INT32);
912 return f->defaultval.sint;
913 }
914 }
915
upb_fielddef_defaultuint64(const upb_fielddef * f)916 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
917 chkdefaulttype(f, UPB_TYPE_UINT64);
918 return f->defaultval.uint;
919 }
920
upb_fielddef_defaultuint32(const upb_fielddef * f)921 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
922 chkdefaulttype(f, UPB_TYPE_UINT32);
923 return f->defaultval.uint;
924 }
925
upb_fielddef_defaultbool(const upb_fielddef * f)926 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
927 chkdefaulttype(f, UPB_TYPE_BOOL);
928 return f->defaultval.uint;
929 }
930
upb_fielddef_defaultfloat(const upb_fielddef * f)931 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
932 chkdefaulttype(f, UPB_TYPE_FLOAT);
933 return f->defaultval.flt;
934 }
935
upb_fielddef_defaultdouble(const upb_fielddef * f)936 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
937 chkdefaulttype(f, UPB_TYPE_DOUBLE);
938 return f->defaultval.dbl;
939 }
940
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)941 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
942 assert(f->type_is_set_);
943 assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
944 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
945 upb_fielddef_type(f) == UPB_TYPE_ENUM);
946
947 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
948 const char *ret = enumdefaultstr(f);
949 assert(ret);
950 /* Enum defaults can't have embedded NULLs. */
951 if (len) *len = strlen(ret);
952 return ret;
953 }
954
955 if (f->default_is_string) {
956 str_t *str = f->defaultval.bytes;
957 if (len) *len = str->len;
958 return str->str;
959 }
960
961 return NULL;
962 }
963
/* Resets the default value of |f| to the zero value for its current type.
 *
 * Numeric and bool types get 0, string/bytes get a freshly allocated empty
 * string, message fields are left untouched, and enums get the "not set"
 * sentinel (default_is_string == true with a NULL string). */
static void upb_fielddef_init_default(upb_fielddef *f) {
  upb_fieldtype_t type;

  f->default_is_string = false;
  type = upb_fielddef_type(f);

  if (type == UPB_TYPE_DOUBLE) {
    f->defaultval.dbl = 0;
  } else if (type == UPB_TYPE_FLOAT) {
    f->defaultval.flt = 0;
  } else if (type == UPB_TYPE_INT32 || type == UPB_TYPE_INT64) {
    f->defaultval.sint = 0;
  } else if (type == UPB_TYPE_UINT64 || type == UPB_TYPE_UINT32 ||
             type == UPB_TYPE_BOOL) {
    f->defaultval.uint = 0;
  } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES) {
    /* NOTE(review): newstr() returns NULL on allocation failure and that
     * result is stored unchecked here. */
    f->defaultval.bytes = newstr("", 0);
    f->default_is_string = true;
  } else if (type == UPB_TYPE_ENUM) {
    /* This is our special sentinel that indicates "not set" for an enum. */
    f->default_is_string = true;
    f->defaultval.bytes = NULL;
  }
  /* UPB_TYPE_MESSAGE: nothing to initialize. */
}
987
upb_fielddef_subdef(const upb_fielddef * f)988 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
989 return f->subdef_is_symbolic ? NULL : f->sub.def;
990 }
991
upb_fielddef_msgsubdef(const upb_fielddef * f)992 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
993 const upb_def *def = upb_fielddef_subdef(f);
994 return def ? upb_dyncast_msgdef(def) : NULL;
995 }
996
upb_fielddef_enumsubdef(const upb_fielddef * f)997 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
998 const upb_def *def = upb_fielddef_subdef(f);
999 return def ? upb_dyncast_enumdef(def) : NULL;
1000 }
1001
upb_fielddef_subdef_mutable(upb_fielddef * f)1002 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
1003 return (upb_def*)upb_fielddef_subdef(f);
1004 }
1005
upb_fielddef_subdefname(const upb_fielddef * f)1006 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
1007 if (f->subdef_is_symbolic) {
1008 return f->sub.name;
1009 } else if (f->sub.def) {
1010 return upb_def_fullname(f->sub.def);
1011 } else {
1012 return NULL;
1013 }
1014 }
1015
/* Sets the field number of |f|.
 *
 * Fails (setting an error on |s| and returning false) if the field already
 * has a containing type, or if |number| is 0 or above UPB_MAX_FIELDNUMBER. */
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
  bool in_range;

  if (upb_fielddef_containingtype(f)) {
    upb_status_seterrmsg(
        s, "cannot change field number after adding to a message");
    return false;
  }

  in_range = number >= 1 && number <= UPB_MAX_FIELDNUMBER;
  if (!in_range) {
    upb_status_seterrf(s, "invalid field number (%u)", number);
    return false;
  }

  f->number_ = number;
  return true;
}
1029
/* Changes the type of the (unfrozen) field |f| to |type|.
 *
 * The old default is torn down before the type changes and a fresh default
 * is installed afterwards, because upb_fielddef_init_default() depends on
 * the new type. */
void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checktype(type));

  upb_fielddef_uninit_default(f);

  f->type_is_set_ = true;
  f->type_ = type;

  upb_fielddef_init_default(f);
}
1038
/* Configures |f| from a descriptor type (UPB_DESCRIPTOR_TYPE_*).
 *
 * Three properties are derived from |type|:
 *   - the upb field type (several descriptor types map to one upb type),
 *   - the integer format (fixed / zigzag / variable),
 *   - whether the field is tag-delimited (only for GROUP).
 *
 * An unknown |type| trips an assertion in the first switch. */
void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
  upb_intfmt_t fmt;

  assert(!upb_fielddef_isfrozen(f));

  /* Step 1: field type. */
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      upb_fielddef_settype(f, UPB_TYPE_FLOAT);
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SINT64:
      upb_fielddef_settype(f, UPB_TYPE_INT64);
      break;
    case UPB_DESCRIPTOR_TYPE_UINT64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      upb_fielddef_settype(f, UPB_TYPE_UINT64);
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
    case UPB_DESCRIPTOR_TYPE_SINT32:
      upb_fielddef_settype(f, UPB_TYPE_INT32);
      break;
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
      upb_fielddef_settype(f, UPB_TYPE_UINT32);
      break;
    case UPB_DESCRIPTOR_TYPE_BOOL:
      upb_fielddef_settype(f, UPB_TYPE_BOOL);
      break;
    case UPB_DESCRIPTOR_TYPE_STRING:
      upb_fielddef_settype(f, UPB_TYPE_STRING);
      break;
    case UPB_DESCRIPTOR_TYPE_BYTES:
      upb_fielddef_settype(f, UPB_TYPE_BYTES);
      break;
    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE:
      upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
      break;
    case UPB_DESCRIPTOR_TYPE_ENUM:
      upb_fielddef_settype(f, UPB_TYPE_ENUM);
      break;
    default:
      assert(false);
  }

  /* Step 2: integer format. */
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT64:
    case UPB_DESCRIPTOR_TYPE_SINT32:
      fmt = UPB_INTFMT_ZIGZAG;
      break;
    default:
      fmt = UPB_INTFMT_VARIABLE;
      break;
  }
  upb_fielddef_setintfmt(f, fmt);

  /* Step 3: groups are the only tag-delimited fields. */
  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
}
1099
upb_fielddef_descriptortype(const upb_fielddef * f)1100 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
1101 switch (upb_fielddef_type(f)) {
1102 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT;
1103 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
1104 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL;
1105 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
1106 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES;
1107 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM;
1108 case UPB_TYPE_INT32:
1109 switch (upb_fielddef_intfmt(f)) {
1110 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
1111 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32;
1112 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32;
1113 }
1114 case UPB_TYPE_INT64:
1115 switch (upb_fielddef_intfmt(f)) {
1116 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
1117 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64;
1118 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64;
1119 }
1120 case UPB_TYPE_UINT32:
1121 switch (upb_fielddef_intfmt(f)) {
1122 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
1123 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32;
1124 case UPB_INTFMT_ZIGZAG: return -1;
1125 }
1126 case UPB_TYPE_UINT64:
1127 switch (upb_fielddef_intfmt(f)) {
1128 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
1129 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64;
1130 case UPB_INTFMT_ZIGZAG: return -1;
1131 }
1132 case UPB_TYPE_MESSAGE:
1133 return upb_fielddef_istagdelim(f) ?
1134 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
1135 }
1136 return 0;
1137 }
1138
/* Records whether |f| is an extension.  |f| must not be frozen (asserted). */
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
  assert(!upb_fielddef_isfrozen(f));

  f->is_extension_ = is_extension;
}
1143
/* Records the "lazy" flag on |f|.  |f| must not be frozen (asserted). */
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
  assert(!upb_fielddef_isfrozen(f));

  f->lazy_ = lazy;
}
1148
/* Records the "packed" flag on |f|.  |f| must not be frozen (asserted). */
void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
  assert(!upb_fielddef_isfrozen(f));

  f->packed_ = packed;
}
1153
/* Sets the label of |f|.  |f| must not be frozen and |label| must pass
 * upb_fielddef_checklabel() (both asserted). */
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checklabel(label));

  f->label_ = label;
}
1159
/* Sets the integer format of |f|.  |f| must not be frozen and |fmt| must
 * pass upb_fielddef_checkintfmt() (both asserted). */
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checkintfmt(fmt));

  f->intfmt = fmt;
}
1165
/* Records whether |f| is tag-delimited.  |f| must not be frozen (asserted).
 * upb_fielddef_setdescriptortype() sets this flag for GROUP fields only. */
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
  assert(!upb_fielddef_isfrozen(f));
  f->tagdelim = tag_delim;
}
1171
/* Common precondition check for the typed default setters.
 *
 * Requires that |f| has its type set, is not frozen, and has type |type|;
 * otherwise asserts and returns false.  On success any string currently held
 * as the default is freed, default_is_string is cleared, and true is
 * returned so the caller can store the new scalar value. */
static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
  str_t *old;
  bool usable = f->type_is_set_ && !upb_fielddef_isfrozen(f) &&
                upb_fielddef_type(f) == type;

  if (!usable) {
    assert(false);
    return false;
  }

  if (f->default_is_string) {
    old = f->defaultval.bytes;
    /* Only enums may hold the NULL "not set" sentinel. */
    assert(old || type == UPB_TYPE_ENUM);
    if (old) freestr(old);
  }

  f->default_is_string = false;
  return true;
}
1186
/* Stores |value| as the default of |f| if checksetdefault() accepts the field
 * as an UPB_TYPE_INT64 field; otherwise does nothing. */
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
  if (!checksetdefault(f, UPB_TYPE_INT64)) {
    return;
  }
  f->defaultval.sint = value;
}
1191
/* Stores |value| as the default of |f|.
 *
 * Enum fields are accepted as well as INT32 fields: the enum check is tried
 * first when the field is an enum, and the INT32 check is the fallback. */
void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
  bool accepted;

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
      checksetdefault(f, UPB_TYPE_ENUM)) {
    accepted = true;
  } else {
    accepted = checksetdefault(f, UPB_TYPE_INT32);
  }

  if (accepted) {
    f->defaultval.sint = value;
  }
}
1199
/* Stores |value| as the default of |f| if checksetdefault() accepts the field
 * as an UPB_TYPE_UINT64 field; otherwise does nothing. */
void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT64)) {
    return;
  }
  f->defaultval.uint = value;
}
1204
/* Stores |value| as the default of |f| if checksetdefault() accepts the field
 * as an UPB_TYPE_UINT32 field; otherwise does nothing. */
void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT32)) {
    return;
  }
  f->defaultval.uint = value;
}
1209
/* Stores |value| as the default of |f| (in the unsigned slot) if
 * checksetdefault() accepts the field as an UPB_TYPE_BOOL field; otherwise
 * does nothing. */
void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
  if (!checksetdefault(f, UPB_TYPE_BOOL)) {
    return;
  }
  f->defaultval.uint = value;
}
1214
/* Stores |value| as the default of |f| if checksetdefault() accepts the field
 * as an UPB_TYPE_FLOAT field; otherwise does nothing. */
void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
  if (!checksetdefault(f, UPB_TYPE_FLOAT)) {
    return;
  }
  f->defaultval.flt = value;
}
1219
/* Stores |value| as the default of |f| if checksetdefault() accepts the field
 * as an UPB_TYPE_DOUBLE field; otherwise does nothing. */
void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
  if (!checksetdefault(f, UPB_TYPE_DOUBLE)) {
    return;
  }
  f->defaultval.dbl = value;
}
1224
/* Sets the default of a string, bytes or enum field to a copy of the |len|
 * bytes at |str|.
 *
 * For enum fields the data must be a valid (non-dotted) identifier; if not,
 * upb_isident() reports the error on |s| and false is returned.
 *
 * Returns false and sets an error on |s| if the copy cannot be allocated; in
 * that case the previous default is left untouched.  Returns true on
 * success. */
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
                                upb_status *s) {
  str_t *replacement;
  str_t *previous = NULL;

  assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
    return false;

  if (f->default_is_string) {
    previous = f->defaultval.bytes;
    /* Only enums may hold the NULL "not set" sentinel. */
    assert(previous || f->type_ == UPB_TYPE_ENUM);
  } else {
    /* A non-string default can only be an enum holding an integer value. */
    assert(f->type_ == UPB_TYPE_ENUM);
  }

  /* Copy before releasing the old value: this keeps the field consistent if
   * allocation fails and stays safe if |str| points into the old default. */
  replacement = newstr(str, len);
  if (!replacement) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  if (previous) freestr(previous);
  f->defaultval.bytes = replacement;
  f->default_is_string = true;
  return true;
}
1245
/* Convenience wrapper around upb_fielddef_setdefaultstr() for a
 * NUL-terminated string.  A NULL |str| is forwarded with length 0.  The
 * result of the underlying call is not reported to the caller. */
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
                                 upb_status *s) {
  size_t len = 0;

  assert(f->type_is_set_);
  if (str) {
    len = strlen(str);
  }
  upb_fielddef_setdefaultstr(f, str, len, s);
}
1251
upb_fielddef_enumhasdefaultint32(const upb_fielddef * f)1252 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
1253 int32_t val;
1254 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1255 return enumdefaultint32(f, &val);
1256 }
1257
upb_fielddef_enumhasdefaultstr(const upb_fielddef * f)1258 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1259 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1260 return enumdefaultstr(f) != NULL;
1261 }
1262
/* Checks that |subdef| has the right kind for field |f|: a msgdef for message
 * fields, an enumdef for enum fields.  Any other field type cannot have a
 * subdef.  On mismatch an error is set on |s| and false is returned. */
static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
                                 upb_status *s) {
  const char *err;

  switch (f->type_) {
    case UPB_TYPE_MESSAGE:
      if (upb_dyncast_msgdef(subdef)) return true;
      err = "invalid subdef type for this submessage field";
      break;
    case UPB_TYPE_ENUM:
      if (upb_dyncast_enumdef(subdef)) return true;
      err = "invalid subdef type for this enum field";
      break;
    default:
      err = "only message and enum fields can have a subdef";
      break;
  }

  upb_status_seterrmsg(s, err);
  return false;
}
1278
/* Drops whatever |f| currently holds as its subdef: frees the symbolic name,
 * or releases the reference |f| holds on the resolved def.  The fields
 * themselves are not cleared; callers overwrite them afterwards. */
static void release_subdef(upb_fielddef *f) {
  if (f->subdef_is_symbolic) {
    upb_gfree(f->sub.name);
    return;
  }

  if (f->sub.def) {
    upb_unref2(f->sub.def, f);
  }
}
1286
/* Points the (unfrozen) field |f| at the resolved def |subdef|, which may be
 * NULL to clear it.  |f| must be a field type that takes a subdef (asserted).
 *
 * A non-NULL |subdef| must pass upb_subdef_typecheck(); on failure nothing is
 * changed and false is returned.  Otherwise the previous subdef is released
 * and |f| takes a reference on the new one. */
bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
                            upb_status *s) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_hassubdef(f));

  if (subdef != NULL) {
    if (!upb_subdef_typecheck(f, subdef, s)) {
      return false;
    }
  }

  release_subdef(f);
  f->subdef_is_symbolic = false;
  f->sub.def = subdef;
  if (f->sub.def) {
    upb_ref2(f->sub.def, f);
  }
  return true;
}
1298
/* Typed wrapper: upcasts |subdef| and forwards to upb_fielddef_setsubdef(). */
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
                               upb_status *s) {
  const upb_def *def = upb_msgdef_upcast(subdef);
  return upb_fielddef_setsubdef(f, def, s);
}
1303
/* Typed wrapper: upcasts |subdef| and forwards to upb_fielddef_setsubdef(). */
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
                                upb_status *s) {
  const upb_def *def = upb_enumdef_upcast(subdef);
  return upb_fielddef_setsubdef(f, def, s);
}
1308
/* Sets the subdef of the (unfrozen) field |f| symbolically, by name.
 *
 * Fails with an error on |s| if the field type takes no subdef, or if the
 * name cannot be copied.  On success the previous subdef is released and |f|
 * owns a private copy of |name|. */
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
                                upb_status *s) {
  char *owned;

  assert(!upb_fielddef_isfrozen(f));

  if (!upb_fielddef_hassubdef(f)) {
    upb_status_seterrmsg(s, "field type does not accept a subdef");
    return false;
  }

  /* Copy first so that failure leaves the current subdef intact. */
  owned = upb_gstrdup(name);
  if (owned == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */
  release_subdef(f);
  f->subdef_is_symbolic = true;
  f->sub.name = owned;
  return true;
}
1331
upb_fielddef_issubmsg(const upb_fielddef * f)1332 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1333 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1334 }
1335
upb_fielddef_isstring(const upb_fielddef * f)1336 bool upb_fielddef_isstring(const upb_fielddef *f) {
1337 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1338 upb_fielddef_type(f) == UPB_TYPE_BYTES;
1339 }
1340
upb_fielddef_isseq(const upb_fielddef * f)1341 bool upb_fielddef_isseq(const upb_fielddef *f) {
1342 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1343 }
1344
/* True iff |f| is neither a string/bytes field nor a submessage field. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  if (upb_fielddef_isstring(f)) {
    return false;
  }
  return !upb_fielddef_issubmsg(f);
}
1348
upb_fielddef_ismap(const upb_fielddef * f)1349 bool upb_fielddef_ismap(const upb_fielddef *f) {
1350 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1351 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1352 }
1353
upb_fielddef_haspresence(const upb_fielddef * f)1354 bool upb_fielddef_haspresence(const upb_fielddef *f) {
1355 if (upb_fielddef_isseq(f)) return false;
1356 if (upb_fielddef_issubmsg(f)) return true;
1357
1358 /* Primitive field: return true unless there is a message that specifies
1359 * presence should not exist. */
1360 if (f->msg_is_symbolic || !f->msg.def) return true;
1361 return f->msg.def->syntax == UPB_SYNTAX_PROTO2;
1362 }
1363
upb_fielddef_hassubdef(const upb_fielddef * f)1364 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1365 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1366 }
1367
/* True iff |x| lies in the closed interval [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) {
    return false;
  }
  return !(x > high);
}
1371
/* True iff |label| is in the range 1..3 accepted for field labels. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True iff |type| is in the range 1..11 accepted for field types. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True iff |fmt| is in the range 1..3 accepted for integer formats. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
1375
/* True iff |type| is in the range 1..18 accepted for descriptor types. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  bool valid = between(type, 1, 18);
  return valid;
}
1379
1380 /* upb_msgdef *****************************************************************/
1381
/* Refcounting "visit" callback for msgdefs: reports every field and then
 * every oneof of the message to |visit|, passing |closure| through. */
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
                     void *closure) {
  const upb_msgdef *m = (const upb_msgdef*)r;
  upb_msg_field_iter fields;
  upb_msg_oneof_iter oneofs;

  upb_msg_field_begin(&fields, m);
  while (!upb_msg_field_done(&fields)) {
    upb_fielddef *f = upb_msg_iter_field(&fields);
    visit(r, upb_fielddef_upcast2(f), closure);
    upb_msg_field_next(&fields);
  }

  upb_msg_oneof_begin(&oneofs, m);
  while (!upb_msg_oneof_done(&oneofs)) {
    upb_oneofdef *o = upb_msg_iter_oneof(&oneofs);
    visit(r, upb_oneofdef_upcast(o), closure);
    upb_msg_oneof_next(&oneofs);
  }
}
1400
/* Refcounting "free" callback for msgdefs: tears down both lookup tables and
 * the base def state, then frees the object itself. */
static void freemsg(upb_refcounted *r) {
  upb_msgdef *msg = (upb_msgdef*)r;

  upb_strtable_uninit(&msg->ntof);
  upb_inttable_uninit(&msg->itof);
  upb_def_uninit(upb_msgdef_upcast_mutable(msg));

  upb_gfree(msg);
}
1408
/* Refcounting vtable for msgdefs, pairing the visit (visitmsg) and free
 * (freemsg) callbacks.  upb_msgdef_new() hands it to upb_def_init(). */
const struct upb_refcounted_vtbl upb_msgdef_vtbl = {visitmsg, freemsg};
1410
upb_msgdef_new(const void * owner)1411 upb_msgdef *upb_msgdef_new(const void *owner) {
1412 upb_msgdef *m = upb_gmalloc(sizeof(*m));
1413 if (!m) return NULL;
1414
1415 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &upb_msgdef_vtbl,
1416 owner)) {
1417 goto err2;
1418 }
1419
1420 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
1421 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
1422 m->map_entry = false;
1423 m->syntax = UPB_SYNTAX_PROTO2;
1424 return m;
1425
1426 err1:
1427 upb_inttable_uninit(&m->itof);
1428 err2:
1429 upb_gfree(m);
1430 return NULL;
1431 }
1432
upb_msgdef_dup(const upb_msgdef * m,const void * owner)1433 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1434 bool ok;
1435 upb_msg_field_iter i;
1436 upb_msg_oneof_iter o;
1437
1438 upb_msgdef *newm = upb_msgdef_new(owner);
1439 if (!newm) return NULL;
1440 ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1441 upb_def_fullname(upb_msgdef_upcast(m)),
1442 NULL);
1443 newm->map_entry = m->map_entry;
1444 newm->syntax = m->syntax;
1445 UPB_ASSERT_VAR(ok, ok);
1446 for(upb_msg_field_begin(&i, m);
1447 !upb_msg_field_done(&i);
1448 upb_msg_field_next(&i)) {
1449 upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
1450 /* Fields in oneofs are dup'd below. */
1451 if (upb_fielddef_containingoneof(f)) continue;
1452 if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1453 upb_msgdef_unref(newm, owner);
1454 return NULL;
1455 }
1456 }
1457 for(upb_msg_oneof_begin(&o, m);
1458 !upb_msg_oneof_done(&o);
1459 upb_msg_oneof_next(&o)) {
1460 upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1461 if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1462 upb_msgdef_unref(newm, owner);
1463 return NULL;
1464 }
1465 }
1466 return newm;
1467 }
1468
/* Freezes |m| by passing it to upb_def_freeze() as a one-element list. */
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
  upb_def *defs[1];

  defs[0] = upb_msgdef_upcast_mutable(m);
  return upb_def_freeze(defs, 1, status);
}
1473
upb_msgdef_fullname(const upb_msgdef * m)1474 const char *upb_msgdef_fullname(const upb_msgdef *m) {
1475 return upb_def_fullname(upb_msgdef_upcast(m));
1476 }
1477
upb_msgdef_name(const upb_msgdef * m)1478 const char *upb_msgdef_name(const upb_msgdef *m) {
1479 return upb_def_name(upb_msgdef_upcast(m));
1480 }
1481
/* Sets the full name of |m| by forwarding to upb_def_setfullname(); the
 * result and any error on |s| come from that call. */
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
                            upb_status *s) {
  upb_def *def = upb_msgdef_upcast_mutable(m);
  return upb_def_setfullname(def, fullname, s);
}
1486
/* Sets the syntax of |m|.  Only UPB_SYNTAX_PROTO2 and UPB_SYNTAX_PROTO3 are
 * accepted; any other value leaves |m| unchanged and returns false. */
bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax) {
  bool known = syntax == UPB_SYNTAX_PROTO2 || syntax == UPB_SYNTAX_PROTO3;

  if (known) {
    m->syntax = syntax;
  }
  return known;
}
1495
upb_msgdef_syntax(const upb_msgdef * m)1496 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
1497 return m->syntax;
1498 }
1499
1500 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1501 * on status |s| and return false if not. */
check_field_add(const upb_msgdef * m,const upb_fielddef * f,upb_status * s)1502 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1503 upb_status *s) {
1504 if (upb_fielddef_containingtype(f) != NULL) {
1505 upb_status_seterrmsg(s, "fielddef already belongs to a message");
1506 return false;
1507 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1508 upb_status_seterrmsg(s, "field name or number were not set");
1509 return false;
1510 } else if (upb_msgdef_itof(m, upb_fielddef_number(f))) {
1511 upb_status_seterrmsg(s, "duplicate field number");
1512 return false;
1513 } else if (upb_strtable_lookup(&m->ntof, upb_fielddef_name(f), NULL)) {
1514 upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
1515 return false;
1516 }
1517 return true;
1518 }
1519
/* Unconditionally attaches |f| to |m|; callers run check_field_add() first.
 *
 * The field's previous containing type is released, |f| is entered into both
 * the by-number and by-name tables, and the two objects take references on
 * each other.  If |ref_donor| is non-NULL its reference on |f| is dropped. */
static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
  release_containingtype(f);
  f->msg_is_symbolic = false;
  f->msg.def = m;

  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));

  upb_ref2(f, m);
  upb_ref2(m, f);

  if (ref_donor) {
    upb_fielddef_unref(f, ref_donor);
  }
}
1530
/* Adds field |f| to message |m|, optionally taking over the reference held
 * by |ref_donor|.  Returns false with an error on |s| if the field cannot be
 * added; in that case nothing is modified.
 *
 * TODO: extensions need to have a separate namespace, because proto2 allows a
 * top-level extension (ie. one not in any package) to have the same name as a
 * field from the message.
 *
 * This also implies that there needs to be a separate lookup-by-name method
 * for extensions.  It seems desirable for iteration to return both extensions
 * and non-extensions though.
 *
 * We also need to validate that the field number is in an extension range iff
 * it is an extension. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
                         upb_status *s) {
  /* This method is idempotent: a field that is already part of this msgdef
   * is reported as success without further work. */
  if (upb_fielddef_containingtype(f) == m) {
    return true;
  }

  /* Check every constraint before performing any action. */
  if (!check_field_add(m, f, s)) {
    return false;
  }
  if (upb_fielddef_containingoneof(f) != NULL) {
    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
    upb_status_seterrmsg(s, "fielddef is part of a oneof");
    return false;
  }

  /* All checks passed; commit. */
  add_field(m, f, ref_donor);
  return true;
}
1563
/* Adds oneof |o|, together with all of its fields, to message |m|.
 *
 * Every check runs before anything is modified: the oneof must be unowned
 * and named, its name must be free in |m|, and each member field must pass
 * check_field_add().  On failure an error is set on |s| and false is
 * returned.  On success the reference held by |ref_donor| (if any) is
 * dropped. */
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
                         upb_status *s) {
  upb_oneof_iter it;

  /* Conditions on the oneof itself. */
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
    return false;
  }
  if (upb_oneofdef_name(o) == NULL) {
    upb_status_seterrmsg(s, "oneofdef name was not set");
    return false;
  }
  if (upb_strtable_lookup(&m->ntof, upb_oneofdef_name(o), NULL)) {
    upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
    return false;
  }

  /* None of the oneof's fields may conflict with names or numbers of fields
   * already in the message. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *member = upb_oneof_iter_field(&it);
    if (!check_field_add(m, member, s)) {
      return false;
    }
    upb_oneof_next(&it);
  }

  /* Everything checks out -- commit now, starting with the oneof itself. */
  o->parent = m;
  upb_strtable_insert(&m->ntof, upb_oneofdef_name(o), upb_value_ptr(o));
  upb_ref2(o, m);
  upb_ref2(m, o);

  /* Then add each field of the oneof directly to the msgdef. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    upb_fielddef *member = upb_oneof_iter_field(&it);
    add_field(m, member, NULL);
    upb_oneof_next(&it);
  }

  if (ref_donor) {
    upb_oneofdef_unref(o, ref_donor);
  }

  return true;
}
1607
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)1608 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1609 upb_value val;
1610 return upb_inttable_lookup32(&m->itof, i, &val) ?
1611 upb_value_getptr(val) : NULL;
1612 }
1613
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)1614 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1615 size_t len) {
1616 upb_value val;
1617
1618 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1619 return NULL;
1620 }
1621
1622 return upb_trygetfield(upb_value_getptr(val));
1623 }
1624
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)1625 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1626 size_t len) {
1627 upb_value val;
1628
1629 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1630 return NULL;
1631 }
1632
1633 return upb_trygetoneof(upb_value_getptr(val));
1634 }
1635
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)1636 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
1637 const upb_fielddef **f, const upb_oneofdef **o) {
1638 upb_value val;
1639
1640 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1641 return false;
1642 }
1643
1644 *o = upb_trygetoneof(upb_value_getptr(val));
1645 *f = upb_trygetfield(upb_value_getptr(val));
1646 assert((*o != NULL) ^ (*f != NULL)); /* Exactly one of the two should be set. */
1647 return true;
1648 }
1649
upb_msgdef_numfields(const upb_msgdef * m)1650 int upb_msgdef_numfields(const upb_msgdef *m) {
1651 /* The number table contains only fields. */
1652 return upb_inttable_count(&m->itof);
1653 }
1654
upb_msgdef_numoneofs(const upb_msgdef * m)1655 int upb_msgdef_numoneofs(const upb_msgdef *m) {
1656 /* The name table includes oneofs, and the number table does not. */
1657 return upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof);
1658 }
1659
/* Records whether |m| is a map entry.  |m| must not be frozen (asserted). */
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
  assert(!upb_msgdef_isfrozen(m));

  m->map_entry = map_entry;
}
1664
upb_msgdef_mapentry(const upb_msgdef * m)1665 bool upb_msgdef_mapentry(const upb_msgdef *m) {
1666 return m->map_entry;
1667 }
1668
/* Starts iterating over the fields of |m|.  Field iteration is iteration
 * over the message's by-number table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  upb_inttable_begin(iter, &(m->itof));
}
1672
/* Advances the field iterator by one entry. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
1674
/* True once the field iterator has no more entries. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
1678
upb_msg_iter_field(const upb_msg_field_iter * iter)1679 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
1680 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1681 }
1682
/* Puts the field iterator into the "done" state by forwarding to
 * upb_inttable_iter_setdone(). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
1686
/* Starts iterating over the oneofs of |m|.
 *
 * Oneofs share the by-name table with fields, so the iterator is advanced
 * past any leading field entries until it rests on a oneof or is done. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  upb_strtable_begin(iter, &m->ntof);

  for (;;) {
    if (upb_strtable_done(iter)) break;
    if (upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) break;
    upb_strtable_next(iter);
  }
}
1695
/* Advances the oneof iterator to the next oneof, stepping over the field
 * entries that share the by-name table. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  /* Always move at least one step, then keep going while on a field. */
  upb_strtable_next(iter);
  while (!upb_strtable_done(iter) &&
         !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) {
    upb_strtable_next(iter);
  }
}
1703
/* True once the oneof iterator has no more entries. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  bool finished = upb_strtable_done(iter);
  return finished;
}
1707
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)1708 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1709 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
1710 }
1711
/* Puts the oneof iterator into the "done" state by forwarding to
 * upb_strtable_iter_setdone(). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
}
1715
1716 /* upb_oneofdef ***************************************************************/
1717
/* Refcounting "visit" callback for oneofdefs: reports each member field and,
 * if the oneof has been added to a message, that parent message. */
static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_oneofdef *o = (const upb_oneofdef*)r;
  upb_oneof_iter it;

  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *member = upb_oneof_iter_field(&it);
    visit(r, upb_fielddef_upcast2(member), closure);
    upb_oneof_next(&it);
  }

  if (o->parent) {
    visit(r, upb_msgdef_upcast2(o->parent), closure);
  }
}
1730
/* Refcounting "free" callback for oneofdefs: tears down both lookup tables,
 * frees the owned name, then frees the object itself. */
static void freeoneof(upb_refcounted *r) {
  upb_oneofdef *oneof = (upb_oneofdef*)r;

  upb_strtable_uninit(&oneof->ntof);
  upb_inttable_uninit(&oneof->itof);
  upb_gfree((void*)oneof->name);

  upb_gfree(oneof);
}
1738
/* Refcounting vtable for oneofdefs, pairing the visit (visitoneof) and free
 * (freeoneof) callbacks.  upb_oneofdef_new() hands it to
 * upb_refcounted_init(), and upb_isoneof() compares against its address. */
const struct upb_refcounted_vtbl upb_oneofdef_vtbl = {visitoneof, freeoneof};
1740
upb_oneofdef_new(const void * owner)1741 upb_oneofdef *upb_oneofdef_new(const void *owner) {
1742 upb_oneofdef *o = upb_gmalloc(sizeof(*o));
1743
1744 if (!o) {
1745 return NULL;
1746 }
1747
1748 o->parent = NULL;
1749 o->name = NULL;
1750
1751 if (!upb_refcounted_init(upb_oneofdef_upcast_mutable(o), &upb_oneofdef_vtbl,
1752 owner)) {
1753 goto err2;
1754 }
1755
1756 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1757 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1758
1759 return o;
1760
1761 err1:
1762 upb_inttable_uninit(&o->itof);
1763 err2:
1764 upb_gfree(o);
1765 return NULL;
1766 }
1767
upb_oneofdef_dup(const upb_oneofdef * o,const void * owner)1768 upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
1769 bool ok;
1770 upb_oneof_iter i;
1771 upb_oneofdef *newo = upb_oneofdef_new(owner);
1772 if (!newo) return NULL;
1773 ok = upb_oneofdef_setname(newo, upb_oneofdef_name(o), NULL);
1774 UPB_ASSERT_VAR(ok, ok);
1775 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1776 upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1777 if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1778 upb_oneofdef_unref(newo, owner);
1779 return NULL;
1780 }
1781 }
1782 return newo;
1783 }
1784
upb_oneofdef_name(const upb_oneofdef * o)1785 const char *upb_oneofdef_name(const upb_oneofdef *o) { return o->name; }
1786
/* Sets the name of |o| to a private copy of |name|.
 *
 * Fails (returning false and recording an error in |s|) if the oneof has
 * already been added to a message, if |name| is not a valid identifier
 * according to upb_isident(), or if the copy cannot be allocated.  On failure
 * the previous name is left untouched.  |o| must not be frozen. */
bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s) {
  assert(!upb_oneofdef_isfrozen(o));
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneof already added to a message");
    return false;
  }

  if (!upb_isident(name, strlen(name), true, s)) {
    return false;
  }

  name = upb_gstrdup(name);
  if (!name) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  /* Release the old name only once the replacement is known to exist. */
  upb_gfree((void*)o->name);
  o->name = name;
  return true;
}
1808
upb_oneofdef_containingtype(const upb_oneofdef * o)1809 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1810 return o->parent;
1811 }
1812
upb_oneofdef_numfields(const upb_oneofdef * o)1813 int upb_oneofdef_numfields(const upb_oneofdef *o) {
1814 return upb_strtable_count(&o->ntof);
1815 }
1816
/* Adds field |f| to oneof |o|.
 *
 * Returns true on success, and also when |f| already belongs to |o| (the call
 * is idempotent).  Returns false, with an error recorded in |s|, when:
 *   - |f| does not have the OPTIONAL label,
 *   - |f| has no name or its number is 0,
 *   - |o| already holds a field with the same name or number,
 *   - |f| already belongs to some other oneof,
 *   - |f| belongs to a message while |o| has no parent, or belongs to a
 *     different message than |o|'s parent,
 *   - adding |f| to |o|'s parent message fails.
 *
 * If |ref_donor| is non-NULL, its ref on |f| is released on success.  Neither
 * |o| nor its parent message (if any) may be frozen. */
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                           const void *ref_donor,
                           upb_status *s) {
  const upb_msgdef *field_msg;

  assert(!upb_oneofdef_isfrozen(o));
  assert(!o->parent || !upb_msgdef_isfrozen(o->parent));

  /* Already a member of this oneof: nothing to do. */
  if (upb_fielddef_containingoneof(f) == o) return true;

  /* ---- Validation phase: nothing is modified until every check passes. */

  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
    return false;
  }

  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "field name or number were not set");
    return false;
  }

  if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
      upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
    upb_status_seterrmsg(s, "duplicate field name or number");
    return false;
  }

  if (upb_fielddef_containingoneof(f) != NULL) {
    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
    return false;
  }

  field_msg = upb_fielddef_containingtype(f);
  if (field_msg != NULL) {
    /* The field already lives in a message; that is only acceptable when it
     * is the very message this oneof belongs to.  A parentless oneof is
     * rejected because accepting the field would require silently attaching
     * the oneof to the field's message. */
    if (o->parent == NULL) {
      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
                              "oneof does not");
      return false;
    }
    if (field_msg != o->parent) {
      upb_status_seterrmsg(s, "fielddef belongs to a different message "
                              "than oneof");
      return false;
    }
  } else if (o->parent != NULL) {
    /* ---- Commit phase, step 1: the field has no message yet but we do, so
     * add it to our parent first.  This is done before touching our own
     * tables because it is the step that can still fail. */
    if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
      return false;
    }
  }

  /* ---- Commit phase, step 2: record the membership in both directions. */
  release_containingtype(f);
  f->oneof = o;
  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
  upb_ref2(f, o);
  upb_ref2(o, f);
  if (ref_donor) upb_fielddef_unref(f, ref_donor);

  return true;
}
1891
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)1892 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1893 const char *name, size_t length) {
1894 upb_value val;
1895 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1896 upb_value_getptr(val) : NULL;
1897 }
1898
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)1899 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1900 upb_value val;
1901 return upb_inttable_lookup32(&o->itof, num, &val) ?
1902 upb_value_getptr(val) : NULL;
1903 }
1904
/* Positions |iter| at the start of |o|'s fields.  Iteration runs over the
 * number -> field table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *by_number = &o->itof;
  upb_inttable_begin(iter, by_number);
}
1908
/* Advances |iter| by one entry; a thin wrapper over upb_inttable_next(). */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
}
1912
/* Reports whether |iter| is finished, as determined by upb_inttable_done(). */
bool upb_oneof_done(upb_oneof_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
1916
upb_oneof_iter_field(const upb_oneof_iter * iter)1917 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1918 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1919 }
1920
/* Marks |iter| as done; a thin wrapper over upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
1924
1925 /* upb_filedef ****************************************************************/
1926
/* Visit callback installed in upb_filedef_vtbl.  Calls |visit| once for every
 * def stored in the file, in index order.  Dependencies are not visited. */
static void visitfiledef(const upb_refcounted *r, upb_refcounted_visit *visit,
                         void *closure) {
  const upb_filedef *file = (const upb_filedef*)r;
  size_t idx = 0;

  while (idx < upb_filedef_defcount(file)) {
    const upb_def *def = upb_filedef_def(file, idx);
    visit(r, upb_def_upcast(def), closure);
    idx++;
  }
}
1936
/* Free callback installed in upb_filedef_vtbl.  Drops the ref held on each
 * dependency, uninitializes both tables, then frees the name, the package and
 * the filedef itself. */
static void freefiledef(upb_refcounted *r) {
  upb_filedef *file = (upb_filedef*)r;
  size_t idx = 0;

  /* These refs were taken with |file| as owner in upb_filedef_adddep(). */
  while (idx < upb_filedef_depcount(file)) {
    upb_filedef_unref(upb_filedef_dep(file, idx), file);
    idx++;
  }

  upb_inttable_uninit(&file->defs);
  upb_inttable_uninit(&file->deps);

  upb_gfree((void*)file->name);
  upb_gfree((void*)file->package);
  upb_gfree(file);
}
1951
1952 const struct upb_refcounted_vtbl upb_filedef_vtbl = {visitfiledef, freefiledef};
1953
upb_filedef_new(const void * owner)1954 upb_filedef *upb_filedef_new(const void *owner) {
1955 upb_filedef *f = upb_gmalloc(sizeof(*f));
1956
1957 if (!f) {
1958 return NULL;
1959 }
1960
1961 f->package = NULL;
1962 f->name = NULL;
1963 f->syntax = UPB_SYNTAX_PROTO2;
1964
1965 if (!upb_refcounted_init(upb_filedef_upcast_mutable(f), &upb_filedef_vtbl,
1966 owner)) {
1967 goto err;
1968 }
1969
1970 if (!upb_inttable_init(&f->defs, UPB_CTYPE_CONSTPTR)) {
1971 goto err;
1972 }
1973
1974 if (!upb_inttable_init(&f->deps, UPB_CTYPE_CONSTPTR)) {
1975 goto err2;
1976 }
1977
1978 return f;
1979
1980
1981 err2:
1982 upb_inttable_uninit(&f->defs);
1983
1984 err:
1985 upb_gfree(f);
1986 return NULL;
1987 }
1988
upb_filedef_name(const upb_filedef * f)1989 const char *upb_filedef_name(const upb_filedef *f) {
1990 return f->name;
1991 }
1992
upb_filedef_package(const upb_filedef * f)1993 const char *upb_filedef_package(const upb_filedef *f) {
1994 return f->package;
1995 }
1996
upb_filedef_syntax(const upb_filedef * f)1997 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
1998 return f->syntax;
1999 }
2000
upb_filedef_defcount(const upb_filedef * f)2001 size_t upb_filedef_defcount(const upb_filedef *f) {
2002 return upb_inttable_count(&f->defs);
2003 }
2004
upb_filedef_depcount(const upb_filedef * f)2005 size_t upb_filedef_depcount(const upb_filedef *f) {
2006 return upb_inttable_count(&f->deps);
2007 }
2008
upb_filedef_def(const upb_filedef * f,size_t i)2009 const upb_def *upb_filedef_def(const upb_filedef *f, size_t i) {
2010 upb_value v;
2011
2012 if (upb_inttable_lookup32(&f->defs, i, &v)) {
2013 return upb_value_getconstptr(v);
2014 } else {
2015 return NULL;
2016 }
2017 }
2018
upb_filedef_dep(const upb_filedef * f,size_t i)2019 const upb_filedef *upb_filedef_dep(const upb_filedef *f, size_t i) {
2020 upb_value v;
2021
2022 if (upb_inttable_lookup32(&f->deps, i, &v)) {
2023 return upb_value_getconstptr(v);
2024 } else {
2025 return NULL;
2026 }
2027 }
2028
/* Replaces the file's name with a private copy of |name|.  Returns false and
 * reports out-of-memory through |s| if the copy cannot be made; the old name
 * is kept in that case. */
bool upb_filedef_setname(upb_filedef *f, const char *name, upb_status *s) {
  const char *copy = upb_gstrdup(name);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->name);
  f->name = copy;
  return true;
}
2039
/* Replaces the file's package with a private copy of |package|.
 *
 * Returns false if |package| is not a valid dotted identifier according to
 * upb_isident() (which records the error in |s|), or if the copy cannot be
 * allocated (reported as out-of-memory through |s|).  The old package is kept
 * on failure. */
bool upb_filedef_setpackage(upb_filedef *f, const char *package,
                            upb_status *s) {
  const char *copy;

  if (!upb_isident(package, strlen(package), true, s)) {
    return false;
  }

  copy = upb_gstrdup(package);
  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->package);
  f->package = copy;
  return true;
}
2052
/* Sets the file's syntax and propagates it to every message def already in
 * the file.  Returns false with an error in |s| if |syntax| is neither
 * UPB_SYNTAX_PROTO2 nor UPB_SYNTAX_PROTO3; nothing is changed in that case. */
bool upb_filedef_setsyntax(upb_filedef *f, upb_syntax_t syntax,
                           upb_status *s) {
  size_t idx;

  UPB_UNUSED(s);

  switch (syntax) {
    case UPB_SYNTAX_PROTO2:
    case UPB_SYNTAX_PROTO3:
      break;
    default:
      upb_status_seterrmsg(s, "Unknown syntax value.");
      return false;
  }

  f->syntax = syntax;

  /* Bring every message in this file in line with the new setting. */
  for (idx = 0; idx < upb_filedef_defcount(f); idx++) {
    /* Dropping const is safe: all defs in a mutable filedef must themselves
     * be mutable. */
    upb_def *def = (upb_def*)upb_filedef_def(f, idx);
    upb_msgdef *msg = upb_dyncast_msgdef_mutable(def);

    if (msg != NULL) {
      msg->syntax = syntax;
    }
  }

  return true;
}
2080
/* Appends |def| to file |f|.
 *
 * Fails with an error in |s| if |def| already has a file, or with
 * out-of-memory if the def table cannot grow.  On success the def's file
 * pointer is set, a ref2 between def and file is taken, the donor's ref (if
 * |ref_donor| is non-NULL) is released, and a message def inherits the file's
 * syntax. */
bool upb_filedef_adddef(upb_filedef *f, upb_def *def, const void *ref_donor,
                        upb_status *s) {
  if (def->file) {
    upb_status_seterrmsg(s, "Def is already part of another filedef.");
    return false;
  }

  if (!upb_inttable_push(&f->defs, upb_value_constptr(def))) {
    upb_upberr_setoom(s);
    return false;
  }

  def->file = f;
  upb_ref2(def, f);
  if (ref_donor) upb_def_unref(def, ref_donor);

  if (def->type == UPB_DEF_MSG) {
    upb_downcast_msgdef_mutable(def)->syntax = f->syntax;
  }
  return true;
}
2101
/* Appends |dep| to the dependency list of |f| and takes a ref on it owned by
 * |f|.  Returns false, taking no ref, if the table push fails. */
bool upb_filedef_adddep(upb_filedef *f, const upb_filedef *dep) {
  if (!upb_inttable_push(&f->deps, upb_value_constptr(dep))) {
    return false;
  }

  /* A plain ref rather than ref2: files can't form cycles. */
  upb_filedef_ref(dep, f);
  return true;
}
2111 /*
2112 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
2113 ** assert() or return false.
2114 */
2115
2116
2117 #include <string.h>
2118
2119
/* Allocates |size| bytes with upb_gmalloc() and zero-fills them.  Returns
 * NULL, touching nothing, when the allocation fails. */
static void *upb_calloc(size_t size) {
  void *block = upb_gmalloc(size);

  if (block == NULL) {
    return NULL;
  }
  /* memset() returns its first argument. */
  return memset(block, 0, size);
}
2127
/* This object exists only so that UPB_NO_CLOSURE has a unique pointer value
 * to refer to. */
char _upb_noclosure;
2131
/* Free callback for upb_handlers.  Runs every registered cleanup function on
 * its associated pointer, then releases the cleanup table, the ref on the
 * message def, the subhandlers array and the handlers object itself. */
static void freehandlers(upb_refcounted *r) {
  upb_handlers *handlers = (upb_handlers*)r;
  upb_inttable_iter it;

  /* Each entry in cleanup_ is keyed by the pointer to clean up and holds the
   * function to call on it. */
  upb_inttable_begin(&it, &handlers->cleanup_);
  while (!upb_inttable_done(&it)) {
    void *target = (void*)upb_inttable_iter_key(&it);
    upb_value stored = upb_inttable_iter_value(&it);
    upb_handlerfree *cleanup = upb_value_getfptr(stored);

    cleanup(target);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&handlers->cleanup_);
  upb_msgdef_unref(handlers->msg, handlers);
  upb_gfree(handlers->sub);
  upb_gfree(handlers);
}
2149
/* Visit callback for upb_handlers.  Walks the fields of the handlers' message
 * and calls |visit| for each submessage field that has subhandlers set. */
static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
                          void *closure) {
  const upb_handlers *handlers = (const upb_handlers*)r;
  upb_msg_field_iter it;

  upb_msg_field_begin(&it, handlers->msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *field = upb_msg_iter_field(&it);

    if (upb_fielddef_issubmsg(field)) {
      const upb_handlers *sub = upb_handlers_getsubhandlers(handlers, field);
      if (sub) {
        visit(r, upb_handlers_upcast(sub), closure);
      }
    }
    upb_msg_field_next(&it);
  }
}
2164
2165 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2166
2167 typedef struct {
2168 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
2169 upb_handlers_callback *callback;
2170 const void *closure;
2171 } dfs_state;
2172
2173 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
2174 * handlers set and cannot reach any upb_handlers* object that does. This is
2175 * slightly tricky to do correctly. */
newformsg(const upb_msgdef * m,const void * owner,dfs_state * s)2176 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2177 dfs_state *s) {
2178 upb_msg_field_iter i;
2179 upb_handlers *h = upb_handlers_new(m, owner);
2180 if (!h) return NULL;
2181 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2182
2183 s->callback(s->closure, h);
2184
2185 /* For each submessage field, get or create a handlers object and set it as
2186 * the subhandlers. */
2187 for(upb_msg_field_begin(&i, m);
2188 !upb_msg_field_done(&i);
2189 upb_msg_field_next(&i)) {
2190 upb_fielddef *f = upb_msg_iter_field(&i);
2191 const upb_msgdef *subdef;
2192 upb_value subm_ent;
2193
2194 if (!upb_fielddef_issubmsg(f)) continue;
2195
2196 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
2197 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2198 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2199 } else {
2200 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2201 if (!sub_mh) goto oom;
2202 upb_handlers_setsubhandlers(h, f, sub_mh);
2203 upb_handlers_unref(sub_mh, &sub_mh);
2204 }
2205 }
2206 return h;
2207
2208 oom:
2209 upb_handlers_unref(h, owner);
2210 return NULL;
2211 }
2212
/* Resolves to the slot of h->sub at position |selector|, i.e. the subhandlers
 * pointer for one submessage field.  The result is an lvalue, so it can be
 * assigned to.  Both arguments are parenthesized so that arbitrary
 * expressions can be passed safely. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* Same, addressed by field: the position in h->sub is the field's index_. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
2219
/* Computes the selector for handler |type| on field |f| of |h|.
 *
 * Returns the selector, or -1 after recording an error in h->status_ when |f|
 * does not belong to the handlers' message or when |type| is not valid for
 * |f|.  |h| must not be frozen. */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t selector;

  assert(!upb_handlers_isfrozen(h));

  if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
    upb_status_seterrf(
        &h->status_, "type mismatch: field %s does not belong to message %s",
        upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
    return -1;
  }

  if (upb_handlers_getselector(f, type, &selector)) {
    return selector;
  }

  upb_status_seterrf(
      &h->status_,
      "type mismatch: cannot register handler type %d for field %s",
      type, upb_fielddef_name(f));
  return -1;
}
2239
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2240 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2241 upb_handlertype_t type) {
2242 int32_t sel = trygetsel(h, f, type);
2243 assert(sel >= 0);
2244 return sel;
2245 }
2246
/* Returns the address of the return-closure-type slot in the table entry for
 * handler |type| of field |f|, so the caller can read or update it. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t selector = handlers_getsel(h, f, type);
  return &h->table[selector].attr.return_closure_type_;
}
2251
/* Common implementation behind all handler setters: installs |func| (with
 * attributes |attr|, or the defaults when |attr| is NULL) in table slot |sel|.
 *
 * |f| is the field the handler belongs to, or NULL for message-level
 * handlers; |type| is the handler type being registered.
 *
 * Returns false, with an error recorded in h->status_, when:
 *   - |sel| is negative (the selector lookup already failed),
 *   - the slot already has a handler,
 *   - the closure type in the attributes conflicts with the closure type
 *     already established for the enclosing context,
 *   - for STARTSEQ/STARTSTR handlers, the return closure type conflicts with
 *     the one already recorded in the slot.
 *
 * |h| must not be frozen. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  upb_handlerattr *attr) {
  upb_handlerattr effective = UPB_HANDLERATTR_INITIALIZER;
  const void *wanted_closure;
  const void **context_slot;

  assert(!upb_handlers_isfrozen(h));

  if (sel < 0) {
    upb_status_seterrmsg(&h->status_,
                         "incorrect handler type for this field.");
    return false;
  }

  if (h->table[sel].func) {
    upb_status_seterrmsg(&h->status_,
                         "cannot change handler once it has been set.");
    return false;
  }

  if (attr) {
    effective = *attr;
  }

  /* Find the slot holding the closure type of the context this handler runs
   * in: the STARTSTR handler's return type for string data, the STARTSEQ
   * handler's return type for elements of a sequence, and the message's
   * top-level closure type otherwise. */
  wanted_closure = upb_handlerattr_closuretype(&effective);

  if (type == UPB_HANDLER_STRING) {
    context_slot = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    context_slot = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    context_slot = &h->top_closure_type;
  }

  if (wanted_closure != NULL) {
    if (*context_slot != NULL && *context_slot != wanted_closure) {
      /* TODO(haberman): better message for debugging. */
      if (f) {
        upb_status_seterrf(&h->status_,
                           "closure type does not match for field %s",
                           upb_fielddef_name(f));
      } else {
        upb_status_seterrmsg(
            &h->status_, "closure type does not match for message-level handler");
      }
      return false;
    }
    /* Establish (or re-confirm) the closure type for this context. */
    *context_slot = wanted_closure;
  }

  /* A STARTSEQ or STARTSTR handler returns the closure for an inner context;
   * check it against whatever inner handlers have already recorded. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *requested = upb_handlerattr_returnclosuretype(&effective);
    const void *established =
        upb_handlerattr_returnclosuretype(&h->table[sel].attr);

    if (established != NULL) {
      if (requested == NULL) {
        /* Keep the expectation recorded earlier; the assignment of the whole
         * attr below would otherwise erase it. */
        upb_handlerattr_setreturnclosuretype(&effective, established);
      } else if (requested != established) {
        upb_status_seterrmsg(&h->status_, "closure return type does not match");
        return false;
      }
    }
  }

  h->table[sel].func = (upb_func*)func;
  h->table[sel].attr = effective;
  return true;
}
2327
2328 /* Returns the effective closure type for this handler (which will propagate
2329 * from outer frames if this frame has no START* handler). Not implemented for
2330 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
2331 * the effective closure type is unspecified (either no handler was registered
2332 * to specify it or the handler that was registered did not specify the closure
2333 * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2334 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2335 upb_handlertype_t type) {
2336 const void *ret;
2337 upb_selector_t sel;
2338
2339 assert(type != UPB_HANDLER_STRING);
2340 ret = h->top_closure_type;
2341
2342 if (upb_fielddef_isseq(f) &&
2343 type != UPB_HANDLER_STARTSEQ &&
2344 type != UPB_HANDLER_ENDSEQ &&
2345 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2346 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2347 }
2348
2349 if (type == UPB_HANDLER_STRING &&
2350 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2351 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2352 }
2353
2354 /* The effective type of the submessage; not used yet.
2355 * if (type == SUBMESSAGE &&
2356 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2357 * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2358 * } */
2359
2360 return ret;
2361 }
2362
2363 /* Checks whether the START* handler specified by f & type is missing even
2364 * though it is required to convert the established type of an outer frame
2365 * ("closure_type") into the established type of an inner frame (represented in
2366 * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)2367 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2368 upb_status *status) {
2369 const void *closure_type;
2370 const upb_handlerattr *attr;
2371 const void *return_closure_type;
2372
2373 upb_selector_t sel = handlers_getsel(h, f, type);
2374 if (h->table[sel].func) return true;
2375 closure_type = effective_closure_type(h, f, type);
2376 attr = &h->table[sel].attr;
2377 return_closure_type = upb_handlerattr_returnclosuretype(attr);
2378 if (closure_type && return_closure_type &&
2379 closure_type != return_closure_type) {
2380 upb_status_seterrf(status,
2381 "expected start handler to return sub type for field %f",
2382 upb_fielddef_name(f));
2383 return false;
2384 }
2385 return true;
2386 }
2387
2388 /* Public interface ***********************************************************/
2389
upb_handlers_new(const upb_msgdef * md,const void * owner)2390 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
2391 int extra;
2392 upb_handlers *h;
2393
2394 assert(upb_msgdef_isfrozen(md));
2395
2396 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2397 h = upb_calloc(sizeof(*h) + extra);
2398 if (!h) return NULL;
2399
2400 h->msg = md;
2401 upb_msgdef_ref(h->msg, h);
2402 upb_status_clear(&h->status_);
2403
2404 if (md->submsg_field_count > 0) {
2405 h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub));
2406 if (!h->sub) goto oom;
2407 } else {
2408 h->sub = 0;
2409 }
2410
2411 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2412 goto oom;
2413 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2414
2415 /* calloc() above initialized all handlers to NULL. */
2416 return h;
2417
2418 oom:
2419 freehandlers(upb_handlers_upcast_mutable(h));
2420 return NULL;
2421 }
2422
upb_handlers_newfrozen(const upb_msgdef * m,const void * owner,upb_handlers_callback * callback,const void * closure)2423 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2424 const void *owner,
2425 upb_handlers_callback *callback,
2426 const void *closure) {
2427 dfs_state state;
2428 upb_handlers *ret;
2429 bool ok;
2430 upb_refcounted *r;
2431
2432 state.callback = callback;
2433 state.closure = closure;
2434 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2435
2436 ret = newformsg(m, owner, &state);
2437
2438 upb_inttable_uninit(&state.tab);
2439 if (!ret) return NULL;
2440
2441 r = upb_handlers_upcast_mutable(ret);
2442 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
2443 UPB_ASSERT_VAR(ok, ok);
2444
2445 return ret;
2446 }
2447
upb_handlers_status(upb_handlers * h)2448 const upb_status *upb_handlers_status(upb_handlers *h) {
2449 assert(!upb_handlers_isfrozen(h));
2450 return &h->status_;
2451 }
2452
/* Clears the status object embedded in |h|.  |h| must not be frozen. */
void upb_handlers_clearerr(upb_handlers *h) {
  upb_status *status;

  assert(!upb_handlers_isfrozen(h));
  status = &h->status_;
  upb_status_clear(status);
}
2457
2458 #define SETTER(name, handlerctype, handlertype) \
2459 bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
2460 handlerctype func, upb_handlerattr *attr) { \
2461 int32_t sel = trygetsel(h, f, handlertype); \
2462 return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
2463 }
2464
SETTER(int32,upb_int32_handlerfunc *,UPB_HANDLER_INT32)2465 SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
2466 SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
2467 SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
2468 SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
2469 SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
2470 SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
2471 SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
2472 SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
2473 SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
2474 SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
2475 SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
2476 SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
2477 SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
2478 SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
2479
2480 #undef SETTER
2481
2482 bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2483 upb_handlerattr *attr) {
2484 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2485 (upb_func *)func, attr);
2486 }
2487
/* Registers the message-level end handler in the UPB_ENDMSG_SELECTOR slot.
 * No field is involved, so doset() is called with a NULL field; the handler
 * type passed is UPB_HANDLER_INT32.  |h| must not be frozen. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;

  assert(!upb_handlers_isfrozen(h));
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
2494
/* Sets |sub| as the subhandlers of submessage field |f| in |h| and takes a
 * ref2 from |h| to |sub|.
 *
 * Returns false, changing nothing, if subhandlers were already set for |f|
 * or if |sub| handles a different message type than |f|'s subdef.  |sub| must
 * be non-NULL, |h| must not be frozen, and |f| must be a submessage field. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  assert(sub);
  assert(!upb_handlers_isfrozen(h));
  assert(upb_fielddef_issubmsg(f));

  if (SUBH_F(h, f) ||  /* Can't reset. */
      upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  upb_ref2(sub, h);
  return true;
}
2508
/* Returns the subhandlers stored for submessage field |f|; this is NULL when
 * the slot was never set (slots start out zero-filled in upb_handlers_new()).
 * |f| must be a submessage field. */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;

  assert(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
2514
/* Copies the attributes of the handler at selector |sel| into |*attr|.
 * Returns false, leaving |*attr| untouched, if no handler is set there. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
2522
/* Returns the subhandlers for the submessage field whose STARTSUBMSG selector
 * is |sel|.  Subtracting UPB_STATIC_SELECTOR_COUNT turns that selector back
 * into the field's position in the subhandlers array. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  const upb_handlers *sub = SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
  return sub;
}
2528
upb_handlers_msgdef(const upb_handlers * h)2529 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2530
/* Registers |func| to be called on |p| when |h| is freed (see
 * freehandlers()).  Returns false if |p| is already registered; otherwise
 * returns true.  The table insertion itself is only asserted to succeed. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  bool inserted;

  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
    return false;
  }

  inserted = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
  UPB_ASSERT_VAR(inserted, inserted);
  return true;
}
2540
2541
2542 /* "Static" methods ***********************************************************/
2543
/* Validates and freezes the |n| handlers objects in |handlers|.
 *
 * For each object this checks that no setter left an error in its status,
 * that no required STARTSEQ/STARTSTR handler is missing (see checkstart()),
 * and supplies empty subhandlers for submessage fields that have handlers
 * set but no subhandlers.  Finally all objects are frozen together.
 *
 * Returns false, with an error in |s|, if any check fails, if empty
 * subhandlers cannot be allocated, or if freezing fails. */
bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
  /* TODO: verify we have a transitive closure. */
  int i;
  for (i = 0; i < n; i++) {
    upb_msg_field_iter j;
    upb_handlers *h = handlers[i];

    if (!upb_ok(&h->status_)) {
      upb_status_seterrf(s, "handlers for message %s had error status: %s",
                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
                         upb_status_errmsg(&h->status_));
      return false;
    }

    /* Check that there are no closure mismatches due to missing Start* handlers
     * or subhandlers with different type-level types. */
    for(upb_msg_field_begin(&j, h->msg);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {

      const upb_fielddef *f = upb_msg_iter_field(&j);
      if (upb_fielddef_isseq(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
          return false;
      }

      if (upb_fielddef_isstring(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
          return false;
      }

      if (upb_fielddef_issubmsg(f)) {
        bool hashandler = false;
        if (upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
            upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
          hashandler = true;
        }

        if (upb_fielddef_isseq(f) &&
            (upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
             upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
          hashandler = true;
        }

        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
          /* For now we add an empty subhandlers in this case.  It makes the
           * decoder code generator simpler, because it only has to handle two
           * cases (submessage has handlers or not) as opposed to three
           * (submessage has handlers in enclosing message but no subhandlers).
           *
           * This makes parsing less efficient in the case that we want to
           * notice a submessage but skip its contents (like if we're testing
           * for submessage presence or counting the number of repeated
           * submessages).  In this case we will end up parsing the submessage
           * field by field and throwing away the results for each, instead of
           * skipping the whole delimited thing at once.  If this is an issue we
           * can revisit it, but do remember that this only arises when you have
           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
           * submessage but no subhandlers.  The use cases for this are
           * limited. */
          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
          if (!sub) {
            /* upb_handlers_new() returns NULL when allocation fails; report
             * it instead of passing NULL to setsubhandlers(). */
            upb_status_seterrmsg(s, "out of memory");
            return false;
          }
          upb_handlers_setsubhandlers(h, f, sub);
          upb_handlers_unref(sub, &sub);
        }

        /* TODO(haberman): check type of submessage.
         * This is slightly tricky; also consider whether we should check that
         * they match at setsubhandlers time. */
      }
    }
  }

  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
                             UPB_MAX_HANDLER_DEPTH)) {
    return false;
  }

  return true;
}
2627
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)2628 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2629 switch (upb_fielddef_type(f)) {
2630 case UPB_TYPE_INT32:
2631 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2632 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2633 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2634 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2635 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2636 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2637 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
2638 default: assert(false); return -1; /* Invalid input. */
2639 }
2640 }
2641
/* Computes the selector (handler table index) for handler |type| on field
 * |f| and stores it in |*s|.  Returns false, without writing |*s|, when
 * |type| does not apply to |f|.
 *
 * Layout relative to f->selector_base, as used below:
 *   base - 2  STARTSEQ            (sequence fields)
 *   base - 1  ENDSEQ              (sequence fields)
 *   base      primitive value / STRING for string fields / ENDSUBMSG
 *   base + 1  STARTSTR
 *   base + 2  ENDSTR
 *   base + 3  STRING for non-string fields for which upb_fielddef_lazy()
 *             holds
 * STARTSUBMSG is the exception: it is derived from the field index. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      /* The handler type must be exactly the one matching the field type. */
      if (upb_fielddef_isprimitive(f) &&
          upb_handlers_getprimitivehandlertype(f) == type) {
        *s = f->selector_base;
        break;
      }
      return false;

    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = f->selector_base;
      } else if (upb_fielddef_lazy(f)) {
        *s = f->selector_base + 3;
      } else {
        return false;
      }
      break;

    case UPB_HANDLER_STARTSTR:
      if (!upb_fielddef_isstring(f) && !upb_fielddef_lazy(f)) return false;
      *s = f->selector_base + 1;
      break;

    case UPB_HANDLER_ENDSTR:
      if (!upb_fielddef_isstring(f) && !upb_fielddef_lazy(f)) return false;
      *s = f->selector_base + 2;
      break;

    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 2;
      break;

    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 1;
      break;

    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* Selectors for STARTSUBMSG are at the beginning of the table so that
       * the selector can also be used as an index into the "sub" array of
       * subhandlers.  The indexes into these two tables are the same, except
       * that in the handler table the static selectors come first. */
      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
      break;

    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = f->selector_base;
      break;
  }

  assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
  return true;
}
2704
/* Returns how many selectors precede a field's selector base: 2 for sequence
 * fields (STARTSEQ/ENDSEQ sit at base - 2 and base - 1), otherwise 0. */
uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
2708
/* Returns the number of handler-table selectors field |f| occupies, not
 * counting STARTSUBMSG (which lives at the beginning of the table). */
uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  /* Every field has one selector at its base: the value handler, STRING for
   * string fields, or ENDSUBMSG for submessage fields. */
  uint32_t count = 1;

  /* STARTSEQ/ENDSEQ */
  if (upb_fielddef_isseq(f)) {
    count += 2;
  }

  /* STARTSTR/ENDSTR (STRING is the base selector counted above). */
  if (upb_fielddef_isstring(f)) {
    count += 2;
  }

  /* Submessages need nothing beyond ENDSUBMSG unless they are lazy, in which
   * case they also get STARTSTR/ENDSTR/STRING. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3;
  }

  return count;
}
2723
2724
2725 /* upb_handlerattr ************************************************************/
2726
/* Resets |attr| to the values in UPB_HANDLERATTR_INITIALIZER. */
void upb_handlerattr_init(upb_handlerattr *attr) {
  upb_handlerattr defaults = UPB_HANDLERATTR_INITIALIZER;

  memcpy(attr, &defaults, sizeof(defaults));
}
2731
/* Counterpart to upb_handlerattr_init().  Currently does nothing. */
void upb_handlerattr_uninit(upb_handlerattr *attr) {
  UPB_UNUSED(attr);
}
2735
/* Records |hd| as the handler data pointer in |attr|.  Always returns true. */
bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
  attr->handler_data_ = hd;

  return true;
}
2740
/* Records |type| as the closure type in |attr|.  Always returns true. */
bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
  attr->closure_type_ = type;

  return true;
}
2745
upb_handlerattr_closuretype(const upb_handlerattr * attr)2746 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2747 return attr->closure_type_;
2748 }
2749
/* Records |type| as the return closure type in |attr|.  Always returns true. */
bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
                                          const void *type) {
  attr->return_closure_type_ = type;

  return true;
}
2755
upb_handlerattr_returnclosuretype(const upb_handlerattr * attr)2756 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2757 return attr->return_closure_type_;
2758 }
2759
/* Stores the |alwaysok| flag in |attr|.  Always returns true. */
bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
  attr->alwaysok_ = alwaysok;

  return true;
}
2764
upb_handlerattr_alwaysok(const upb_handlerattr * attr)2765 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2766 return attr->alwaysok_;
2767 }
2768
2769 /* upb_bufhandle **************************************************************/
2770
upb_bufhandle_objofs(const upb_bufhandle * h)2771 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2772 return h->objofs_;
2773 }
2774
2775 /* upb_byteshandler ***********************************************************/
2776
/* Zero-fills |h|, clearing every entry of its handler table. */
void upb_byteshandler_init(upb_byteshandler *h) {
  memset(h, 0, sizeof *h);
}
2780
2781 /* For when we support handlerfree callbacks. */
upb_byteshandler_uninit(upb_byteshandler * h)2782 void upb_byteshandler_uninit(upb_byteshandler* h) {
2783 UPB_UNUSED(h);
2784 }
2785
/* Installs |func| with handler data |d| in the UPB_STARTSTR_SELECTOR slot of
 * |h|.  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;

  return true;
}
2792
/* Installs |func| with handler data |d| in the UPB_STRING_SELECTOR slot of
 * |h|.  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;

  return true;
}
2799
/* Installs |func| with handler data |d| in the UPB_ENDSTR_SELECTOR slot of
 * |h|.  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;

  return true;
}
2806 /*
2807 ** upb::RefCounted Implementation
2808 **
2809 ** Our key invariants are:
2810 ** 1. reference cycles never span groups
2811 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2812 **
2813 ** The previous two are how we avoid leaking cycles. Other important
2814 ** invariants are:
2815 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2816 ** this implies group(from) == group(to). (In practice, what we implement
2817 ** is even stronger; "from" and "to" will share a group if there has *ever*
2818 ** been a ref2(to, from), but all that is necessary for correctness is the
2819 ** weaker one).
2820 ** 4. mutable and immutable objects are never in the same group.
2821 */
2822
2823
2824 #include <setjmp.h>
2825
2826 static void freeobj(upb_refcounted *o);
2827
/* Only the address of this object is used; it gives UPB_UNTRACKED_REF a
 * unique, non-NULL value. */
const char untracked_val;
/* Sentinel owner value.  In UPB_DEBUG_REFS builds track() and untrack() return
 * immediately when given this owner, so such refs are not recorded. */
const void *UPB_UNTRACKED_REF = &untracked_val;
2830
2831 /* arch-specific atomic primitives *******************************************/
2832
/* atomic_inc(a) adds one to *a.  atomic_dec(a) subtracts one from *a and
 * returns true iff the result is zero.  Every variant takes a uint32_t*,
 * because that is what refgroup()/unrefgroup() pass in. */

#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Plain, non-atomic arithmetic: the user has promised single-threaded use. */
static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(_WIN32) || defined(WIN32) /*------------------------------------*/

#include <Windows.h>

/* The Interlocked functions operate on a 32-bit LONG, so the uint32_t counter
 * is reinterpreted in place. */
static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((volatile LONG *)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((volatile LONG *)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
2856
/* All static objects point to this refcount.
 * It is special-cased in ref/unref below: refgroup() and unrefgroup() compare
 * the group pointer against its address and never modify it.  The initializer
 * -1 converts to UINT32_MAX. */
uint32_t static_refcount = -1;
2860
2861 /* We can avoid atomic ops for statically-declared objects.
2862 * This is a minor optimization but nice since we can avoid degrading under
2863 * contention in this case. */
2864
refgroup(uint32_t * group)2865 static void refgroup(uint32_t *group) {
2866 if (group != &static_refcount)
2867 atomic_inc(group);
2868 }
2869
unrefgroup(uint32_t * group)2870 static bool unrefgroup(uint32_t *group) {
2871 if (group == &static_refcount) {
2872 return false;
2873 } else {
2874 return atomic_dec(group);
2875 }
2876 }
2877
2878
2879 /* Reference tracking (debug only) ********************************************/
2880
2881 #ifdef UPB_DEBUG_REFS
2882
2883 #ifdef UPB_THREAD_UNSAFE
2884
/* With UPB_THREAD_UNSAFE there is nothing to guard, so the global lock used
 * by the ref-tracking code is a no-op. */
static void upb_lock(void) {
}

static void upb_unlock(void) {
}
2887
2888 #else
2889
2890 /* User must define functions that lock/unlock a global mutex and link this
2891 * file against them. */
2892 void upb_lock();
2893 void upb_unlock();
2894
2895 #endif
2896
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that
 * immediately aborts on failure (avoiding the global allocator, which might
 * inject failures). */
2903
2904 #include <stdlib.h>
2905
/* Allocation callback for the debug-refs allocator.
 *
 * A |size| of zero frees |ptr| and returns NULL.  Any other size resizes |ptr|
 * with realloc() and aborts the process if that fails, so a non-zero request
 * never returns NULL.  |alloc| and |oldsize| are ignored. */
static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr,
                                     size_t oldsize, size_t size) {
  void *mem;

  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size == 0) {
    free(ptr);
    return NULL;
  }

  mem = realloc(ptr, size);
  if (mem == NULL) {
    abort();
  }
  return mem;
}
2923
/* Allocator used for all ref-tracking bookkeeping; it is backed by
 * upb_debugrefs_allocfunc(), which aborts instead of returning NULL. */
upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc};
2925
/* Bookkeeping record for the refs one owner holds on one object.  Stored as
 * the value in the target object's "refs" table, keyed by the owner pointer. */
typedef struct {
  int count;  /* How many refs there are (duplicates only allowed for ref2). */
  bool is_ref2;  /* True if the record was created for a ref2. */
} trackedref;
2930
trackedref_new(bool is_ref2)2931 static trackedref *trackedref_new(bool is_ref2) {
2932 trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret));
2933 ret->count = 1;
2934 ret->is_ref2 = is_ref2;
2935 return ret;
2936 }
2937
track(const upb_refcounted * r,const void * owner,bool ref2)2938 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2939 upb_value v;
2940
2941 assert(owner);
2942 if (owner == UPB_UNTRACKED_REF) return;
2943
2944 upb_lock();
2945 if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2946 trackedref *ref = upb_value_getptr(v);
2947 /* Since we allow multiple ref2's for the same to/from pair without
2948 * allocating separate memory for each one, we lose the fine-grained
2949 * tracking behavior we get with regular refs. Since ref2s only happen
2950 * inside upb, we'll accept this limitation until/unless there is a really
2951 * difficult upb-internal bug that can't be figured out without it. */
2952 assert(ref2);
2953 assert(ref->is_ref2);
2954 ref->count++;
2955 } else {
2956 trackedref *ref = trackedref_new(ref2);
2957 upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref),
2958 &upb_alloc_debugrefs);
2959 if (ref2) {
2960 /* We know this cast is safe when it is a ref2, because it's coming from
2961 * another refcounted object. */
2962 const upb_refcounted *from = owner;
2963 assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2964 upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL),
2965 &upb_alloc_debugrefs);
2966 }
2967 }
2968 upb_unlock();
2969 }
2970
untrack(const upb_refcounted * r,const void * owner,bool ref2)2971 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2972 upb_value v;
2973 bool found;
2974 trackedref *ref;
2975
2976 assert(owner);
2977 if (owner == UPB_UNTRACKED_REF) return;
2978
2979 upb_lock();
2980 found = upb_inttable_lookupptr(r->refs, owner, &v);
2981 /* This assert will fail if an owner attempts to release a ref it didn't have. */
2982 UPB_ASSERT_VAR(found, found);
2983 ref = upb_value_getptr(v);
2984 assert(ref->is_ref2 == ref2);
2985 if (--ref->count == 0) {
2986 free(ref);
2987 upb_inttable_removeptr(r->refs, owner, NULL);
2988 if (ref2) {
2989 /* We know this cast is safe when it is a ref2, because it's coming from
2990 * another refcounted object. */
2991 const upb_refcounted *from = owner;
2992 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2993 assert(removed);
2994 }
2995 }
2996 upb_unlock();
2997 }
2998
checkref(const upb_refcounted * r,const void * owner,bool ref2)2999 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
3000 upb_value v;
3001 bool found;
3002 trackedref *ref;
3003
3004 upb_lock();
3005 found = upb_inttable_lookupptr(r->refs, owner, &v);
3006 UPB_ASSERT_VAR(found, found);
3007 ref = upb_value_getptr(v);
3008 assert(ref->is_ref2 == ref2);
3009 upb_unlock();
3010 }
3011
3012 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
3013 * originate from the given owner. */
getref2s(const upb_refcounted * owner,upb_inttable * tab)3014 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
3015 upb_inttable_iter i;
3016
3017 upb_lock();
3018 upb_inttable_begin(&i, owner->ref2s);
3019 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
3020 upb_value v;
3021 upb_value count;
3022 trackedref *ref;
3023 bool found;
3024
3025 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
3026
3027 /* To get the count we need to look in the target's table. */
3028 found = upb_inttable_lookupptr(to->refs, owner, &v);
3029 assert(found);
3030 ref = upb_value_getptr(v);
3031 count = upb_value_int32(ref->count);
3032
3033 upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs);
3034 }
3035 upb_unlock();
3036 }
3037
/* Closure passed to visit_check() while verifying an object's visit()
 * function. */
typedef struct {
  upb_inttable ref2;  /* Target object -> ref2 visits still expected. */
  const upb_refcounted *obj;  /* The object whose visit() is being checked. */
} check_state;
3042
visit_check(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3043 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
3044 void *closure) {
3045 check_state *s = closure;
3046 upb_inttable *ref2 = &s->ref2;
3047 upb_value v;
3048 bool removed;
3049 int32_t newcount;
3050
3051 assert(obj == s->obj);
3052 assert(subobj);
3053 removed = upb_inttable_removeptr(ref2, subobj, &v);
3054 /* The following assertion will fail if the visit() function visits a subobj
3055 * that it did not have a ref2 on, or visits the same subobj too many times. */
3056 assert(removed);
3057 newcount = upb_value_getint32(v) - 1;
3058 if (newcount > 0) {
3059 upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount),
3060 &upb_alloc_debugrefs);
3061 }
3062 }
3063
/* Debug-build visit: first checks that r's visit() function reports exactly
 * the ref2 edges that tracking has recorded, then runs the real visit with
 * |v| and |closure|. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
   * exactly the set of nodes that visit() should visit.  So we verify visit()'s
   * correctness here. */
  check_state check;

  check.obj = r;
  upb_inttable_init2(&check.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs);
  getref2s(r, &check.ref2);

  /* This should visit any children in the ref2 table. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, visit_check, &check);
  }

  /* This assertion will fail if the visit() function missed any children. */
  assert(upb_inttable_count(&check.ref2) == 0);
  upb_inttable_uninit2(&check.ref2, &upb_alloc_debugrefs);

  /* Verification done; now perform the visit the caller asked for. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, v, closure);
  }
}
3082
/* Allocates and initializes the two tracking tables of |r|: "refs" (owners
 * holding refs on r) and "ref2s" (objects r holds ref2's on). */
static void trackinit(upb_refcounted *r) {
  r->refs = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->refs));
  upb_inttable_init2(r->refs, UPB_CTYPE_PTR, &upb_alloc_debugrefs);

  r->ref2s = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->ref2s));
  upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, &upb_alloc_debugrefs);
}
3089
trackfree(const upb_refcounted * r)3090 static void trackfree(const upb_refcounted *r) {
3091 upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs);
3092 upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs);
3093 upb_free(&upb_alloc_debugrefs, r->refs);
3094 upb_free(&upb_alloc_debugrefs, r->ref2s);
3095 }
3096
3097 #else
3098
/* Without UPB_DEBUG_REFS, ref tracking is compiled out: this is a no-op. */
static void track(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
3104
/* Without UPB_DEBUG_REFS, ref tracking is compiled out: this is a no-op. */
static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
3110
/* Without UPB_DEBUG_REFS there is nothing to verify: this is a no-op. */
static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
3116
/* Without UPB_DEBUG_REFS there are no tracking tables to create. */
static void trackinit(upb_refcounted *r) {
  UPB_UNUSED(r);
}
3120
/* Without UPB_DEBUG_REFS there are no tracking tables to destroy. */
static void trackfree(const upb_refcounted *r) {
  UPB_UNUSED(r);
}
3124
/* Invokes r's visit() function, if it has one, with callback |v| and
 * |closure|.  Objects whose vtable has no visit function are skipped. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  if (!r->vtbl->visit) {
    return;
  }
  r->vtbl->visit(r, v, closure);
}
3129
3130 #endif /* UPB_DEBUG_REFS */
3131
3132
3133 /* freeze() *******************************************************************/
3134
3135 /* The freeze() operation is by far the most complicated part of this scheme.
3136 * We compute strongly-connected components and then mutate the graph such that
3137 * we preserve the invariants documented at the top of this file. And we must
3138 * handle out-of-memory errors gracefully (without leaving the graph
3139 * inconsistent), which adds to the fun. */
3140
/* The state used by the freeze operation (shared across many functions). */
typedef struct {
  int depth;     /* Current nesting depth of tarjan_visit() calls. */
  int maxdepth;  /* tarjan_visit() fails the freeze once depth exceeds this. */
  uint64_t index;  /* Next Tarjan index; push() assigns it and increments. */
  /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
   * color. */
  upb_inttable objattr;
  upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
  /* Pairs of [uint32_t* refcount (malloc'd), upb_refcounted* leader], one
   * pair per new group; the leader slot starts out NULL. */
  upb_inttable groups;
  upb_status *status;  /* Receives the message when an error is raised. */
  jmp_buf err;         /* longjmp() target used by err() and oom(). */
} tarjan;
3154
3155 static void release_ref2(const upb_refcounted *obj,
3156 const upb_refcounted *subobj,
3157 void *closure);
3158
3159 /* Node attributes -----------------------------------------------------------*/
3160
/* After our analysis phase all nodes will be either GRAY or WHITE.
 *
 * The color lives in the low 2 bits of an object's attribute word (color()
 * masks with 0x3), so there can be at most four values.  BLACK must be 0
 * because objects with no attribute entry read back as 0. */

typedef enum {
  BLACK = 0,  /* Object has not been seen. */
  GRAY,   /* Object has been found via a refgroup but may not be reachable. */
  GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
  WHITE   /* Object is reachable and has been assigned a group (SCC). */
} color_t;
3169
err(tarjan * t)3170 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
oom(tarjan * t)3171 UPB_NORETURN static void oom(tarjan *t) {
3172 upb_status_seterrmsg(t->status, "out of memory");
3173 err(t);
3174 }
3175
trygetattr(const tarjan * t,const upb_refcounted * r)3176 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3177 upb_value v;
3178 return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3179 upb_value_getuint64(v) : 0;
3180 }
3181
getattr(const tarjan * t,const upb_refcounted * r)3182 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3183 upb_value v;
3184 bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3185 UPB_ASSERT_VAR(found, found);
3186 return upb_value_getuint64(v);
3187 }
3188
/* Stores |attr| as the attribute word for |r|, replacing any previous value.
 *
 * The first insertion for an object may need to grow the table.  If that
 * fails the freeze is abandoned via oom(), rather than silently dropping the
 * attribute and leaving later getattr() calls without an entry to read.
 * Every caller runs during the analysis phase of freeze(), before any object
 * has been mutated, so bailing out here leaves the graph untouched. */
static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
  upb_inttable_removeptr(&t->objattr, r, NULL);
  if (!upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr))) {
    oom(t);
  }
}
3193
color(tarjan * t,const upb_refcounted * r)3194 static color_t color(tarjan *t, const upb_refcounted *r) {
3195 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */
3196 }
3197
/* Marks the not-yet-seen (BLACK) object |r| as GRAY. */
static void set_gray(tarjan *t, const upb_refcounted *r) {
  assert(BLACK == color(t, r));

  setattr(t, r, GRAY);
}
3202
3203 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
push(tarjan * t,const upb_refcounted * r)3204 static void push(tarjan *t, const upb_refcounted *r) {
3205 assert(color(t, r) == BLACK || color(t, r) == GRAY);
3206 /* This defines the attr layout for the GREEN state. "index" and "lowlink"
3207 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
3208 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
3209 if (++t->index == 0x80000000) {
3210 upb_status_seterrmsg(t->status, "too many objects to freeze");
3211 err(t);
3212 }
3213 upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
3214 }
3215
3216 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3217 * SCC group. */
pop(tarjan * t)3218 static upb_refcounted *pop(tarjan *t) {
3219 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3220 assert(color(t, r) == GREEN);
3221 /* This defines the attr layout for nodes in the WHITE state.
3222 * Top of group stack is [group, NULL]; we point at group. */
3223 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3224 return r;
3225 }
3226
/* Starts a new group (SCC): allocates its refcount, initialized to zero, and
 * appends the pair [refcount, NULL leader] to t->groups.  Abandons the freeze
 * via oom() if any allocation fails.
 *
 * Ownership matters on the failure paths.  The error path in freeze() frees
 * every value stored in t->groups, so the refcount may only be freed here
 * while it has NOT yet been stored in the table; freeing it after a successful
 * first push would make that cleanup free it a second time. */
static void tarjan_newgroup(tarjan *t) {
  uint32_t *group = upb_gmalloc(sizeof(*group));
  if (!group) oom(t);
  *group = 0;

  if (!upb_inttable_push(&t->groups, upb_value_ptr(group))) {
    /* The table never took ownership, so release the refcount ourselves. */
    upb_gfree(group);
    oom(t);
  }

  /* Push the empty group leader (we'll fill in the leader later). */
  if (!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
    /* |group| is now owned by t->groups; freeze()'s cleanup will free it. */
    oom(t);
  }
}
3238
/* Returns the Tarjan index of the GREEN object |r| (bits 2..32 of its attr). */
static uint32_t idx(tarjan *t, const upb_refcounted *r) {
  uint64_t attr;

  assert(color(t, r) == GREEN);
  attr = getattr(t, r);
  return (attr >> 2) & 0x7FFFFFFF;
}
3243
/* Returns the lowlink of |r| (bits 33 and up of its attr) if it is GREEN.
 * Objects of any other color yield UINT32_MAX, so they never win a UPB_MIN()
 * comparison against a real lowlink. */
static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
  if (color(t, r) != GREEN) {
    return UINT32_MAX;
  }
  return getattr(t, r) >> 33;
}
3251
/* Replaces the lowlink of the GREEN object |r|, keeping its color and index
 * (the low 33 bits of the attr) unchanged. */
static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
  uint64_t color_and_index;

  assert(color(t, r) == GREEN);
  color_and_index = getattr(t, r) & 0x1FFFFFFFF;
  setattr(t, r, ((uint64_t)lowlink << 33) | color_and_index);
}
3256
/* Returns the refcount pointer of the new group that the WHITE object |r| was
 * assigned to by pop(). */
static uint32_t *group(tarjan *t, upb_refcounted *r) {
  uint64_t slot;
  upb_value entry;
  bool present;

  assert(color(t, r) == WHITE);
  /* For WHITE nodes the bits above bit 8 index into t->groups. */
  slot = getattr(t, r) >> 8;
  present = upb_inttable_lookup(&t->groups, slot, &entry);
  UPB_ASSERT_VAR(present, present);
  return upb_value_getptr(entry);
}
3268
3269 /* If the group leader for this object's group has not previously been set,
3270 * the given object is assigned to be its leader. */
groupleader(tarjan * t,upb_refcounted * r)3271 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
3272 uint64_t leader_slot;
3273 upb_value v;
3274 bool found;
3275
3276 assert(color(t, r) == WHITE);
3277 leader_slot = (getattr(t, r) >> 8) + 1;
3278 found = upb_inttable_lookup(&t->groups, leader_slot, &v);
3279 UPB_ASSERT_VAR(found, found);
3280 if (upb_value_getptr(v)) {
3281 return upb_value_getptr(v);
3282 } else {
3283 upb_inttable_remove(&t->groups, leader_slot, NULL);
3284 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
3285 return r;
3286 }
3287 }
3288
3289
3290 /* Tarjan's algorithm --------------------------------------------------------*/
3291
3292 /* See:
3293 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
3294 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3295
tarjan_visit(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3296 static void tarjan_visit(const upb_refcounted *obj,
3297 const upb_refcounted *subobj,
3298 void *closure) {
3299 tarjan *t = closure;
3300 if (++t->depth > t->maxdepth) {
3301 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3302 err(t);
3303 } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
3304 /* Do nothing: we don't want to visit or color already-frozen nodes,
3305 * and WHITE nodes have already been assigned a SCC. */
3306 } else if (color(t, subobj) < GREEN) {
3307 /* Subdef has not yet been visited; recurse on it. */
3308 do_tarjan(subobj, t);
3309 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3310 } else if (color(t, subobj) == GREEN) {
3311 /* Subdef is in the stack and hence in the current SCC. */
3312 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3313 }
3314 --t->depth;
3315 }
3316
/* Runs Tarjan's algorithm starting at |obj|: pushes it, visits its children
 * through tarjan_visit(), and, if |obj| turns out to be the root of an SCC,
 * opens a new group and pops the whole SCC into it. */
static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
  if (color(t, obj) == BLACK) {
    /* We haven't seen this object's group; mark the whole group GRAY by
     * walking its circular "next" list once. */
    const upb_refcounted *member = obj;
    do {
      set_gray(t, member);
      member = member->next;
    } while (member != obj);
  }

  push(t, obj);
  visit(obj, tarjan_visit, t);

  if (lowlink(t, obj) == idx(t, obj)) {
    /* |obj| is an SCC root: everything above it on the stack, and |obj|
     * itself, forms one new group. */
    tarjan_newgroup(t);
    for (;;) {
      if (pop(t) == obj) break;
    }
  }
}
3332
3333
3334 /* freeze() ------------------------------------------------------------------*/
3335
crossref(const upb_refcounted * r,const upb_refcounted * subobj,void * _t)3336 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3337 void *_t) {
3338 tarjan *t = _t;
3339 assert(color(t, r) > BLACK);
3340 if (color(t, subobj) > BLACK && r->group != subobj->group) {
3341 /* Previously this ref was not reflected in subobj->group because they
3342 * were in the same group; now that they are split a ref must be taken. */
3343 refgroup(subobj->group);
3344 }
3345 }
3346
/* Freezes every object reachable from the |n| objects in |roots|.
 *
 * Phase one (analysis) runs Tarjan's algorithm to compute strongly-connected
 * components without touching the objects; any failure there longjmp()s to
 * the cleanup code and the input is left unchanged.  Phase two moves the
 * reachable (WHITE) objects into new, frozen groups in three passes.
 *
 * Returns true on success.  On failure returns false; errors raised through
 * err()/oom() have stored a message in |s|.
 * NOTE(review): the three upb_inttable_init() failure paths below return false
 * without writing anything to |s|.
 *
 * |maxdepth| bounds the recursion depth of the graph traversal. */
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
                   int maxdepth) {
  /* volatile because it is modified between setjmp() and a possible longjmp()
   * and read afterwards. */
  volatile bool ret = false;
  int i;
  upb_inttable_iter iter;

  /* We run in two passes so that we can allocate all memory before performing
   * any mutation of the input -- this allows us to leave the input unchanged
   * in the case of memory allocation failure. */
  tarjan t;
  t.index = 0;
  t.depth = 0;
  t.maxdepth = maxdepth;
  t.status = s;
  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
  /* err() and oom() land here with a non-zero value. */
  if (setjmp(t.err) != 0) goto err4;


  for (i = 0; i < n; i++) {
    if (color(&t, roots[i]) < GREEN) {
      do_tarjan(roots[i], &t);
    }
  }

  /* If we've made it this far, no further errors are possible so it's safe to
   * mutate the objects without risk of leaving them in an inconsistent state. */
  ret = true;

  /* The transformation that follows requires care.  The preconditions are:
   * - all objects in attr map are WHITE or GRAY, and are in mutable groups
   *   (groups of all mutable objs)
   * - no ref2(to, from) refs have incremented count(to) if both "to" and
   *   "from" are in our attr map (this follows from invariants (2) and (3)) */

  /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
   * new groups according to the SCC's we computed.  These new groups will
   * consist of only frozen objects.  None will be immediately collectible,
   * because WHITE objects are by definition reachable from one of "roots",
   * which the caller must own refs on. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* Since removal from a singly-linked list requires access to the object's
     * predecessor, we consider obj->next instead of obj for moving.  With the
     * while() loop we guarantee that we will visit every node's predecessor.
     * Proof:
     *  1. every node's predecessor is in our attr map.
     *  2. though the loop body may change a node's predecessor, it will only
     *     change it to be the node we are currently operating on, so with a
     *     while() loop we guarantee ourselves the chance to remove each node. */
    while (color(&t, obj->next) == WHITE &&
           group(&t, obj->next) != obj->next->group) {
      upb_refcounted *leader;

      /* Remove from old group. */
      upb_refcounted *move = obj->next;
      if (obj == move) {
        /* Removing the last object from a group. */
        assert(*obj->group == obj->individual_count);
        upb_gfree(obj->group);
      } else {
        obj->next = move->next;
        /* This may decrease to zero; we'll collect GRAY objects (if any) that
         * remain in the group in the third pass. */
        assert(*move->group >= move->individual_count);
        *move->group -= move->individual_count;
      }

      /* Add to new group. */
      leader = groupleader(&t, move);
      if (move == leader) {
        /* First object added to new group is its leader. */
        move->group = group(&t, move);
        move->next = move;
        *move->group = move->individual_count;
      } else {
        /* Group already has at least one object in it. */
        assert(leader->group == group(&t, move));
        move->group = group(&t, move);
        /* Splice |move| into the leader's circular list, right after it. */
        move->next = leader->next;
        leader->next = move;
        *move->group += move->individual_count;
      }

      move->is_frozen = true;
    }
  }

  /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
   * increment count(to) if group(obj) != group(to) (which could now be the
   * case if "to" was just frozen). */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    visit(obj, crossref, &t);
  }

  /* Pass 3: GRAY objects are collected if their group's refcount dropped to
   * zero when we removed its white nodes.  This can happen if they had only
   * been kept alive by virtue of sharing a group with an object that was just
   * frozen.
   *
   * It is important that we do this last, since the GRAY object's free()
   * function could call unref2() on just-frozen objects, which will decrement
   * refs that were added in pass 2. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* A NULL group means an earlier iteration already released this object's
     * group (see below) and only the object itself is left to free. */
    if (obj->group == NULL || *obj->group == 0) {
      if (obj->group) {
        upb_refcounted *o;

        /* We eagerly free() the group's count (since we can't easily determine
         * the group's remaining size it's the easiest way to ensure it gets
         * done). */
        upb_gfree(obj->group);

        /* Visit to release ref2's (done in a separate pass since release_ref2
         * depends on o->group being unmodified so it can test merged()). */
        o = obj;
        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);

        /* Mark "group" fields as NULL so we know to free the objects later in
         * this loop, but also don't try to delete the group twice. */
        o = obj;
        do { o->group = NULL; } while ((o = o->next) != obj);
      }
      freeobj(obj);
    }
  }

err4:
  if (!ret) {
    /* Analysis failed: the refcounts allocated for new groups were never
     * handed to any object, so release them here. */
    upb_inttable_begin(&iter, &t.groups);
    for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
      upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter)));
  }
  upb_inttable_uninit(&t.groups);
err3:
  upb_inttable_uninit(&t.stack);
err2:
  upb_inttable_uninit(&t.objattr);
err1:
  return ret;
}
3494
3495
3496 /* Misc internal functions ***************************************************/
3497
merged(const upb_refcounted * r,const upb_refcounted * r2)3498 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3499 return r->group == r2->group;
3500 }
3501
/* Folds the group of "from" into the group of "r".
 *
 * Does nothing if the two already share a group.  Otherwise the count of
 * from's group is added to r's group, from's group count is freed, every
 * object on from's circular list is repointed at r's group, and the two
 * circular lists are spliced into one. */
static void merge(upb_refcounted *r, upb_refcounted *from) {
  upb_refcounted *const head = from;
  upb_refcounted *cur;
  upb_refcounted *r_next;

  if (merged(r, from)) return;

  /* Combine the counts, then drop the now-redundant counter. */
  *r->group += *from->group;
  upb_gfree(from->group);

  /* Repoint every member of the "from" ring at the surviving counter.
   *
   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   * if the user continuously extends a group by one object.  Prevent this by
   * using one of the techniques in this paper:
   *   ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
  cur = head;
  do {
    cur->group = r->group;
    cur = cur->next;
  } while (cur != head);

  /* Splice the two rings together by exchanging the "next" links. */
  r_next = r->next;
  r->next = head->next;
  head->next = r_next;
}
3524
3525 static void unref(const upb_refcounted *r);
3526
release_ref2(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3527 static void release_ref2(const upb_refcounted *obj,
3528 const upb_refcounted *subobj,
3529 void *closure) {
3530 UPB_UNUSED(closure);
3531 untrack(subobj, obj, true);
3532 if (!merged(obj, subobj)) {
3533 assert(subobj->is_frozen);
3534 unref(subobj);
3535 }
3536 }
3537
unref(const upb_refcounted * r)3538 static void unref(const upb_refcounted *r) {
3539 if (unrefgroup(r->group)) {
3540 const upb_refcounted *o;
3541
3542 upb_gfree(r->group);
3543
3544 /* In two passes, since release_ref2 needs a guarantee that any subobjs
3545 * are alive. */
3546 o = r;
3547 do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3548
3549 o = r;
3550 do {
3551 const upb_refcounted *next = o->next;
3552 assert(o->is_frozen || o->individual_count == 0);
3553 freeobj((upb_refcounted*)o);
3554 o = next;
3555 } while(o != r);
3556 }
3557 }
3558
/* Stops ref tracking for "o" and then hands it to its vtable's free(). */
static void freeobj(upb_refcounted *o) {
  const struct upb_refcounted_vtbl *vt;

  trackfree(o);
  vt = o->vtbl;
  vt->free(o);
}
3563
3564
3565 /* Public interface ***********************************************************/
3566
/* Initializes "r" as a mutable object alone in its own group, using "vtbl"
 * for its virtual functions, and takes an initial ref on behalf of "owner".
 *
 * Returns false if the group counter cannot be allocated; in that case no
 * ref is taken and tracking is not initialized. */
bool upb_refcounted_init(upb_refcounted *r,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
#ifndef NDEBUG
  /* Endianness sanity check.  It has nothing to do with upb_refcounted; this
   * is simply a spot that practically every upb program passes through. */
  const int probe = 1;
#ifdef UPB_BIG_ENDIAN
  assert(*(char*)&probe != 1);
#else
  assert(*(char*)&probe == 1);
#endif
#endif

  r->vtbl = vtbl;
  r->next = r;  /* A ring of one. */
  r->is_frozen = false;
  r->individual_count = 0;

  r->group = upb_gmalloc(sizeof(*r->group));
  if (r->group == NULL) {
    return false;
  }
  *r->group = 0;

  trackinit(r);
  upb_refcounted_ref(r, owner);
  return true;
}
3593
upb_refcounted_isfrozen(const upb_refcounted * r)3594 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3595 return r->is_frozen;
3596 }
3597
upb_refcounted_ref(const upb_refcounted * r,const void * owner)3598 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3599 track(r, owner, false);
3600 if (!r->is_frozen)
3601 ((upb_refcounted*)r)->individual_count++;
3602 refgroup(r->group);
3603 }
3604
upb_refcounted_unref(const upb_refcounted * r,const void * owner)3605 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3606 untrack(r, owner, false);
3607 if (!r->is_frozen)
3608 ((upb_refcounted*)r)->individual_count--;
3609 unref(r);
3610 }
3611
/* Records a ref2 from the mutable object "from" to "r".  A frozen target
 * gets its group count incremented; a mutable target is merged into the
 * same group as "from". */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
  assert(!from->is_frozen);  /* Non-const pointer implies this. */
  track(r, from, true);

  if (!r->is_frozen) {
    merge((upb_refcounted*)r, from);
    return;
  }
  refgroup(r->group);
}
3621
/* Removes a ref2 from the mutable object "from" to "r".  A frozen target is
 * unref'd; for a mutable target the two objects are asserted to share a
 * group and nothing else happens. */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
  assert(!from->is_frozen);  /* Non-const pointer implies this. */
  untrack(r, from, true);

  if (!r->is_frozen) {
    assert(merged(r, from));
    return;
  }
  unref(r);
}
3631
/* Moves a ref on "r" from owner "from" to owner "to".  The new ref is taken
 * before the old one is dropped.  A NULL "to" means no ref is taken; a NULL
 * "from" means no ref is released.  "from" and "to" must differ. */
void upb_refcounted_donateref(
    const upb_refcounted *r, const void *from, const void *to) {
  assert(from != to);
  if (to) {
    upb_refcounted_ref(r, to);
  }
  if (from) {
    upb_refcounted_unref(r, from);
  }
}
3640
/* Forwards to checkref() for a regular (non-ref2) ref held by "owner" on
 * "r". */
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
  const bool is_ref2 = false;
  checkref(r, owner, is_ref2);
}
3644
/* Freezes the "n" objects in "roots" by delegating to freeze().  Every root
 * is asserted to be mutable on entry.  When "s" is non-NULL, the returned
 * value is asserted to agree with upb_ok(s). */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
                           int maxdepth) {
  bool ret;
  int i = 0;

  while (i < n) {
    assert(!roots[i]->is_frozen);
    i++;
  }

  ret = freeze(roots, n, s, maxdepth);
  assert(!s || ret == upb_ok(s));
  return ret;
}
3656
3657
/* Fallback implementation if the shim is not specialized by the JIT.
 *
 * SHIM_WRITER(type, ctype) defines the handler
 *
 *   bool upb_shim_set<type>(void *c, const void *hd, ctype val)
 *
 * which treats "c" as a byte buffer and "hd" as a upb_shim_data.  It:
 *   1. if d->hasbit > 0, sets bit (hasbit % 8) of byte (hasbit / 8) in c;
 *   2. stores "val" as a ctype at byte offset d->offset in c;
 *   3. always returns true.
 *
 * NOTE(review): the "> 0" test means a hasbit of exactly 0 is treated as
 * "no hasbit", so bit 0 of byte 0 can never be set here.  Confirm against
 * the shim header whether ">= 0" was intended. */
#define SHIM_WRITER(type, ctype) \
  bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \
    uint8_t *m = c; \
    const upb_shim_data *d = hd; \
    if (d->hasbit > 0) \
      *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \
    *(ctype*)&m[d->offset] = val; \
    return true; \
  } \

/* One writer per primitive value type handled by upb_shim_set(). */
SHIM_WRITER(double, double)
SHIM_WRITER(float,  float)
SHIM_WRITER(int32,  int32_t)
SHIM_WRITER(int64,  int64_t)
SHIM_WRITER(uint32, uint32_t)
SHIM_WRITER(uint64, uint64_t)
SHIM_WRITER(bool,   bool)
#undef SHIM_WRITER
3677
3678 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3679 int32_t hasbit) {
3680 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3681 bool ok;
3682
3683 upb_shim_data *d = upb_gmalloc(sizeof(*d));
3684 if (!d) return false;
3685 d->offset = offset;
3686 d->hasbit = hasbit;
3687
3688 upb_handlerattr_sethandlerdata(&attr, d);
3689 upb_handlerattr_setalwaysok(&attr, true);
3690 upb_handlers_addcleanup(h, d, upb_gfree);
3691
3692 #define TYPE(u, l) \
3693 case UPB_TYPE_##u: \
3694 ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3695
3696 ok = false;
3697
3698 switch (upb_fielddef_type(f)) {
3699 TYPE(INT64, int64);
3700 TYPE(INT32, int32);
3701 TYPE(ENUM, int32);
3702 TYPE(UINT64, uint64);
3703 TYPE(UINT32, uint32);
3704 TYPE(DOUBLE, double);
3705 TYPE(FLOAT, float);
3706 TYPE(BOOL, bool);
3707 default: assert(false); break;
3708 }
3709 #undef TYPE
3710
3711 upb_handlerattr_uninit(&attr);
3712 return ok;
3713 }
3714
upb_shim_getdata(const upb_handlers * h,upb_selector_t s,upb_fieldtype_t * type)3715 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3716 upb_fieldtype_t *type) {
3717 upb_func *f = upb_handlers_gethandler(h, s);
3718
3719 if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3720 *type = UPB_TYPE_INT64;
3721 } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3722 *type = UPB_TYPE_INT32;
3723 } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3724 *type = UPB_TYPE_UINT64;
3725 } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3726 *type = UPB_TYPE_UINT32;
3727 } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3728 *type = UPB_TYPE_DOUBLE;
3729 } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3730 *type = UPB_TYPE_FLOAT;
3731 } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3732 *type = UPB_TYPE_BOOL;
3733 } else {
3734 return NULL;
3735 }
3736
3737 return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3738 }
3739
3740
3741 #include <string.h>
3742
/* vtable free() for upb_symtab: releases the ref the symtab holds on every
 * def in its table, tears down the table, and frees the symtab itself. */
static void upb_symtab_free(upb_refcounted *r) {
  upb_symtab *s = (upb_symtab*)r;
  upb_strtable_iter it;

  upb_strtable_begin(&it, &s->symtab);
  while (!upb_strtable_done(&it)) {
    const upb_def *held = upb_value_getptr(upb_strtable_iter_value(&it));
    upb_def_unref(held, s);
    upb_strtable_next(&it);
  }

  upb_strtable_uninit(&s->symtab);
  upb_gfree(s);
}
3754
upb_symtab_new(const void * owner)3755 upb_symtab *upb_symtab_new(const void *owner) {
3756 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3757
3758 upb_symtab *s = upb_gmalloc(sizeof(*s));
3759 if (!s) {
3760 return NULL;
3761 }
3762
3763 upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
3764 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3765 return s;
3766 }
3767
/* Freezes the symtab "s", which must not already be frozen.  The result of
 * the freeze is only checked through UPB_ASSERT_VAR. */
void upb_symtab_freeze(upb_symtab *s) {
  upb_refcounted *root;
  bool frozen_ok;

  assert(!upb_symtab_isfrozen(s));
  root = upb_symtab_upcast_mutable(s);

  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
   * defs cannot refer back to the table and therefore cannot create cycles.
   * A maxdepth of 0 is therefore enough. */
  frozen_ok = upb_refcounted_freeze(&root, 1, NULL, 0);
  UPB_ASSERT_VAR(frozen_ok, frozen_ok);
}
3780
upb_symtab_lookup(const upb_symtab * s,const char * sym)3781 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3782 upb_value v;
3783 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3784 upb_value_getptr(v) : NULL;
3785 return ret;
3786 }
3787
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)3788 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3789 upb_value v;
3790 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3791 upb_value_getptr(v) : NULL;
3792 return def ? upb_dyncast_msgdef(def) : NULL;
3793 }
3794
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)3795 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3796 upb_value v;
3797 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3798 upb_value_getptr(v) : NULL;
3799 return def ? upb_dyncast_enumdef(def) : NULL;
3800 }
3801
3802 /* Given a symbol and the base symbol inside which it is defined, find the
3803 * symbol's definition in t. */
upb_resolvename(const upb_strtable * t,const char * base,const char * sym)3804 static upb_def *upb_resolvename(const upb_strtable *t,
3805 const char *base, const char *sym) {
3806 if(strlen(sym) == 0) return NULL;
3807 if(sym[0] == '.') {
3808 /* Symbols starting with '.' are absolute, so we do a single lookup.
3809 * Slice to omit the leading '.' */
3810 upb_value v;
3811 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3812 } else {
3813 /* Remove components from base until we find an entry or run out.
3814 * TODO: This branch is totally broken, but currently not used. */
3815 (void)base;
3816 assert(false);
3817 return NULL;
3818 }
3819 }
3820
upb_symtab_resolve(const upb_symtab * s,const char * base,const char * sym)3821 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3822 const char *sym) {
3823 upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3824 return ret;
3825 }
3826
3827 /* Starts a depth-first traversal at "def", recursing into any subdefs
3828 * (ie. submessage types). Adds duplicates of existing defs to addtab
3829 * wherever necessary, so that the resulting symtab will be consistent once
3830 * addtab is added.
3831 *
3832 * More specifically, if any def D is found in the DFS that:
3833 *
3834 * 1. can reach a def that is being replaced by something in addtab, AND
3835 *
3836 * 2. is not itself being replaced already (ie. this name doesn't already
3837 * exist in addtab)
3838 *
3839 * ...then a duplicate (new copy) of D will be added to addtab.
3840 *
3841 * Returns true if this happened for any def reachable from "def."
3842 *
3843 * It is slightly tricky to do this correctly in the presence of cycles. If we
3844 * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3845 * our stack can reach a def in addtab or not. Once we figure this out, that
3846 * answer needs to apply to *all* defs in these SCCs, even if we visited them
3847 * already. So a straight up one-pass cycle-detecting DFS won't work.
3848 *
3849 * To work around this problem, we traverse each SCC (which we already
3850 * computed, since these defs are frozen) as a single node. We first compute
3851 * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3852 * the entire SCC. This requires breaking the encapsulation of upb_refcounted,
3853 * since that is where we get the data about what SCC we are in. */
upb_resolve_dfs(const upb_def * def,upb_strtable * addtab,const void * new_owner,upb_inttable * seen,upb_status * s)3854 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3855 const void *new_owner, upb_inttable *seen,
3856 upb_status *s) {
3857 upb_value v;
3858 bool need_dup;
3859 const upb_def *base;
3860 const void* memoize_key;
3861
3862 /* Memoize results of this function for efficiency (since we're traversing a
3863 * DAG this is not needed to limit the depth of the search).
3864 *
3865 * We memoize by SCC instead of by individual def. */
3866 memoize_key = def->base.group;
3867
3868 if (upb_inttable_lookupptr(seen, memoize_key, &v))
3869 return upb_value_getbool(v);
3870
3871 /* Visit submessages for all messages in the SCC. */
3872 need_dup = false;
3873 base = def;
3874 do {
3875 upb_value v;
3876 const upb_msgdef *m;
3877
3878 assert(upb_def_isfrozen(def));
3879 if (def->type == UPB_DEF_FIELD) continue;
3880 if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3881 need_dup = true;
3882 }
3883
3884 /* For messages, continue the recursion by visiting all subdefs, but only
3885 * ones in different SCCs. */
3886 m = upb_dyncast_msgdef(def);
3887 if (m) {
3888 upb_msg_field_iter i;
3889 for(upb_msg_field_begin(&i, m);
3890 !upb_msg_field_done(&i);
3891 upb_msg_field_next(&i)) {
3892 upb_fielddef *f = upb_msg_iter_field(&i);
3893 const upb_def *subdef;
3894
3895 if (!upb_fielddef_hassubdef(f)) continue;
3896 subdef = upb_fielddef_subdef(f);
3897
3898 /* Skip subdefs in this SCC. */
3899 if (def->base.group == subdef->base.group) continue;
3900
3901 /* |= to avoid short-circuit; we need its side-effects. */
3902 need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
3903 if (!upb_ok(s)) return false;
3904 }
3905 }
3906 } while ((def = (upb_def*)def->base.next) != base);
3907
3908 if (need_dup) {
3909 /* Dup all defs in this SCC that don't already have entries in addtab. */
3910 def = base;
3911 do {
3912 const char *name;
3913
3914 if (def->type == UPB_DEF_FIELD) continue;
3915 name = upb_def_fullname(def);
3916 if (!upb_strtable_lookup(addtab, name, NULL)) {
3917 upb_def *newdef = upb_def_dup(def, new_owner);
3918 if (!newdef) goto oom;
3919 newdef->came_from_user = false;
3920 if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3921 goto oom;
3922 }
3923 } while ((def = (upb_def*)def->base.next) != base);
3924 }
3925
3926 upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
3927 return need_dup;
3928
3929 oom:
3930 upb_status_seterrmsg(s, "out of memory");
3931 return false;
3932 }
3933
3934 /* TODO(haberman): we need a lot more testing of error conditions.
3935 * The came_from_user stuff in particular is not tested. */
symtab_add(upb_symtab * s,upb_def * const * defs,size_t n,void * ref_donor,upb_refcounted * freeze_also,upb_status * status)3936 static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
3937 void *ref_donor, upb_refcounted *freeze_also,
3938 upb_status *status) {
3939 size_t i;
3940 size_t add_n;
3941 size_t freeze_n;
3942 upb_strtable_iter iter;
3943 upb_refcounted **add_objs = NULL;
3944 upb_def **add_defs = NULL;
3945 size_t add_objs_size;
3946 upb_strtable addtab;
3947 upb_inttable seen;
3948
3949 if (n == 0 && !freeze_also) {
3950 return true;
3951 }
3952
3953 assert(!upb_symtab_isfrozen(s));
3954 if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3955 upb_status_seterrmsg(status, "out of memory");
3956 return false;
3957 }
3958
3959 /* Add new defs to our "add" set. */
3960 for (i = 0; i < n; i++) {
3961 upb_def *def = defs[i];
3962 const char *fullname;
3963 upb_fielddef *f;
3964
3965 if (upb_def_isfrozen(def)) {
3966 upb_status_seterrmsg(status, "added defs must be mutable");
3967 goto err;
3968 }
3969 assert(!upb_def_isfrozen(def));
3970 fullname = upb_def_fullname(def);
3971 if (!fullname) {
3972 upb_status_seterrmsg(
3973 status, "Anonymous defs cannot be added to a symtab");
3974 goto err;
3975 }
3976
3977 f = upb_dyncast_fielddef_mutable(def);
3978
3979 if (f) {
3980 if (!upb_fielddef_containingtypename(f)) {
3981 upb_status_seterrmsg(status,
3982 "Standalone fielddefs must have a containing type "
3983 "(extendee) name set");
3984 goto err;
3985 }
3986 } else {
3987 if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3988 upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3989 goto err;
3990 }
3991 /* We need this to back out properly, because if there is a failure we
3992 * need to donate the ref back to the caller. */
3993 def->came_from_user = true;
3994 upb_def_donateref(def, ref_donor, s);
3995 if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3996 goto oom_err;
3997 }
3998 }
3999
4000 /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
4001 * If the appropriate message only exists in the existing symtab, duplicate
4002 * it so we have a mutable copy we can add the fields to. */
4003 for (i = 0; i < n; i++) {
4004 upb_def *def = defs[i];
4005 upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
4006 const char *msgname;
4007 upb_value v;
4008 upb_msgdef *m;
4009
4010 if (!f) continue;
4011 msgname = upb_fielddef_containingtypename(f);
4012 /* We validated this earlier in this function. */
4013 assert(msgname);
4014
4015 /* If the extendee name is absolutely qualified, move past the initial ".".
4016 * TODO(haberman): it is not obvious what it would mean if this was not
4017 * absolutely qualified. */
4018 if (msgname[0] == '.') {
4019 msgname++;
4020 }
4021
4022 if (upb_strtable_lookup(&addtab, msgname, &v)) {
4023 /* Extendee is in the set of defs the user asked us to add. */
4024 m = upb_value_getptr(v);
4025 } else {
4026 /* Need to find and dup the extendee from the existing symtab. */
4027 const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
4028 if (!frozen_m) {
4029 upb_status_seterrf(status,
4030 "Tried to extend message %s that does not exist "
4031 "in this SymbolTable.",
4032 msgname);
4033 goto err;
4034 }
4035 m = upb_msgdef_dup(frozen_m, s);
4036 if (!m) goto oom_err;
4037 if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
4038 upb_msgdef_unref(m, s);
4039 goto oom_err;
4040 }
4041 }
4042
4043 if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
4044 goto err;
4045 }
4046 }
4047
4048 /* Add dups of any existing def that can reach a def with the same name as
4049 * anything in our "add" set. */
4050 if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
4051 upb_strtable_begin(&iter, &s->symtab);
4052 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4053 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
4054 upb_resolve_dfs(def, &addtab, s, &seen, status);
4055 if (!upb_ok(status)) goto err;
4056 }
4057 upb_inttable_uninit(&seen);
4058
4059 /* Now using the table, resolve symbolic references for subdefs. */
4060 upb_strtable_begin(&iter, &addtab);
4061 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4062 const char *base;
4063 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
4064 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
4065 upb_msg_field_iter j;
4066
4067 if (!m) continue;
4068 /* Type names are resolved relative to the message in which they appear. */
4069 base = upb_msgdef_fullname(m);
4070
4071 for(upb_msg_field_begin(&j, m);
4072 !upb_msg_field_done(&j);
4073 upb_msg_field_next(&j)) {
4074 upb_fielddef *f = upb_msg_iter_field(&j);
4075 const char *name = upb_fielddef_subdefname(f);
4076 if (name && !upb_fielddef_subdef(f)) {
4077 /* Try the lookup in the current set of to-be-added defs first. If not
4078 * there, try existing defs. */
4079 upb_def *subdef = upb_resolvename(&addtab, base, name);
4080 if (subdef == NULL) {
4081 subdef = upb_resolvename(&s->symtab, base, name);
4082 }
4083 if (subdef == NULL) {
4084 upb_status_seterrf(
4085 status, "couldn't resolve name '%s' in message '%s'", name, base);
4086 goto err;
4087 } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
4088 goto err;
4089 }
4090 }
4091 }
4092 }
4093
4094 /* We need an array of the defs in addtab, for passing to
4095 * upb_refcounted_freeze(). */
4096 add_objs_size = upb_strtable_count(&addtab);
4097 if (freeze_also) {
4098 add_objs_size++;
4099 }
4100
4101 add_defs = upb_gmalloc(sizeof(void*) * add_objs_size);
4102 if (add_defs == NULL) goto oom_err;
4103 upb_strtable_begin(&iter, &addtab);
4104 for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4105 add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
4106 }
4107
4108 /* Validate defs. */
4109 if (!_upb_def_validate(add_defs, add_n, status)) {
4110 goto err;
4111 }
4112
4113 /* Cheat a little and give the array a new type.
4114 * This is probably undefined behavior, but this code will be deleted soon. */
4115 add_objs = (upb_refcounted**)add_defs;
4116
4117 freeze_n = add_n;
4118 if (freeze_also) {
4119 add_objs[freeze_n++] = freeze_also;
4120 }
4121
4122 if (!upb_refcounted_freeze(add_objs, freeze_n, status,
4123 UPB_MAX_MESSAGE_DEPTH * 2)) {
4124 goto err;
4125 }
4126
4127 /* This must be delayed until all errors have been detected, since error
4128 * recovery code uses this table to cleanup defs. */
4129 upb_strtable_uninit(&addtab);
4130
4131 /* TODO(haberman) we don't properly handle errors after this point (like
4132 * OOM in upb_strtable_insert() below). */
4133 for (i = 0; i < add_n; i++) {
4134 upb_def *def = (upb_def*)add_objs[i];
4135 const char *name = upb_def_fullname(def);
4136 upb_value v;
4137 bool success;
4138
4139 if (upb_strtable_remove(&s->symtab, name, &v)) {
4140 const upb_def *def = upb_value_getptr(v);
4141 upb_def_unref(def, s);
4142 }
4143 success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
4144 UPB_ASSERT_VAR(success, success == true);
4145 }
4146 upb_gfree(add_defs);
4147 return true;
4148
4149 oom_err:
4150 upb_status_seterrmsg(status, "out of memory");
4151 err: {
4152 /* For defs the user passed in, we need to donate the refs back. For defs
4153 * we dup'd, we need to just unref them. */
4154 upb_strtable_begin(&iter, &addtab);
4155 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4156 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
4157 bool came_from_user = def->came_from_user;
4158 def->came_from_user = false;
4159 if (came_from_user) {
4160 upb_def_donateref(def, s, ref_donor);
4161 } else {
4162 upb_def_unref(def, s);
4163 }
4164 }
4165 }
4166 upb_strtable_uninit(&addtab);
4167 upb_gfree(add_defs);
4168 assert(!upb_ok(status));
4169 return false;
4170 }
4171
/* Public entry point for adding "n" defs to "s": forwards to symtab_add()
 * with no additional object to freeze. */
bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
                    void *ref_donor, upb_status *status) {
  upb_refcounted *const no_extra_freeze = NULL;
  return symtab_add(s, defs, n, ref_donor, no_extra_freeze, status);
}
4176
/* Adds every def of "file" to the symtab "s" and freezes "file" along with
 * them (it is passed to symtab_add() as the extra object to freeze).  No
 * ref donor is used.
 *
 * Returns the result of symtab_add(), or false with "Out of memory" in
 * "status" if the temporary def array cannot be allocated.
 *
 * A file containing zero defs is valid: no array is allocated in that case
 * (a zero-byte allocation request may legitimately yield NULL, which must
 * not be reported as out-of-memory) and symtab_add() is called with n == 0
 * so that the file itself still gets frozen. */
bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
  size_t n;
  size_t i;
  upb_def **defs = NULL;
  bool ret;

  n = upb_filedef_defcount(file);

  if (n > 0) {
    defs = upb_gmalloc(sizeof(*defs) * n);
    if (defs == NULL) {
      upb_status_seterrmsg(status, "Out of memory");
      return false;
    }
  }

  for (i = 0; i < n; i++) {
    defs[i] = upb_filedef_mutabledef(file, i);
  }

  /* With n == 0, symtab_add() never dereferences "defs". */
  ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status);

  upb_gfree(defs);
  return ret;
}
4200
4201 /* Iteration. */
4202
/* Moves "iter" forward until it is done or rests on a def whose type
 * equals iter->type.  With UPB_DEF_ANY every def matches, so nothing is
 * skipped. */
static void advance_to_matching(upb_symtab_iter *iter) {
  if (iter->type == UPB_DEF_ANY) {
    return;
  }

  for (;;) {
    if (upb_strtable_done(&iter->iter)) break;
    if (upb_symtab_iter_def(iter)->type == iter->type) break;
    upb_strtable_next(&iter->iter);
  }
}
4212
/* Starts iterating over the defs of "s" that have the given "type",
 * positioning "iter" on the first match (if any). */
void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
                      upb_deftype_t type) {
  iter->type = type;
  upb_strtable_begin(&iter->iter, &s->symtab);
  advance_to_matching(iter);
}
4219
/* Steps "iter" past the current def and on to the next def of the
 * requested type (or to the end). */
void upb_symtab_next(upb_symtab_iter *iter) {
  upb_strtable_iter *inner = &iter->iter;
  upb_strtable_next(inner);
  advance_to_matching(iter);
}
4224
upb_symtab_done(const upb_symtab_iter * iter)4225 bool upb_symtab_done(const upb_symtab_iter *iter) {
4226 return upb_strtable_done(&iter->iter);
4227 }
4228
upb_symtab_iter_def(const upb_symtab_iter * iter)4229 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4230 return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4231 }
4232 /*
4233 ** upb_table Implementation
4234 **
4235 ** Implementation is heavily inspired by Lua's ltable.c.
4236 */
4237
4238
4239 #include <string.h>
4240
/* Upper bound that log2ceil() applies to its result.  Read as a log2
 * value, 16 corresponds to 2^16 = 64k. */
#define UPB_MAXARRSIZE 16  /* 64k. */

/* From Chromium.
 *
 * Element count of the array "x".  The second divisor is
 * !(sizeof(x) % sizeof(0[x])): it is 1 when sizeof(x) is an exact multiple
 * of the element size and 0 otherwise, so a non-multiple produces a
 * division by zero instead of a silently wrong count. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
4246
4247 #ifdef NDEBUG
upb_check_alloc(upb_table * t,upb_alloc * a)4248 static void upb_check_alloc(upb_table *t, upb_alloc *a) {
4249 UPB_UNUSED(t);
4250 UPB_UNUSED(a);
4251 }
4252 #else
upb_check_alloc(upb_table * t,upb_alloc * a)4253 static void upb_check_alloc(upb_table *t, upb_alloc *a) {
4254 assert(t->alloc == a);
4255 }
4256 #endif
4257
/* Load-factor threshold used by isfull(): a table counts as full when
 * (count + 1) / size would exceed this value. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
4264
/* Returns true if "v" has at most one bit set.  Note that 0 is therefore
 * reported as a power of two. */
bool is_pow2(uint64_t v) {
  if (v == 0) {
    return true;
  }
  /* Clearing the lowest set bit leaves zero iff exactly one bit was set. */
  return (v & (v - 1)) == 0;
}
4266
log2ceil(uint64_t v)4267 int log2ceil(uint64_t v) {
4268 int ret = 0;
4269 bool pow2 = is_pow2(v);
4270 while (v >>= 1) ret++;
4271 ret = pow2 ? ret : ret + 1; /* Ceiling. */
4272 return UPB_MIN(UPB_MAXARRSIZE, ret);
4273 }
4274
/* Duplicates the NUL-terminated string "s" using allocator "a".  Returns
 * whatever upb_strdup2() returns for strlen(s) bytes. */
char *upb_strdup(const char *s, upb_alloc *a) {
  const size_t len = strlen(s);
  return upb_strdup2(s, len, a);
}
4278
/* Copies "len" bytes from "s" into a fresh buffer of len + 1 bytes obtained
 * from allocator "a" and appends a NUL byte.  The input does not need to be
 * NUL-terminated (it may be a raw binary buffer).
 *
 * Returns NULL if len is SIZE_MAX (len + 1 would overflow) or if the
 * allocation fails. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  char *copy;

  /* Prevent overflow errors. */
  if (len == SIZE_MAX) {
    return NULL;
  }

  copy = upb_malloc(a, len + 1);
  if (!copy) {
    return NULL;
  }

  memcpy(copy, s, len);
  copy[len] = 0;  /* Always null-terminate, even for binary data. */
  return copy;
}
4295
/* A type to represent the lookup key of either a strtable or an inttable.
 * Only one member is meaningful at a time: intkey() fills in "num",
 * strkey2() fills in "str". */
typedef union {
  uintptr_t num;      /* Integer key, set by intkey(). */
  struct {
    const char *str;  /* Pointer to the key bytes, set by strkey2(). */
    size_t len;       /* Length passed to strkey2(). */
  } str;
} lookupkey_t;
4304
strkey2(const char * str,size_t len)4305 static lookupkey_t strkey2(const char *str, size_t len) {
4306 lookupkey_t k;
4307 k.str.str = str;
4308 k.str.len = len;
4309 return k;
4310 }
4311
intkey(uintptr_t key)4312 static lookupkey_t intkey(uintptr_t key) {
4313 lookupkey_t k;
4314 k.num = key;
4315 return k;
4316 }
4317
/* Function types through which the shared table code below receives its
 * key handling.
 *   hashfunc_t: returns the 32-bit hash of a stored table key.
 *   eqlfunc_t:  compares a stored table key (k1) with a lookup key (k2). */
typedef uint32_t hashfunc_t(upb_tabkey key);
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4320
4321 /* Base table (shared code) ***************************************************/
4322
4323 /* For when we need to cast away const. */
mutable_entries(upb_table * t)4324 static upb_tabent *mutable_entries(upb_table *t) {
4325 return (upb_tabent*)t->entries;
4326 }
4327
/* Returns true if one more element would push the table's load factor
 * above MAX_LOAD.  A table of size zero is always full. */
static bool isfull(upb_table *t) {
  if (upb_table_size(t) == 0) {
    return true;
  }
  return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
}
4335
/* Initializes table "t" as empty, with value type "ctype" and size
 * exponent "size_lg2".  The entry array is allocated from "a" and zeroed.
 * If upb_table_size() is 0, no array is allocated and entries is NULL.
 *
 * Returns false if the entry array cannot be allocated. */
static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2,
                 upb_alloc *a) {
  size_t bytes;

  t->count = 0;
  t->ctype = ctype;
  t->size_lg2 = size_lg2;
  if (upb_table_size(t)) {
    t->mask = upb_table_size(t) - 1;
  } else {
    t->mask = 0;
  }
#ifndef NDEBUG
  /* Recorded so that upb_check_alloc() can verify it later. */
  t->alloc = a;
#endif

  bytes = upb_table_size(t) * sizeof(upb_tabent);
  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_malloc(a, bytes);
  if (!t->entries) {
    return false;
  }
  memset(mutable_entries(t), 0, bytes);
  return true;
}
4357
/* Frees the table's entry array using "a", after checking (in debug
 * builds) that "a" is the allocator recorded in the table. */
static void uninit(upb_table *t, upb_alloc *a) {
  upb_tabent *entries;

  upb_check_alloc(t, a);
  entries = mutable_entries(t);
  upb_free(a, entries);
}
4362
emptyent(upb_table * t)4363 static upb_tabent *emptyent(upb_table *t) {
4364 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4365 while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4366 }
4367
getentry_mutable(upb_table * t,uint32_t hash)4368 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4369 return (upb_tabent*)upb_getentry(t, hash);
4370 }
4371
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4372 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4373 uint32_t hash, eqlfunc_t *eql) {
4374 const upb_tabent *e;
4375
4376 if (t->size_lg2 == 0) return NULL;
4377 e = upb_getentry(t, hash);
4378 if (upb_tabent_isempty(e)) return NULL;
4379 while (1) {
4380 if (eql(e->key, key)) return e;
4381 if ((e = e->next) == NULL) return NULL;
4382 }
4383 }
4384
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4385 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4386 uint32_t hash, eqlfunc_t *eql) {
4387 return (upb_tabent*)findentry(t, key, hash, eql);
4388 }
4389
/* Looks up "key" in "t".  Returns true if an entry is found; in that case,
 * when "v" is non-NULL, the entry's value is stored in *v tagged with the
 * table's ctype.  Returns false (leaving *v untouched) otherwise. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *e = findentry(t, key, hash, eql);

  if (!e) {
    return false;
  }
  if (v) {
    _upb_value_setval(v, e->val.val, t->ctype);
  }
  return true;
}
4402
/* Inserts (tabkey, val) into "t" and increments t->count.
 *
 * The given key must not already exist in the table (asserted), and val's
 * ctype must match the table's (asserted).  "key" and "eql" are used only
 * by the asserts; "hashfunc" is needed to find the main position of an
 * entry we collide with.
 *
 * On a collision a free entry is obtained from emptyent(), so the table
 * must still have an empty entry (presumably callers check isfull() first;
 * confirm against callers). */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  UPB_UNUSED(eql);
  UPB_UNUSED(key);
  assert(findentry(t, key, hash, eql) == NULL);
  assert(val.ctype == t->ctype);

  t->count++;
  /* The entry our hash maps to ("main position"). */
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e;  /* copies next. */
      /* Walk the collider's chain to the predecessor of the evicted entry
       * so it can be relinked to the entry's new location. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        assert(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  /* our_e->next was fixed up above; fill in the payload. */
  our_e->key = tabkey;
  our_e->val.val = val.val;
  assert(findentry(t, key, hash, eql) == our_e);
}
4451
/* Removes the entry matching `key` from the hash part of `t`.
 *
 *   key      lookup key, compared against stored keys with `eql`.
 *   val      if non-NULL, receives the removed entry's value.
 *   removed  if non-NULL, receives the stored upb_tabkey of the entry that
 *            was removed (string tables free the storage behind this key).
 *   hash     hash of `key`; selects the head of the chain to search.
 *
 * Returns true if an entry was removed, false if the key was not present. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain;

  /* Mirrors findentry(): a table with size_lg2 == 0 holds nothing to find. */
  if (t->size_lg2 == 0) return false;

  chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) {
      _upb_value_setval(val, chain->val.val, t->ctype);
    }
    /* Report the removed key *before* the head slot is overwritten below.
     * Reading it afterwards would hand back the key of the successor, which
     * stays live in the table. */
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the successor into the head slot and free the successor's old
       * slot. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove; `chain` is its predecessor. */
      upb_tabent *rm = (upb_tabent*)chain->next;

      if (val) {
        _upb_value_setval(val, rm->val.val, t->ctype);
      }
      if (removed) *removed = rm->key;
      rm->key = 0;  /* Make the slot empty. */
      chain->next = rm->next;
      t->count--;
      return true;
    } else {
      return false;
    }
  }
}
4495
next(const upb_table * t,size_t i)4496 static size_t next(const upb_table *t, size_t i) {
4497 do {
4498 if (++i >= upb_table_size(t))
4499 return SIZE_MAX;
4500 } while(upb_tabent_isempty(&t->entries[i]));
4501
4502 return i;
4503 }
4504
/* Returns the index of the first non-empty entry of `t`, or SIZE_MAX if the
 * hash part has none. */
static size_t begin(const upb_table *t) {
  /* next() pre-increments, so (size_t)-1 wraps to index 0. */
  const size_t before_first = (size_t)-1;
  return next(t, before_first);
}
4508
4509
4510 /* upb_strtable ***************************************************************/
4511
4512 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
4513
strcopy(lookupkey_t k2,upb_alloc * a)4514 static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
4515 char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
4516 if (str == NULL) return 0;
4517 memcpy(str, &k2.str.len, sizeof(uint32_t));
4518 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4519 return (uintptr_t)str;
4520 }
4521
/* Hash function for stored string keys: unpacks the string and its length
 * with upb_tabstr() and hashes those bytes with MurmurHash2 (seed 0). */
static uint32_t strhash(upb_tabkey key) {
  uint32_t nbytes;
  char *bytes = upb_tabstr(key, &nbytes);
  return MurmurHash2(bytes, nbytes, 0);
}
4527
/* Equality function for string tables: true when the stored key `k1` and the
 * lookup key `k2` have the same length and the same bytes. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char *stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) {
    return false;
  }
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
4533
/* Initializes string table `t` for values of type `ctype`, allocating from
 * `a`.  Returns false if the underlying init() fails. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
  /* Initial size_lg2 handed to init(). */
  enum { initial_size_lg2 = 2 };
  return init(&t->t, ctype, initial_size_lg2, a);
}
4537
/* Releases everything owned by string table `t`: the key stored in every
 * slot, then the entry array itself.  Every slot's key is passed to
 * upb_free(), including slots that are empty. */
void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  const size_t nslots = upb_table_size(&t->t);
  size_t slot;

  for (slot = 0; slot < nslots; slot++) {
    upb_free(a, (void*)t->t.entries[slot].key);
  }
  uninit(&t->t, a);
}
4544
/* Rebuilds `t` as a table created with the given `size_lg2`, re-inserting
 * every existing entry (keys are copied into the new table).
 *
 * Returns true on success.  Returns false if the new table cannot be
 * allocated or if re-inserting any entry fails; in that case the partially
 * built table is released and `t` is left exactly as it was, so no entries
 * are lost. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  upb_check_alloc(&t->t, a);

  if (!init(&new_table.t, t->t.ctype, size_lg2, a)) {
    return false;
  }

  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    if (!upb_strtable_insert3(&new_table,
                              upb_strtable_iter_key(&i),
                              upb_strtable_iter_keylength(&i),
                              upb_strtable_iter_value(&i),
                              a)) {
      /* Do not swap in an incomplete table. */
      upb_strtable_uninit2(&new_table, a);
      return false;
    }
  }

  upb_strtable_uninit2(t, a);
  *t = new_table;
  return true;
}
4566
/* Inserts the `len`-byte key `k` with value `v` into `t`, copying the key.
 * The key must not already be present (asserted inside insert()).
 *
 * Returns false if growing the table or copying the key fails. */
bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
                          upb_value v, upb_alloc *a) {
  lookupkey_t probe;
  upb_tabkey stored;

  upb_check_alloc(&t->t, a);

  /* A full table is rebuilt with size_lg2 + 1 before inserting. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  probe = strkey2(k, len);
  stored = strcopy(probe, a);
  if (stored == 0) {
    return false;
  }

  insert(&t->t, probe, stored, v,
         MurmurHash2(probe.str.str, probe.str.len, 0), &strhash, &streql);
  return true;
}
4590
/* Looks up the `len`-byte key `key` in `t`.  Returns true if it is present;
 * if `v` is non-NULL it then receives the stored value. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  const lookupkey_t probe = strkey2(key, len);
  return lookup(&t->t, probe, v, MurmurHash2(key, len, 0), &streql);
}
4596
/* Removes the `len`-byte key `key` from `t` and frees the table's copy of the
 * key with `alloc`.
 *
 * Returns true if the key was present.  If `val` is non-NULL it then receives
 * the removed value.
 *
 * The hash is computed over exactly `len` bytes, the same way as in
 * upb_strtable_insert3() and upb_strtable_lookup2().  Using strlen() here
 * would pick a different chain for keys that contain a NUL byte or are not
 * NUL-terminated. */
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
                          upb_value *val, upb_alloc *alloc) {
  uint32_t hash = MurmurHash2(key, len, 0);
  upb_tabkey tabkey;
  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
    upb_free(alloc, (void*)tabkey);
    return true;
  } else {
    return false;
  }
}
4608
4609 /* Iteration */
4610
str_tabent(const upb_strtable_iter * i)4611 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4612 return &i->t->t.entries[i->index];
4613 }
4614
/* Positions iterator `i` on the first non-empty entry of `t` (or past the end
 * if there is none). */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  const size_t first = begin(&t->t);
  i->t = t;
  i->index = first;
}
4619
/* Advances iterator `i` to the next non-empty entry; the index becomes
 * SIZE_MAX when there are no more entries. */
void upb_strtable_next(upb_strtable_iter *i) {
  const upb_table *tab = &i->t->t;
  i->index = next(tab, i->index);
}
4623
upb_strtable_done(const upb_strtable_iter * i)4624 bool upb_strtable_done(const upb_strtable_iter *i) {
4625 return i->index >= upb_table_size(&i->t->t) ||
4626 upb_tabent_isempty(str_tabent(i));
4627 }
4628
upb_strtable_iter_key(const upb_strtable_iter * i)4629 const char *upb_strtable_iter_key(const upb_strtable_iter *i) {
4630 assert(!upb_strtable_done(i));
4631 return upb_tabstr(str_tabent(i)->key, NULL);
4632 }
4633
upb_strtable_iter_keylength(const upb_strtable_iter * i)4634 size_t upb_strtable_iter_keylength(const upb_strtable_iter *i) {
4635 uint32_t len;
4636 assert(!upb_strtable_done(i));
4637 upb_tabstr(str_tabent(i)->key, &len);
4638 return len;
4639 }
4640
upb_strtable_iter_value(const upb_strtable_iter * i)4641 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4642 assert(!upb_strtable_done(i));
4643 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
4644 }
4645
/* Marks iterator `i` as finished by setting its index to SIZE_MAX, the same
 * value next() returns when no entries remain. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  const size_t done_index = SIZE_MAX;
  i->index = done_index;
}
4649
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)4650 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4651 const upb_strtable_iter *i2) {
4652 if (upb_strtable_done(i1) && upb_strtable_done(i2))
4653 return true;
4654 return i1->t == i2->t && i1->index == i2->index;
4655 }
4656
4657
4658 /* upb_inttable ***************************************************************/
4659
4660 /* For inttables we use a hybrid structure where small keys are kept in an
4661 * array and large keys are put in the hash table. */
4662
/* Hash function for integer tables, in the form expected by insert():
 * forwards the stored key to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
4664
/* Equality function for integer tables: true when the stored key `k1` equals
 * the numeric lookup key. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  if (k1 == k2.num) {
    return true;
  }
  return false;
}
4668
mutable_array(upb_inttable * t)4669 static upb_tabval *mutable_array(upb_inttable *t) {
4670 return (upb_tabval*)t->array;
4671 }
4672
inttable_val(upb_inttable * t,uintptr_t key)4673 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
4674 if (key < t->array_size) {
4675 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4676 } else {
4677 upb_tabent *e =
4678 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4679 return e ? &e->val : NULL;
4680 }
4681 }
4682
inttable_val_const(const upb_inttable * t,uintptr_t key)4683 static const upb_tabval *inttable_val_const(const upb_inttable *t,
4684 uintptr_t key) {
4685 return inttable_val((upb_inttable*)t, key);
4686 }
4687
upb_inttable_count(const upb_inttable * t)4688 size_t upb_inttable_count(const upb_inttable *t) {
4689 return t->t.count + t->array_count;
4690 }
4691
/* Debug-only consistency check.  Does nothing unless UPB_DEBUG_TABLE is
 * defined and NDEBUG is not.  When enabled it asserts that every key reached
 * by iteration can be looked up and that the number of iterated entries
 * equals upb_inttable_count(). */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter it;

    for (upb_inttable_begin(&it, t); !upb_inttable_done(&it);
         upb_inttable_next(&it)) {
      assert(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      seen++;
    }
    assert(seen == upb_inttable_count(t));
  }
#endif
}
4707
/* Initializes `t` with an array part of `asize` slots (at least 1) and a hash
 * part created with size_lg2 `hsize_lg2`, allocating from `a`.
 *
 * Returns false if either allocation fails; a hash part that was already
 * initialized is released again before returning. */
bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
                            size_t asize, int hsize_lg2, upb_alloc *a) {
  size_t nbytes;

  if (!init(&t->t, ctype, hsize_lg2, a)) {
    return false;
  }

  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;

  /* NOTE(review): sized with sizeof(upb_value) although the slots are accessed
   * as upb_tabval elsewhere -- confirm upb_value is at least as large. */
  nbytes = t->array_size * sizeof(upb_value);
  t->array = upb_malloc(a, nbytes);
  if (t->array == NULL) {
    uninit(&t->t, a);
    return false;
  }

  /* Every byte of the array part starts out as 0xff. */
  memset(mutable_array(t), 0xff, nbytes);
  check(t);
  return true;
}
4727
/* Initializes `t` with default sizing: a requested array size of 0 (which
 * upb_inttable_sizedinit() raises to 1) and a hash size_lg2 of 4. */
bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  enum { default_array_size = 0, default_hash_size_lg2 = 4 };
  return upb_inttable_sizedinit(t, ctype, default_array_size,
                                default_hash_size_lg2, a);
}
4731
/* Releases both parts of `t` using `a`: the hash part first, then the array
 * part. */
void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  upb_tabval *arr = mutable_array(t);

  uninit(&t->t, a);
  upb_free(a, arr);
}
4736
/* Inserts `key` -> `val` into `t`.  The key must not already be present
 * (asserted).  Keys below array_size go into the array part; other keys go
 * into the hash part, which is rebuilt with size_lg2 + 1 first if it is full.
 *
 * Returns false only if the rebuilt hash part cannot be allocated. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_UNUSED(tabval);
  assert(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */

  upb_check_alloc(&t->t, a);

  if (key < t->array_size) {
    /* Small key: store it directly in the array part. */
    assert(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t slot;

    if (!init(&grown, t->t.ctype, t->t.size_lg2 + 1, a)) {
      return false;
    }

    /* Move every live entry of the old hash part into the new one. */
    slot = begin(&t->t);
    while (slot < upb_table_size(&t->t)) {
      const upb_tabent *old = &t->t.entries[slot];
      upb_value v;

      _upb_value_setval(&v, old->val.val, t->t.ctype);
      insert(&grown, intkey(old->key), old->key, v, upb_inthash(old->key),
             &inthash, &inteql);
      slot = next(&t->t, slot);
    }

    assert(t->t.count == grown.count);

    uninit(&t->t, a);
    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
4780
/* Looks up `key` in `t`.  Returns true if it is present; if `v` is non-NULL it
 * then receives the stored value tagged with the table's ctype. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *slot = inttable_val_const(t, key);

  if (slot == NULL) {
    return false;
  }
  if (v != NULL) {
    _upb_value_setval(v, slot->val, t->t.ctype);
  }
  return true;
}
4787
/* Overwrites the value stored for an existing `key`.  Returns false, changing
 * nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *slot = inttable_val(t, key);

  if (slot == NULL) {
    return false;
  }
  slot->val = val.val;
  return true;
}
4794
/* Removes `key` from `t`.  Returns true if it was present; if `val` is
 * non-NULL it then receives the removed value. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool removed_any = false;

  if (key >= t->array_size) {
    /* Key lives in the hash part (if anywhere). */
    upb_tabkey removed;
    removed_any =
        rm(&t->t, intkey(key), val, &removed, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    /* Key lives in the array part: reset its slot to the empty marker. */
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val != NULL) {
      _upb_value_setval(val, t->array[key].val, t->t.ctype);
    }
    mutable_array(t)[key] = empty;
    removed_any = true;
  }

  check(t);
  return removed_any;
}
4817
/* Inserts `val` under the key equal to the table's current entry count.
 * Returns the result of upb_inttable_insert2(). */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
  uintptr_t next_key;

  upb_check_alloc(&t->t, a);
  next_key = upb_inttable_count(t);
  return upb_inttable_insert2(t, next_key, val, a);
}
4822
upb_inttable_pop(upb_inttable * t)4823 upb_value upb_inttable_pop(upb_inttable *t) {
4824 upb_value val;
4825 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4826 UPB_ASSERT_VAR(ok, ok);
4827 return val;
4828 }
4829
/* Pointer-keyed insert: uses the pointer's integer value as the key and
 * forwards to upb_inttable_insert2(). */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  uintptr_t int_key;

  upb_check_alloc(&t->t, a);
  int_key = (uintptr_t)key;
  return upb_inttable_insert2(t, int_key, val, a);
}
4835
/* Pointer-keyed lookup: uses the pointer's integer value as the key and
 * forwards to upb_inttable_lookup(). */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_lookup(t, int_key, v);
}
4840
/* Pointer-keyed remove: uses the pointer's integer value as the key and
 * forwards to upb_inttable_remove(). */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_remove(t, int_key, val);
}
4844
/* Rebuilds `t` with freshly chosen array and hash sizes and replaces `t` with
 * the rebuilt table.
 *
 * The keys are first bucketed by log2ceil(key); the array size is then picked
 * from that histogram using MIN_DENSITY, and the hash part is sized from the
 * remaining entry count using MAX_LOAD.
 *
 * NOTE(review): the results of upb_inttable_sizedinit() and
 * upb_inttable_insert2() are not checked here, and this function has no way
 * to report failure -- confirm how allocation failure is meant to be
 * handled. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_check_alloc(&t->t, a);

  /* Pass 1: fill the histogram.  NOTE(review): assumes log2ceil() never
   * returns more than UPB_MAXARRSIZE, otherwise these indexes run past the
   * arrays -- confirm against log2ceil(). */
  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    /* Not dense enough at this size: the keys of this bucket will go to the
     * hash part instead. */
    arr_count -= counts[size_lg2];
  }

  assert(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    size_t hashsize_lg2 = log2ceil(hash_size);

    upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2, a);
    /* Pass 2: copy every entry; insert2 routes each key to the array or the
     * hash part based on the new array size. */
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
    }
    assert(new_t.array_size == arr_size);
    assert(new_t.t.size_lg2 == hashsize_lg2);
  }
  /* Release the old storage and adopt the rebuilt table. */
  upb_inttable_uninit2(t, a);
  *t = new_t;
}
4903
4904 /* Iteration. */
4905
int_tabent(const upb_inttable_iter * i)4906 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4907 assert(!i->array_part);
4908 return &i->t->t.entries[i->index];
4909 }
4910
int_arrent(const upb_inttable_iter * i)4911 static upb_tabval int_arrent(const upb_inttable_iter *i) {
4912 assert(i->array_part);
4913 return i->t->array[i->index];
4914 }
4915
/* Positions iterator `i` on the first entry of `t`.  Iteration starts in the
 * array part, one position before index 0, and upb_inttable_next() then
 * advances to the first occupied slot. */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
4922
/* Advances `iter` to the next entry.  The array part is visited first; once it
 * is exhausted the iterator switches to the hash part, starting at its first
 * non-empty entry. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *tab = iter->t;

  if (!iter->array_part) {
    iter->index = next(&tab->t, iter->index);
    return;
  }

  /* Still in the array part: look for the next occupied slot. */
  while (++iter->index < tab->array_size) {
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Array part exhausted; continue with the hash part. */
  iter->array_part = false;
  iter->index = begin(&tab->t);
}
4937
upb_inttable_done(const upb_inttable_iter * i)4938 bool upb_inttable_done(const upb_inttable_iter *i) {
4939 if (i->array_part) {
4940 return i->index >= i->t->array_size ||
4941 !upb_arrhas(int_arrent(i));
4942 } else {
4943 return i->index >= upb_table_size(&i->t->t) ||
4944 upb_tabent_isempty(int_tabent(i));
4945 }
4946 }
4947
upb_inttable_iter_key(const upb_inttable_iter * i)4948 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4949 assert(!upb_inttable_done(i));
4950 return i->array_part ? i->index : int_tabent(i)->key;
4951 }
4952
upb_inttable_iter_value(const upb_inttable_iter * i)4953 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4954 assert(!upb_inttable_done(i));
4955 return _upb_value_val(
4956 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
4957 i->t->t.ctype);
4958 }
4959
/* Marks iterator `i` as finished: it is placed in the hash part with index
 * SIZE_MAX, the value next() returns when no entries remain. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
}
4964
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)4965 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4966 const upb_inttable_iter *i2) {
4967 if (upb_inttable_done(i1) && upb_inttable_done(i2))
4968 return true;
4969 return i1->t == i2->t && i1->index == i2->index &&
4970 i1->array_part == i2->array_part;
4971 }
4972
4973 #ifdef UPB_UNALIGNED_READS_OK
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 *
 * Hashes the `len` bytes at `key`, starting from `seed`, into 32 bits.
 *
 * Note - This code makes a few assumptions about how your machine behaves -
 *   1. We can read a 4-byte value from any address without crashing
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
 * And it has a few limitations -
 *   1. It will not work incrementally.
 *   2. It will not produce the same results on little-endian and big-endian
 *      machines. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value */
  uint32_t h = seed ^ len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t * data = (const uint8_t *)key;
  while(len >= 4) {
    /* Direct 4-byte load from a byte pointer; this is the read that
     * assumption 1 above is about. */
    uint32_t k = *(uint32_t *)data;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array.  Each case intentionally
   * falls through to the ones below it. */
  switch(len) {
    case 3: h ^= data[2] << 16;  /* fallthrough */
    case 2: h ^= data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  };

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
5024
5025 #else /* !UPB_UNALIGNED_READS_OK */
5026
/* -----------------------------------------------------------------------------
 * MurmurHashAligned2, by Austin Appleby
 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
 * on certain platforms.
 * Performance will be lower than MurmurHash2 */

/* One mixing round: folds the 32-bit word `k` into the running hash `h`.
 * Modifies both `h` and `k`, and uses the local `r` of the enclosing
 * function. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* Hashes the `len` bytes at `key`, starting from `seed`, into 32 bits.  When
 * `key` is not 4-byte aligned (and there are at least 4 bytes), words are
 * assembled from aligned loads using shifts instead of being loaded
 * directly. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  /* Offset of `data` within a 4-byte word (0 means already aligned). */
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Gather the bytes that precede the first aligned address.  Each case
     * intentionally falls through to the ones below it. */
    switch(align) {
      case 1: t |= data[2] << 16;  /* fallthrough */
      case 2: t |= data[1] << 8;   /* fallthrough */
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    /* Skip ahead to the first 4-byte aligned address. */
    data += 4-align;
    len -= 4-align;

    /* Shift amounts used to splice two consecutive aligned words together. */
    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      /* `data` is 4-byte aligned here, so this load is aligned. */
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      uint32_t k;

      /* Intentional fallthrough in each case. */
      switch(align) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      /* Intentional fallthrough in each case. */
      switch(len) {
        case 3: h ^= data[2] << 16;  /* fallthrough */
        case 2: h ^= data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      /* Fewer than `align` bytes remain: merge them with what is still held
       * in `t`.  Intentional fallthrough in each case, down to case 0. */
      switch(len) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];        /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    /* Final mixing. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* Input is already 4-byte aligned, or shorter than 4 bytes (in which
     * case the loop below does not run). */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    /* Intentional fallthrough in each case. */
    switch(len) {
      case 3: h ^= data[2] << 16;  /* fallthrough */
      case 2: h ^= data[1] << 8;   /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    };

    /* Final mixing. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
5148
5149 #endif /* UPB_UNALIGNED_READS_OK */
5150
5151 #include <errno.h>
5152 #include <stdarg.h>
5153 #include <stddef.h>
5154 #include <stdint.h>
5155 #include <stdio.h>
5156 #include <stdlib.h>
5157 #include <string.h>
5158
/* Error callback that prints the status message, followed by a newline, to
 * stderr.  `closure` is unused.  Always returns false. */
bool upb_dumptostderr(void *closure, const upb_status* status) {
  const char *msg = upb_status_errmsg(status);
  UPB_UNUSED(closure);
  fprintf(stderr, "%s\n", msg);
  return false;
}
5164
5165 /* Guarantee null-termination and provide ellipsis truncation.
5166 * It may be tempting to "optimize" this by initializing these final
5167 * four bytes up-front and then being careful never to overwrite them,
5168 * this is safer and simpler. */
nullz(upb_status * status)5169 static void nullz(upb_status *status) {
5170 const char *ellipsis = "...";
5171 size_t len = strlen(ellipsis);
5172 assert(sizeof(status->msg) > len);
5173 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
5174 }
5175
5176
5177 /* upb_upberr *****************************************************************/
5178
/* Error space that upb_upberr_setoom() attaches to a status; its first member
 * is initialized to the string "upb error". */
upb_errorspace upb_upberr = {"upb error"};
5180
/* Records an out-of-memory error in `status`: sets its error space to
 * upb_upberr and its message to "Out of memory".
 *
 * A NULL `status` is ignored, matching upb_status_seterrmsg() and the other
 * status setters, instead of being dereferenced. */
void upb_upberr_setoom(upb_status *status) {
  if (!status) return;
  status->error_space_ = &upb_upberr;
  upb_status_seterrmsg(status, "Out of memory");
}
5185
5186
5187 /* upb_status *****************************************************************/
5188
/* Resets `status` to the "ok" state: ok flag set, code 0, empty message.
 * A NULL `status` is ignored. */
void upb_status_clear(upb_status *status) {
  if (status != NULL) {
    status->ok_ = true;
    status->code_ = 0;
    status->msg[0] = '\0';
  }
}
5195
upb_ok(const upb_status * status)5196 bool upb_ok(const upb_status *status) { return status->ok_; }
5197
upb_status_errspace(const upb_status * status)5198 upb_errorspace *upb_status_errspace(const upb_status *status) {
5199 return status->error_space_;
5200 }
5201
upb_status_errcode(const upb_status * status)5202 int upb_status_errcode(const upb_status *status) { return status->code_; }
5203
upb_status_errmsg(const upb_status * status)5204 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
5205
/* Marks `status` as failed and copies `msg` into its message buffer, then
 * lets nullz() post-process the end of the buffer.  A NULL `status` is
 * ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  if (status == NULL) {
    return;
  }
  status->ok_ = false;
  /* strncpy() copies at most the buffer size and leaves a truncated copy
   * unterminated; the tail of the buffer is handled by nullz(). */
  strncpy(status->msg, msg, sizeof(status->msg));
  nullz(status);
}
5212
/* printf-style variant of upb_status_seterrmsg(): bundles the variadic
 * arguments into a va_list and forwards to upb_status_vseterrf(). */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
5219
/* Marks `status` as failed and formats the message into its buffer with
 * _upb_vsnprintf(), then lets nullz() post-process the end of the buffer.
 * A NULL `status` is ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status == NULL) {
    return;
  }
  status->ok_ = false;
  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  nullz(status);
}
5226
/* Copies the whole of `*from` into `*to` by struct assignment.  A NULL `to`
 * is ignored; `from` must not be NULL when `to` is non-NULL. */
void upb_status_copy(upb_status *to, const upb_status *from) {
  if (to != NULL) {
    *to = *from;
  }
}
5231
5232
5233 /* upb_alloc ******************************************************************/
5234
/* Allocation function backed by the C heap.  A request for size 0 frees `ptr`
 * and returns NULL; any other request is forwarded to realloc().  `alloc` and
 * `oldsize` are unused. */
static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
                                  size_t size) {
  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size != 0) {
    return realloc(ptr, size);
  }
  free(ptr);
  return NULL;
}
5246
/* Global allocator instance whose allocation function is
 * upb_global_allocfunc() (free()/realloc() based). */
upb_alloc upb_alloc_global = {&upb_global_allocfunc};
5248
5249
5250 /* upb_arena ******************************************************************/
5251
/* Alignment applied to arena allocations.
 * Be conservative and choose 16 in case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds `size` up to the nearest multiple of maxalign (0 stays 0). */
static size_t align_up(size_t size) {
  const size_t bumped = size + (maxalign - 1);
  return bumped - (bumped % maxalign);
}
5258
/* Header placed at the start of every arena block.  Blocks form a singly
 * linked list headed by the arena's block_head. */
typedef struct mem_block {
  struct mem_block *next;  /* Next block in the arena's list. */
  size_t size;             /* Size recorded by upb_arena_addblock(). */
  size_t used;             /* Offset from the block start of the next free
                            * byte; starts after the (aligned) header. */
  bool owned;              /* If true, upb_arena_uninit() frees the block. */
  /* Data follows. */
} mem_block;
5266
/* One registered cleanup action.  Entries form a singly linked list headed by
 * the arena's cleanup_head and are allocated from the arena itself. */
typedef struct cleanup_ent {
  struct cleanup_ent *next;   /* Next entry in the list. */
  upb_cleanup_func *cleanup;  /* Function called by upb_arena_uninit(). */
  void *ud;                   /* Argument passed to `cleanup`. */
} cleanup_ent;
5272
/* Turns the `size`-byte region at `ptr` into an arena block and pushes it onto
 * the front of the arena's block list.  The block header is written at the
 * start of the region and the first allocation will start after the aligned
 * header.  `owned` records whether upb_arena_uninit() should free the
 * region. */
static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
                               bool owned) {
  mem_block *hdr = ptr;

  hdr->size = size;
  hdr->used = align_up(sizeof(mem_block));
  hdr->owned = owned;

  /* Link in as the new head of the list. */
  hdr->next = a->block_head;
  a->block_head = hdr;

  /* TODO(haberman): ASAN poison. */
}
5286
5287
upb_arena_allocblock(upb_arena * a,size_t size)5288 static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
5289 size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
5290 mem_block *block = upb_malloc(a->block_alloc, block_size);
5291
5292 if (!block) {
5293 return NULL;
5294 }
5295
5296 upb_arena_addblock(a, block, block_size, true);
5297 a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
5298
5299 return block;
5300 }
5301
/* Allocation function of the arena.
 *
 *   alloc    the arena, viewed through its initial upb_alloc member.
 *   ptr      previous allocation when reallocating (only read when
 *            oldsize > 0).
 *   oldsize  size of the previous allocation, 0 for a fresh allocation.
 *   size     requested size; 0 means "free", which is a no-op for an arena.
 *
 * Returns a fresh region carved from the head block (a new block is allocated
 * when the head block is missing or too small), or NULL when size is 0 or
 * when a new block cannot be allocated.  Memory is never returned to the
 * arena individually; a realloc always gets a new region and the old contents
 * are copied over. */
static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
                               size_t size) {
  upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
  mem_block *block = a->block_head;
  void *ret;

  if (size == 0) {
    return NULL;  /* We are an arena, don't need individual frees. */
  }

  size = align_up(size);

  /* TODO(haberman): special-case if this is a realloc of the last alloc? */

  if (!block || block->size - block->used < size) {
    /* Slow path: have to allocate a new block. */
    block = upb_arena_allocblock(a, size);

    if (!block) {
      return NULL;  /* Out of memory. */
    }
  }

  ret = (char*)block + block->used;
  block->used += size;

  if (oldsize > 0) {
    /* Preserve existing data.  When shrinking, copy only what fits in the
     * new region; copying `oldsize` bytes would write past its end. */
    memcpy(ret, ptr, UPB_MIN(oldsize, size));
  }

  /* TODO(haberman): ASAN unpoison. */

  a->bytes_allocated += size;
  return ret;
}
5337
5338 /* Public Arena API ***********************************************************/
5339
/* Initializes `a` as an empty arena: no blocks, no cleanup entries, zero bytes
 * allocated.  Blocks are obtained from the global allocator, starting at 256
 * bytes and capped at 16384 bytes for the next-block size. */
void upb_arena_init(upb_arena *a) {
  /* Allocator plumbing. */
  a->alloc.func = &upb_arena_doalloc;
  a->block_alloc = &upb_alloc_global;

  /* Block sizing policy. */
  a->next_block_size = 256;
  a->max_block_size = 16384;

  /* Empty state. */
  a->bytes_allocated = 0;
  a->block_head = NULL;
  a->cleanup_head = NULL;
}
5349
/* Initializes `a` like upb_arena_init(), optionally seeding it with caller
 * memory and a custom block allocator.
 *
 *   mem, size  initial region; used as a non-owned first block only if `size`
 *              is larger than the block header.
 *   alloc      allocator for further blocks; NULL keeps the global one. */
void upb_arena_init2(upb_arena *a, void *mem, size_t size, upb_alloc *alloc) {
  const bool mem_usable = size > sizeof(mem_block);

  upb_arena_init(a);

  if (mem_usable) {
    upb_arena_addblock(a, mem, size, false);
  }

  if (alloc != NULL) {
    a->block_alloc = alloc;
  }
}
5361
/* Tears down `a`: runs every registered cleanup function (starting from
 * cleanup_head), then frees every block the arena owns. */
void upb_arena_uninit(upb_arena *a) {
  cleanup_ent *ent;
  mem_block *block = a->block_head;

  for (ent = a->cleanup_head; ent != NULL; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* Must do this after running cleanup functions, because this will delete
   * the memory we store our cleanup entries in! */
  while (block != NULL) {
    mem_block *doomed = block;

    /* Read the link before the block may be freed. */
    block = block->next;
    if (doomed->owned) {
      upb_free(a->block_alloc, doomed);
    }
  }
}
5383
/* Registers `func(ud)` to be called by upb_arena_uninit().  The bookkeeping
 * entry is allocated from the arena itself and pushed onto the front of the
 * cleanup list.  Returns false if that allocation fails. */
bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) {
  cleanup_ent *node = upb_malloc(&a->alloc, sizeof(cleanup_ent));

  if (node == NULL) {
    return false;  /* Out of memory. */
  }

  node->ud = ud;
  node->cleanup = func;

  /* Push onto the front of the list. */
  node->next = a->cleanup_head;
  a->cleanup_head = node;
  return true;
}
5397
upb_arena_bytesallocated(const upb_arena * a)5398 size_t upb_arena_bytesallocated(const upb_arena *a) {
5399 return a->bytes_allocated;
5400 }
5401
5402
5403 /* Standard error functions ***************************************************/
5404
/* Error callback that does nothing: both arguments are ignored and the result
 * is always false. */
static bool default_err(void *ud, const upb_status *status) {
  UPB_UNUSED(status);
  UPB_UNUSED(ud);
  return false;
}
5410
write_err_to(void * ud,const upb_status * status)5411 static bool write_err_to(void *ud, const upb_status *status) {
5412 upb_status *copy_to = ud;
5413 upb_status_copy(copy_to, status);
5414 return false;
5415 }
5416
5417
5418 /* upb_env ********************************************************************/
5419
/* Initializes only the error-handling state of |e|; the embedded arena is not
 * touched.  Afterwards ok_ is true and errors go to default_err with a NULL
 * user-data pointer. */
void upb_env_initonly(upb_env *e) {
  e->error_ud_ = NULL;
  e->error_func_ = &default_err;
  e->ok_ = true;
}
5425
/* Fully initializes |e|: first its embedded arena (default settings), then its
 * error-handling state. */
void upb_env_init(upb_env *e) {
  upb_arena *arena = &e->arena_;

  upb_arena_init(arena);
  upb_env_initonly(e);
}
5430
/* Fully initializes |e|, forwarding |mem|, |n| and |alloc| to
 * upb_arena_init2() for the embedded arena before setting up the
 * error-handling state. */
void upb_env_init2(upb_env *e, void *mem, size_t n, upb_alloc *alloc) {
  upb_arena *arena = &e->arena_;

  upb_arena_init2(arena, mem, n, alloc);
  upb_env_initonly(e);
}
5435
/* Releases |e| by uninitializing its embedded arena. */
void upb_env_uninit(upb_env *e) {
  upb_arena *arena = &e->arena_;

  upb_arena_uninit(arena);
}
5439
/* Installs |func| as the error callback of |e|; |ud| is stored alongside it
 * and passed back as the callback's first argument when an error is
 * reported. */
void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud) {
  e->error_ud_ = ud;
  e->error_func_ = func;
}
5444
/* Arranges for errors reported on |e| to be copied into |s|, by installing
 * write_err_to as the error callback with |s| as its user data. */
void upb_env_reporterrorsto(upb_env *e, upb_status *s) {
  e->error_ud_ = s;
  e->error_func_ = &write_err_to;
}
5449
/* Reports |status| as an error on |e|.
 *
 * Marks the environment as no longer ok, then invokes the installed error
 * callback with its user data and |status|.  The callback's return value is
 * passed straight back to the caller. */
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  upb_error_func *handler = e->error_func_;
  void *handler_ud = e->error_ud_;

  e->ok_ = false;
  return handler(handler_ud, status);
}
5454
/* Allocates |size| bytes through the allocator of the environment's arena and
 * returns whatever upb_malloc() returns. */
void *upb_env_malloc(upb_env *e, size_t size) {
  upb_alloc *arena_alloc = &e->arena_.alloc;

  return upb_malloc(arena_alloc, size);
}
5458
/* Resizes |ptr| from |oldsize| to |size| bytes through the allocator of the
 * environment's arena and returns whatever upb_realloc() returns. */
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
  upb_alloc *arena_alloc = &e->arena_.alloc;

  return upb_realloc(arena_alloc, ptr, oldsize, size);
}
5462
/* Passes |ptr| to upb_free() using the allocator of the environment's
 * arena. */
void upb_env_free(upb_env *e, void *ptr) {
  upb_alloc *arena_alloc = &e->arena_.alloc;

  upb_free(arena_alloc, ptr);
}
5466
/* Registers |func|/|ud| as a cleanup on the environment's arena.  Returns the
 * result of upb_arena_addcleanup() (false when the entry could not be
 * allocated). */
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
  upb_arena *arena = &e->arena_;

  return upb_arena_addcleanup(arena, func, ud);
}
5470
upb_env_bytesallocated(const upb_env * e)5471 size_t upb_env_bytesallocated(const upb_env *e) {
5472 return upb_arena_bytesallocated(&e->arena_);
5473 }
5474 /* This file was generated by upbc (the upb compiler) from the input
5475 * file:
5476 *
5477 * upb/descriptor/descriptor.proto
5478 *
5479 * Do not edit -- your changes will be discarded when the file is
5480 * regenerated. */
5481
5482 #include <assert.h>
5483
5484
/* Forward declarations of the statically-initialized descriptor tables used in
 * this generated section.  The tables refer to one another by address (msgs
 * entries name slots in arrays/intentries/strentries, fields entries name
 * slots in msgs/enums, strentries entries name slots in fields), so each one
 * has to be declared before the first initializer that mentions it.
 *
 * reftables is only declared when UPB_DEBUG_REFS is defined, although the
 * initializers name &reftables[...] unconditionally.  NOTE(review): presumably
 * the UPB_*_INIT macros discard those arguments in non-debug builds -- confirm
 * against the macro definitions in upb.h. */
5485 static const upb_msgdef msgs[22];
5486 static const upb_fielddef fields[105];
5487 static const upb_enumdef enums[5];
5488 static const upb_tabent strentries[236];
5489 static const upb_tabent intentries[18];
5490 static const upb_tabval arrays[184];
5491
5492 #ifdef UPB_DEBUG_REFS
5493 static upb_inttable reftables[264];
5494 #endif
5495
/* Message definitions for the types in descriptor.proto: one UPB_MSGDEF_INIT
 * entry per message, listed in alphabetical order of the fully-qualified name.
 *
 * Each entry embeds a UPB_INTTABLE_INIT that points into arrays[] (and, for
 * some messages, intentries[]) and a UPB_STRTABLE_INIT that points into
 * strentries[], plus a consecutive pair of reftables[] slots.
 *
 * fields[] refers to these entries by index (&msgs[N]), so the order here must
 * stay in sync with the other generated tables.  Generated by upbc; do not
 * edit by hand. */
5496 static const upb_msgdef msgs[22] = {
5497 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 40, 8, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[0]), false, UPB_SYNTAX_PROTO2, &reftables[0], &reftables[1]),
5498 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]), false, UPB_SYNTAX_PROTO2, &reftables[2], &reftables[3]),
5499 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ReservedRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[14], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[20]), false, UPB_SYNTAX_PROTO2, &reftables[4], &reftables[5]),
5500 UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[17], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[24]), false, UPB_SYNTAX_PROTO2, &reftables[6], &reftables[7]),
5501 UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[21], 4, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]), false, UPB_SYNTAX_PROTO2, &reftables[8], &reftables[9]),
5502 UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[25], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[32]), false, UPB_SYNTAX_PROTO2, &reftables[10], &reftables[11]),
5503 UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[29], 2, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[36]), false, UPB_SYNTAX_PROTO2, &reftables[12], &reftables[13]),
5504 UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 23, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[40]), false, UPB_SYNTAX_PROTO2, &reftables[14], &reftables[15]),
5505 UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 12, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[42], 11, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[56]), false, UPB_SYNTAX_PROTO2, &reftables[16], &reftables[17]),
5506 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 42, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[53], 13, 12), UPB_STRTABLE_INIT(12, 15, UPB_CTYPE_PTR, 4, &strentries[72]), false, UPB_SYNTAX_PROTO2, &reftables[18], &reftables[19]),
5507 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[66], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[88]), false, UPB_SYNTAX_PROTO2, &reftables[20], &reftables[21]),
5508 UPB_MSGDEF_INIT("google.protobuf.FileOptions", 31, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[68], 39, 15), UPB_STRTABLE_INIT(16, 31, UPB_CTYPE_PTR, 5, &strentries[92]), false, UPB_SYNTAX_PROTO2, &reftables[22], &reftables[23]),
5509 UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 10, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[107], 8, 4), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[124]), false, UPB_SYNTAX_PROTO2, &reftables[24], &reftables[25]),
5510 UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 15, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[115], 7, 6), UPB_STRTABLE_INIT(6, 7, UPB_CTYPE_PTR, 3, &strentries[132]), false, UPB_SYNTAX_PROTO2, &reftables[26], &reftables[27]),
5511 UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 7, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[10], &arrays[122], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[140]), false, UPB_SYNTAX_PROTO2, &reftables[28], &reftables[29]),
5512 UPB_MSGDEF_INIT("google.protobuf.OneofDescriptorProto", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[123], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[144]), false, UPB_SYNTAX_PROTO2, &reftables[30], &reftables[31]),
5513 UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[125], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[148]), false, UPB_SYNTAX_PROTO2, &reftables[32], &reftables[33]),
5514 UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 7, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[14], &arrays[129], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[152]), false, UPB_SYNTAX_PROTO2, &reftables[34], &reftables[35]),
5515 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[130], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[156]), false, UPB_SYNTAX_PROTO2, &reftables[36], &reftables[37]),
5516 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 19, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[132], 7, 5), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[160]), false, UPB_SYNTAX_PROTO2, &reftables[38], &reftables[39]),
5517 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[139], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[168]), false, UPB_SYNTAX_PROTO2, &reftables[40], &reftables[41]),
5518 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[148], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[184]), false, UPB_SYNTAX_PROTO2, &reftables[42], &reftables[43]),
5519 };
5520
/* Field definitions for every message in descriptor.proto: one
 * UPB_FIELDDEF_INIT entry per field, listed in order of field name.  Fields
 * that share a name across messages (e.g. "name", "options", "deprecated")
 * appear as adjacent entries that differ in the containing message.
 *
 * Each entry carries the label, the type, the quoted field name followed by an
 * integer, a pointer to the containing message (&msgs[N]), and -- for message
 * and enum typed fields -- a pointer to the referenced msgs[]/enums[] entry
 * (NULL otherwise), plus a consecutive pair of reftables[] slots.
 * NOTE(review): the integer after the name looks like the proto field number
 * and the remaining integers like handler selector/index values -- confirm
 * against the UPB_FIELDDEF_INIT macro.
 *
 * strentries[] refers to these entries by index (&fields[N]), so the order
 * here must stay in sync with the other generated tables.  Generated by upbc;
 * do not edit by hand. */
5521 static const upb_fielddef fields[105] = {
5522 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[20], NULL, 15, 6, {0},&reftables[44], &reftables[45]),
5523 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[4], NULL, 6, 1, {0},&reftables[46], &reftables[47]),
5524 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_enable_arenas", 31, &msgs[11], NULL, 23, 12, {0},&reftables[48], &reftables[49]),
5525 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[11], NULL, 17, 6, {0},&reftables[50], &reftables[51]),
5526 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "client_streaming", 5, &msgs[13], NULL, 13, 4, {0},&reftables[52], &reftables[53]),
5527 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "csharp_namespace", 37, &msgs[11], NULL, 27, 14, {0},&reftables[54], &reftables[55]),
5528 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[8], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[56], &reftables[57]),
5529 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[7], NULL, 16, 7, {0},&reftables[58], &reftables[59]),
5530 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[9], NULL, 30, 8, {0},&reftables[60], &reftables[61]),
5531 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[12], NULL, 8, 3, {0},&reftables[62], &reftables[63]),
5532 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[8], NULL, 8, 3, {0},&reftables[64], &reftables[65]),
5533 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[14], NULL, 6, 1, {0},&reftables[66], &reftables[67]),
5534 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 23, &msgs[11], NULL, 21, 10, {0},&reftables[68], &reftables[69]),
5535 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[4], NULL, 7, 2, {0},&reftables[70], &reftables[71]),
5536 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[17], NULL, 6, 1, {0},&reftables[72], &reftables[73]),
5537 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 1, &msgs[6], NULL, 6, 1, {0},&reftables[74], &reftables[75]),
5538 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[20], NULL, 11, 4, {0},&reftables[76], &reftables[77]),
5539 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[2], NULL, 3, 1, {0},&reftables[78], &reftables[79]),
5540 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[80], &reftables[81]),
5541 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[3]), 18, 2, {0},&reftables[82], &reftables[83]),
5542 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[9], (const upb_def*)(&msgs[3]), 13, 1, {0},&reftables[84], &reftables[85]),
5543 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[7], NULL, 7, 2, {0},&reftables[86], &reftables[87]),
5544 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[7]), 24, 4, {0},&reftables[88], &reftables[89]),
5545 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[9], (const upb_def*)(&msgs[7]), 19, 3, {0},&reftables[90], &reftables[91]),
5546 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 21, 3, {0},&reftables[92], &reftables[93]),
5547 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[7]), 12, 0, {0},&reftables[94], &reftables[95]),
5548 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[10], (const upb_def*)(&msgs[9]), 5, 0, {0},&reftables[96], &reftables[97]),
5549 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[11], NULL, 14, 5, {0},&reftables[98], &reftables[99]),
5550 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[20], NULL, 6, 1, {0},&reftables[100], &reftables[101]),
5551 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[13], NULL, 7, 2, {0},&reftables[102], &reftables[103]),
5552 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[21], NULL, 5, 1, {0},&reftables[104], &reftables[105]),
5553 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[11], NULL, 20, 9, {0},&reftables[106], &reftables[107]),
5554 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[11], NULL, 18, 7, {0},&reftables[108], &reftables[109]),
5555 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[11], NULL, 13, 4, {0},&reftables[110], &reftables[111]),
5556 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[11], NULL, 9, 2, {0},&reftables[112], &reftables[113]),
5557 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[11], NULL, 6, 1, {0},&reftables[114], &reftables[115]),
5558 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_string_check_utf8", 27, &msgs[11], NULL, 22, 11, {0},&reftables[116], &reftables[117]),
5559 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "javanano_use_deprecated_package", 38, &msgs[11], NULL, 30, 15, {0},&reftables[118], &reftables[119]),
5560 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "json_name", 10, &msgs[7], NULL, 20, 9, {0},&reftables[120], &reftables[121]),
5561 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "jstype", 6, &msgs[8], (const upb_def*)(&enums[3]), 10, 5, {0},&reftables[122], &reftables[123]),
5562 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[7], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[124], &reftables[125]),
5563 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[8], NULL, 9, 4, {0},&reftables[126], &reftables[127]),
5564 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[19], NULL, 8, 2, {0},&reftables[128], &reftables[129]),
5565 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "leading_detached_comments", 6, &msgs[19], NULL, 16, 4, {0},&reftables[130], &reftables[131]),
5566 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[132], &reftables[133]),
5567 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "map_entry", 7, &msgs[12], NULL, 9, 4, {0},&reftables[134], &reftables[135]),
5568 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[12], NULL, 6, 1, {0},&reftables[136], &reftables[137]),
5569 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[9], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[138], &reftables[139]),
5570 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[16], (const upb_def*)(&msgs[13]), 6, 0, {0},&reftables[140], &reftables[141]),
5571 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[3], NULL, 8, 2, {0},&reftables[142], &reftables[143]),
5572 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[15], NULL, 2, 0, {0},&reftables[144], &reftables[145]),
5573 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[20], (const upb_def*)(&msgs[21]), 5, 0, {0},&reftables[146], &reftables[147]),
5574 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 32, 8, {0},&reftables[148], &reftables[149]),
5575 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[5], NULL, 4, 1, {0},&reftables[150], &reftables[151]),
5576 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[9], NULL, 22, 6, {0},&reftables[152], &reftables[153]),
5577 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[7], NULL, 4, 1, {0},&reftables[154], &reftables[155]),
5578 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[13], NULL, 4, 1, {0},&reftables[156], &reftables[157]),
5579 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[16], NULL, 8, 2, {0},&reftables[158], &reftables[159]),
5580 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[21], NULL, 2, 0, {0},&reftables[160], &reftables[161]),
5581 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[20], NULL, 10, 3, {0},&reftables[162], &reftables[163]),
5582 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 15, 1, {0},&reftables[164], &reftables[165]),
5583 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[12], NULL, 7, 2, {0},&reftables[166], &reftables[167]),
5584 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[5], NULL, 7, 2, {0},&reftables[168], &reftables[169]),
5585 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[7], NULL, 10, 3, {0},&reftables[170], &reftables[171]),
5586 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "objc_class_prefix", 36, &msgs[11], NULL, 24, 13, {0},&reftables[172], &reftables[173]),
5587 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "oneof_decl", 8, &msgs[0], (const upb_def*)(&msgs[15]), 28, 6, {0},&reftables[174], &reftables[175]),
5588 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "oneof_index", 9, &msgs[7], NULL, 19, 8, {0},&reftables[176], &reftables[177]),
5589 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[11], (const upb_def*)(&enums[4]), 12, 3, {0},&reftables[178], &reftables[179]),
5590 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[12]), 25, 5, {0},&reftables[180], &reftables[181]),
5591 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[9], (const upb_def*)(&msgs[11]), 20, 4, {0},&reftables[182], &reftables[183]),
5592 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[13], (const upb_def*)(&msgs[14]), 3, 0, {0},&reftables[184], &reftables[185]),
5593 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[7], (const upb_def*)(&msgs[8]), 3, 0, {0},&reftables[186], &reftables[187]),
5594 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[16], (const upb_def*)(&msgs[17]), 7, 1, {0},&reftables[188], &reftables[189]),
5595 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[5], (const upb_def*)(&msgs[6]), 3, 0, {0},&reftables[190], &reftables[191]),
5596 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[3], (const upb_def*)(&msgs[4]), 7, 1, {0},&reftables[192], &reftables[193]),
5597 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[13], NULL, 10, 3, {0},&reftables[194], &reftables[195]),
5598 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[9], NULL, 25, 7, {0},&reftables[196], &reftables[197]),
5599 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[8], NULL, 7, 2, {0},&reftables[198], &reftables[199]),
5600 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[19], NULL, 4, 0, {0},&reftables[200], &reftables[201]),
5601 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[20], NULL, 9, 2, {0},&reftables[202], &reftables[203]),
5602 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[9], NULL, 35, 9, {0},&reftables[204], &reftables[205]),
5603 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[11], NULL, 19, 8, {0},&reftables[206], &reftables[207]),
5604 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "reserved_name", 10, &msgs[0], NULL, 37, 9, {0},&reftables[208], &reftables[209]),
5605 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "reserved_range", 9, &msgs[0], (const upb_def*)(&msgs[2]), 31, 7, {0},&reftables[210], &reftables[211]),
5606 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "server_streaming", 6, &msgs[13], NULL, 14, 5, {0},&reftables[212], &reftables[213]),
5607 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[9], (const upb_def*)(&msgs[16]), 16, 2, {0},&reftables[214], &reftables[215]),
5608 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[9], (const upb_def*)(&msgs[18]), 21, 5, {0},&reftables[216], &reftables[217]),
5609 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[19], NULL, 7, 1, {0},&reftables[218], &reftables[219]),
5610 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[2], NULL, 2, 0, {0},&reftables[220], &reftables[221]),
5611 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[222], &reftables[223]),
5612 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[20], NULL, 12, 5, {0},&reftables[224], &reftables[225]),
5613 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "syntax", 12, &msgs[9], NULL, 39, 11, {0},&reftables[226], &reftables[227]),
5614 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[19], NULL, 11, 3, {0},&reftables[228], &reftables[229]),
5615 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[7], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[230], &reftables[231]),
5616 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[7], NULL, 13, 6, {0},&reftables[232], &reftables[233]),
5617 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[234], &reftables[235]),
5618 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[12], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[236], &reftables[237]),
5619 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[6], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[238], &reftables[239]),
5620 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[4], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[240], &reftables[241]),
5621 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[8], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[242], &reftables[243]),
5622 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[14], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[244], &reftables[245]),
5623 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[17], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[246], &reftables[247]),
5624 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[3], (const upb_def*)(&msgs[5]), 6, 0, {0},&reftables[248], &reftables[249]),
5625 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[8], NULL, 11, 6, {0},&reftables[250], &reftables[251]),
5626 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[9], NULL, 38, 10, {0},&reftables[252], &reftables[253]),
5627 };
5628
/* Enum definitions for the enum types in descriptor.proto: one
 * UPB_ENUMDEF_INIT entry per enum, listed in alphabetical order of the
 * fully-qualified name.
 *
 * Each entry embeds a UPB_STRTABLE_INIT of UPB_CTYPE_INT32 values that points
 * into strentries[] and a UPB_INTTABLE_INIT of UPB_CTYPE_CSTR values that
 * points into arrays[], plus a consecutive pair of reftables[] slots.
 * NOTE(review): these look like the name->number and number->name lookup
 * tables respectively -- confirm against the UPB_ENUMDEF_INIT macro.
 *
 * fields[] refers to these entries by index (&enums[N]), so the order here
 * must stay in sync with the other generated tables.  Generated by upbc; do
 * not edit by hand. */
5629 static const upb_enumdef enums[5] = {
5630 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[188]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[151], 4, 3), 0, &reftables[254], &reftables[255]),
5631 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[192]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[155], 19, 18), 0, &reftables[256], &reftables[257]),
5632 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[224]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[174], 3, 3), 0, &reftables[258], &reftables[259]),
5633 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.JSType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[228]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[177], 3, 3), 0, &reftables[260], &reftables[261]),
5634 UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[232]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[180], 4, 3), 0, &reftables[262], &reftables[263]),
5635 };
5636
5637 static const upb_tabent strentries[236] = {
5638 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
5639 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5640 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "reserved_name"), UPB_TABVALUE_PTR_INIT(&fields[82]), NULL},
5641 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
5642 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5643 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5644 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5645 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[25]), &strentries[12]},
5646 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[24]), &strentries[14]},
5647 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5648 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
5649 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5650 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "reserved_range"), UPB_TABVALUE_PTR_INIT(&fields[83]), NULL},
5651 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
5652 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "oneof_decl"), UPB_TABVALUE_PTR_INIT(&fields[65]), NULL},
5653 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[19]), &strentries[13]},
5654 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[89]), NULL},
5655 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
5656 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5657 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5658 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[88]), NULL},
5659 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
5660 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5661 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5662 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5663 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
5664 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
5665 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[49]), &strentries[26]},
5666 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
5667 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
5668 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
5669 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5670 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
5671 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5672 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
5673 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[34]},
5674 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
5675 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
5676 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5677 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5678 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "oneof_index"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
5679 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[40]), NULL},
5680 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5681 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
5682 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5683 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5684 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5685 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5686 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[63]), &strentries[53]},
5687 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5688 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
5689 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[94]), NULL},
5690 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "json_name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
5691 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[93]), &strentries[50]},
5692 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
5693 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
5694 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
5695 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5696 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
5697 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5698 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5699 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5700 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5701 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
5702 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
5703 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5704 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
5705 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5706 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "jstype"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
5707 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
5708 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5709 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5710 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[23]), NULL},
5711 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[104]), NULL},
5712 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5713 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[54]), NULL},
5714 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[85]), NULL},
5715 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5716 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[86]), NULL},
5717 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5718 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5719 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "syntax"), UPB_TABVALUE_PTR_INIT(&fields[91]), NULL},
5720 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
5721 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
5722 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
5723 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[86]},
5724 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
5725 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), &strentries[85]},
5726 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5727 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
5728 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5729 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5730 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5731 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5732 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
5733 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "csharp_namespace"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
5734 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5735 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5736 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5737 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5738 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5739 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5740 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5741 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
5742 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[120]},
5743 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5744 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5745 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
5746 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[95]), NULL},
5747 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5748 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5749 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5750 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[33]), &strentries[117]},
5751 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5752 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[32]), &strentries[118]},
5753 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[31]), NULL},
5754 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5755 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "javanano_use_deprecated_package"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[123]},
5756 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[81]), NULL},
5757 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[67]), NULL},
5758 {UPB_TABKEY_STR("\026", "\000", "\000", "\000", "java_string_check_utf8"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
5759 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[12]), &strentries[119]},
5760 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "objc_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
5761 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "cc_enable_arenas"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
5762 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[128]},
5763 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5764 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5765 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5766 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
5767 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[9]), NULL},
5768 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "map_entry"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
5769 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[61]), NULL},
5770 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5771 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "client_streaming"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
5772 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "server_streaming"), UPB_TABVALUE_PTR_INIT(&fields[84]), NULL},
5773 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
5774 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[29]), NULL},
5775 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5776 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
5777 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
5778 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
5779 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
5780 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5781 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5782 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5783 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5784 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5785 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
5786 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5787 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[72]), &strentries[150]},
5788 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
5789 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[57]), &strentries[149]},
5790 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
5791 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
5792 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5793 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5794 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5795 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5796 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
5797 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5798 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5799 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5800 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5801 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[87]), &strentries[167]},
5802 {UPB_TABKEY_STR("\031", "\000", "\000", "\000", "leading_detached_comments"), UPB_TABVALUE_PTR_INIT(&fields[43]), &strentries[165]},
5803 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[92]), NULL},
5804 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[42]), &strentries[164]},
5805 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
5806 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
5807 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5808 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5809 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
5810 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5811 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5812 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5813 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
5814 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
5815 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5816 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5817 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5818 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5819 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
5820 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
5821 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[90]), &strentries[182]},
5822 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5823 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5824 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
5825 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
5826 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[190]},
5827 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5828 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
5829 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
5830 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
5831 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5832 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5833 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5834 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5835 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
5836 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[221]},
5837 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
5838 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5839 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
5840 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
5841 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
5842 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5843 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[222]},
5844 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5845 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5846 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[219]},
5847 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5848 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5849 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5850 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5851 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
5852 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
5853 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5854 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[218]},
5855 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5856 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
5857 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
5858 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
5859 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
5860 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
5861 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
5862 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5863 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
5864 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[225]},
5865 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
5866 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5867 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NORMAL"), UPB_TABVALUE_INT_INIT(0), NULL},
5868 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NUMBER"), UPB_TABVALUE_INT_INIT(2), NULL},
5869 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_STRING"), UPB_TABVALUE_INT_INIT(1), NULL},
5870 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
5871 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[235]},
5872 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5873 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
5874 };
5875
/*
 * intentries: 18 statically initialized, integer-keyed upb_tabent records.
 *
 * Each initializer is {key, value, third member}. In this table:
 *   - even-indexed slots are vacant (UPB_TABKEY_NONE / UPB_TABVALUE_EMPTY_INIT);
 *   - odd-indexed slots hold a numeric key (999 or 33) whose value is a
 *     pointer to an element of fields[];
 *   - the third member is NULL for every slot.
 *
 * The fields[] elements referenced here are the same ones that the
 * string-keyed entries earlier in this file associate with
 * "uninterpreted_option" (key 999) and "deprecated" (key 33); the trailing
 * comment on each keyed entry records that name.
 *
 * NOTE(review): presumably each consecutive pair of slots backs one small
 * integer hash table for a field number that does not fit the dense arrays[]
 * table -- confirm against the code that takes &intentries[N]. The layout
 * appears to be machine-generated; slot positions are significant, so do not
 * reorder or hand-edit.
 */
5876 static const upb_tabent intentries[18] = {
5877 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5878 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},  /* "uninterpreted_option" */
5879 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5880 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},  /* "uninterpreted_option" */
5881 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5882 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},  /* "uninterpreted_option" */
5883 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5884 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[95]), NULL},  /* "uninterpreted_option" */
5885 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5886 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},  /* "uninterpreted_option" */
5887 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5888 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},  /* "deprecated" */
5889 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5890 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},  /* "uninterpreted_option" */
5891 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5892 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},  /* "deprecated" */
5893 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5894 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},  /* "uninterpreted_option" */
5895 };
5896
/*
 * arrays: 184 statically initialized upb_tabval slots.
 *
 * Two kinds of payload appear, in this order:
 *   1. a leading run whose non-empty slots are pointers into fields[];
 *   2. a trailing run whose non-empty slots are pointers to string literals
 *      naming enum values (LABEL_*, TYPE_*, STRING/CORD/STRING_PIECE, JS_*,
 *      SPEED/CODE_SIZE/LITE_RUNTIME).
 *
 * In the trailing run, a name's offset within its group equals the integer
 * that the string-keyed table earlier in this file maps that name to (for
 * example "LABEL_OPTIONAL" -> 1, "STRING" -> 0). Groups whose numbering
 * starts at 1 are preceded by an empty slot.
 *
 * NOTE(review): presumably this is the concatenated backing store for the
 * dense array part of several integer-keyed tables, each sub-run indexed by
 * field number or enum value -- confirm against the code that takes
 * &arrays[N]. The layout appears to be machine-generated; slot positions are
 * significant, so do not reorder or hand-edit.
 */
5897 static const upb_tabval arrays[184] = {
/* --- pointers into fields[], interleaved with empty slots --- */
5898 UPB_TABVALUE_EMPTY_INIT,
5899 UPB_TABVALUE_PTR_INIT(&fields[52]),
5900 UPB_TABVALUE_PTR_INIT(&fields[25]),
5901 UPB_TABVALUE_PTR_INIT(&fields[60]),
5902 UPB_TABVALUE_PTR_INIT(&fields[19]),
5903 UPB_TABVALUE_PTR_INIT(&fields[24]),
5904 UPB_TABVALUE_PTR_INIT(&fields[22]),
5905 UPB_TABVALUE_PTR_INIT(&fields[68]),
5906 UPB_TABVALUE_PTR_INIT(&fields[65]),
5907 UPB_TABVALUE_PTR_INIT(&fields[83]),
5908 UPB_TABVALUE_PTR_INIT(&fields[82]),
5909 UPB_TABVALUE_EMPTY_INIT,
5910 UPB_TABVALUE_PTR_INIT(&fields[89]),
5911 UPB_TABVALUE_PTR_INIT(&fields[18]),
5912 UPB_TABVALUE_EMPTY_INIT,
5913 UPB_TABVALUE_PTR_INIT(&fields[88]),
5914 UPB_TABVALUE_PTR_INIT(&fields[17]),
5915 UPB_TABVALUE_EMPTY_INIT,
5916 UPB_TABVALUE_PTR_INIT(&fields[49]),
5917 UPB_TABVALUE_PTR_INIT(&fields[102]),
5918 UPB_TABVALUE_PTR_INIT(&fields[74]),
5919 UPB_TABVALUE_EMPTY_INIT,
5920 UPB_TABVALUE_EMPTY_INIT,
5921 UPB_TABVALUE_PTR_INIT(&fields[1]),
5922 UPB_TABVALUE_PTR_INIT(&fields[13]),
5923 UPB_TABVALUE_EMPTY_INIT,
5924 UPB_TABVALUE_PTR_INIT(&fields[53]),
5925 UPB_TABVALUE_PTR_INIT(&fields[62]),
5926 UPB_TABVALUE_PTR_INIT(&fields[73]),
5927 UPB_TABVALUE_EMPTY_INIT,
5928 UPB_TABVALUE_PTR_INIT(&fields[15]),
5929 UPB_TABVALUE_EMPTY_INIT,
5930 UPB_TABVALUE_PTR_INIT(&fields[55]),
5931 UPB_TABVALUE_PTR_INIT(&fields[21]),
5932 UPB_TABVALUE_PTR_INIT(&fields[63]),
5933 UPB_TABVALUE_PTR_INIT(&fields[40]),
5934 UPB_TABVALUE_PTR_INIT(&fields[93]),
5935 UPB_TABVALUE_PTR_INIT(&fields[94]),
5936 UPB_TABVALUE_PTR_INIT(&fields[7]),
5937 UPB_TABVALUE_PTR_INIT(&fields[71]),
5938 UPB_TABVALUE_PTR_INIT(&fields[66]),
5939 UPB_TABVALUE_PTR_INIT(&fields[38]),
5940 UPB_TABVALUE_EMPTY_INIT,
5941 UPB_TABVALUE_PTR_INIT(&fields[6]),
5942 UPB_TABVALUE_PTR_INIT(&fields[77]),
5943 UPB_TABVALUE_PTR_INIT(&fields[10]),
5944 UPB_TABVALUE_EMPTY_INIT,
5945 UPB_TABVALUE_PTR_INIT(&fields[41]),
5946 UPB_TABVALUE_PTR_INIT(&fields[39]),
5947 UPB_TABVALUE_EMPTY_INIT,
5948 UPB_TABVALUE_EMPTY_INIT,
5949 UPB_TABVALUE_EMPTY_INIT,
5950 UPB_TABVALUE_PTR_INIT(&fields[103]),
5951 UPB_TABVALUE_EMPTY_INIT,
5952 UPB_TABVALUE_PTR_INIT(&fields[54]),
5953 UPB_TABVALUE_PTR_INIT(&fields[76]),
5954 UPB_TABVALUE_PTR_INIT(&fields[8]),
5955 UPB_TABVALUE_PTR_INIT(&fields[47]),
5956 UPB_TABVALUE_PTR_INIT(&fields[20]),
5957 UPB_TABVALUE_PTR_INIT(&fields[85]),
5958 UPB_TABVALUE_PTR_INIT(&fields[23]),
5959 UPB_TABVALUE_PTR_INIT(&fields[69]),
5960 UPB_TABVALUE_PTR_INIT(&fields[86]),
5961 UPB_TABVALUE_PTR_INIT(&fields[80]),
5962 UPB_TABVALUE_PTR_INIT(&fields[104]),
5963 UPB_TABVALUE_PTR_INIT(&fields[91]),
5964 UPB_TABVALUE_EMPTY_INIT,
5965 UPB_TABVALUE_PTR_INIT(&fields[26]),
5966 UPB_TABVALUE_EMPTY_INIT,
5967 UPB_TABVALUE_PTR_INIT(&fields[35]),
5968 UPB_TABVALUE_EMPTY_INIT,
5969 UPB_TABVALUE_EMPTY_INIT,
5970 UPB_TABVALUE_EMPTY_INIT,
5971 UPB_TABVALUE_EMPTY_INIT,
5972 UPB_TABVALUE_EMPTY_INIT,
5973 UPB_TABVALUE_EMPTY_INIT,
5974 UPB_TABVALUE_PTR_INIT(&fields[34]),
5975 UPB_TABVALUE_PTR_INIT(&fields[67]),
5976 UPB_TABVALUE_PTR_INIT(&fields[33]),
5977 UPB_TABVALUE_PTR_INIT(&fields[27]),
5978 UPB_TABVALUE_EMPTY_INIT,
5979 UPB_TABVALUE_EMPTY_INIT,
5980 UPB_TABVALUE_EMPTY_INIT,
5981 UPB_TABVALUE_EMPTY_INIT,
5982 UPB_TABVALUE_PTR_INIT(&fields[3]),
5983 UPB_TABVALUE_PTR_INIT(&fields[32]),
5984 UPB_TABVALUE_PTR_INIT(&fields[81]),
5985 UPB_TABVALUE_EMPTY_INIT,
5986 UPB_TABVALUE_PTR_INIT(&fields[31]),
5987 UPB_TABVALUE_EMPTY_INIT,
5988 UPB_TABVALUE_EMPTY_INIT,
5989 UPB_TABVALUE_PTR_INIT(&fields[12]),
5990 UPB_TABVALUE_EMPTY_INIT,
5991 UPB_TABVALUE_EMPTY_INIT,
5992 UPB_TABVALUE_EMPTY_INIT,
5993 UPB_TABVALUE_PTR_INIT(&fields[36]),
5994 UPB_TABVALUE_EMPTY_INIT,
5995 UPB_TABVALUE_EMPTY_INIT,
5996 UPB_TABVALUE_EMPTY_INIT,
5997 UPB_TABVALUE_PTR_INIT(&fields[2]),
5998 UPB_TABVALUE_EMPTY_INIT,
5999 UPB_TABVALUE_EMPTY_INIT,
6000 UPB_TABVALUE_EMPTY_INIT,
6001 UPB_TABVALUE_EMPTY_INIT,
6002 UPB_TABVALUE_PTR_INIT(&fields[64]),
6003 UPB_TABVALUE_PTR_INIT(&fields[5]),
6004 UPB_TABVALUE_PTR_INIT(&fields[37]),
6005 UPB_TABVALUE_EMPTY_INIT,
6006 UPB_TABVALUE_PTR_INIT(&fields[46]),
6007 UPB_TABVALUE_PTR_INIT(&fields[61]),
6008 UPB_TABVALUE_PTR_INIT(&fields[9]),
6009 UPB_TABVALUE_EMPTY_INIT,
6010 UPB_TABVALUE_EMPTY_INIT,
6011 UPB_TABVALUE_EMPTY_INIT,
6012 UPB_TABVALUE_PTR_INIT(&fields[45]),
6013 UPB_TABVALUE_EMPTY_INIT,
6014 UPB_TABVALUE_PTR_INIT(&fields[56]),
6015 UPB_TABVALUE_PTR_INIT(&fields[29]),
6016 UPB_TABVALUE_PTR_INIT(&fields[75]),
6017 UPB_TABVALUE_PTR_INIT(&fields[70]),
6018 UPB_TABVALUE_PTR_INIT(&fields[4]),
6019 UPB_TABVALUE_PTR_INIT(&fields[84]),
6020 UPB_TABVALUE_EMPTY_INIT,
6021 UPB_TABVALUE_EMPTY_INIT,
6022 UPB_TABVALUE_PTR_INIT(&fields[50]),
6023 UPB_TABVALUE_EMPTY_INIT,
6024 UPB_TABVALUE_PTR_INIT(&fields[57]),
6025 UPB_TABVALUE_PTR_INIT(&fields[48]),
6026 UPB_TABVALUE_PTR_INIT(&fields[72]),
6027 UPB_TABVALUE_EMPTY_INIT,
6028 UPB_TABVALUE_EMPTY_INIT,
6029 UPB_TABVALUE_PTR_INIT(&fields[44]),
6030 UPB_TABVALUE_EMPTY_INIT,
6031 UPB_TABVALUE_PTR_INIT(&fields[78]),
6032 UPB_TABVALUE_PTR_INIT(&fields[87]),
6033 UPB_TABVALUE_PTR_INIT(&fields[42]),
6034 UPB_TABVALUE_PTR_INIT(&fields[92]),
6035 UPB_TABVALUE_EMPTY_INIT,
6036 UPB_TABVALUE_PTR_INIT(&fields[43]),
6037 UPB_TABVALUE_EMPTY_INIT,
6038 UPB_TABVALUE_EMPTY_INIT,
6039 UPB_TABVALUE_PTR_INIT(&fields[51]),
6040 UPB_TABVALUE_PTR_INIT(&fields[28]),
6041 UPB_TABVALUE_PTR_INIT(&fields[79]),
6042 UPB_TABVALUE_PTR_INIT(&fields[59]),
6043 UPB_TABVALUE_PTR_INIT(&fields[16]),
6044 UPB_TABVALUE_PTR_INIT(&fields[90]),
6045 UPB_TABVALUE_PTR_INIT(&fields[0]),
6046 UPB_TABVALUE_EMPTY_INIT,
6047 UPB_TABVALUE_PTR_INIT(&fields[58]),
6048 UPB_TABVALUE_PTR_INIT(&fields[30]),
/* --- enum value names: LABEL_* (offset 0 empty, then values 1..3) --- */
6049 UPB_TABVALUE_EMPTY_INIT,
6050 UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
6051 UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
6052 UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
/* --- enum value names: TYPE_* (offset 0 empty, then values 1..18) --- */
6053 UPB_TABVALUE_EMPTY_INIT,
6054 UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
6055 UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
6056 UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
6057 UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
6058 UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
6059 UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
6060 UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
6061 UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
6062 UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
6063 UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
6064 UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
6065 UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
6066 UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
6067 UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
6068 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
6069 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
6070 UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
6071 UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
/* --- enum value names: STRING / CORD / STRING_PIECE (values 0..2) --- */
6072 UPB_TABVALUE_PTR_INIT("STRING"),
6073 UPB_TABVALUE_PTR_INIT("CORD"),
6074 UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
/* --- enum value names: JS_* (values 0..2) --- */
6075 UPB_TABVALUE_PTR_INIT("JS_NORMAL"),
6076 UPB_TABVALUE_PTR_INIT("JS_STRING"),
6077 UPB_TABVALUE_PTR_INIT("JS_NUMBER"),
/* --- enum value names: SPEED / CODE_SIZE / LITE_RUNTIME (offset 0 empty, then values 1..3) --- */
6078 UPB_TABVALUE_EMPTY_INIT,
6079 UPB_TABVALUE_PTR_INIT("SPEED"),
6080 UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
6081 UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
6082 };
6083
6084 #ifdef UPB_DEBUG_REFS
6085 static upb_inttable reftables[264] = {
6086 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6087 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6088 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6089 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6090 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6091 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6092 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6093 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6094 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6095 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6096 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6097 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6098 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6099 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6100 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6101 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6102 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6103 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6104 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6105 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6106 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6107 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6108 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6109 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6110 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6111 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6112 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6113 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6114 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6115 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6116 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6117 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6118 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6119 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6120 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6121 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6122 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6123 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6124 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6125 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6126 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6127 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6128 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6129 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6130 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6131 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6132 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6133 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6134 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6135 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6136 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6137 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6138 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6139 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6140 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6141 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6142 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6143 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6144 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6145 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6146 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6147 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6148 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6149 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6150 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6151 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6152 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6153 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6154 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6155 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6156 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6157 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6158 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6159 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6160 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6161 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6162 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6163 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6164 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6165 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6166 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6167 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6168 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6169 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6170 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6171 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6172 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6173 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6174 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6175 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6176 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6177 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6178 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6179 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6180 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6181 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6182 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6183 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6184 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6185 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6186 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6187 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6188 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6189 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6190 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6191 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6192 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6193 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6194 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6195 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6196 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6197 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6198 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6199 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6200 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6201 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6202 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6203 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6204 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6205 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6206 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6207 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6208 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6209 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6210 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6211 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6212 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6213 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6214 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6215 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6216 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6217 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6218 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6219 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6220 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6221 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6222 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6223 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6224 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6225 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6226 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6227 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6228 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6229 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6230 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6231 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6232 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6233 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6234 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6235 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6236 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6237 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6238 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6239 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6240 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6241 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6242 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6243 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6244 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6245 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6246 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6247 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6248 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6249 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6250 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6251 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6252 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6253 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6254 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6255 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6256 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6257 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6258 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6259 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6260 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6261 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6262 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6263 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6264 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6265 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6266 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6267 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6268 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6269 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6270 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6271 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6272 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6273 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6274 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6275 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6276 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6277 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6278 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6279 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6280 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6281 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6282 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6283 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6284 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6285 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6286 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6287 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6288 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6289 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6290 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6291 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6292 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6293 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6294 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6295 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6296 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6297 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6298 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6299 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6300 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6301 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6302 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6303 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6304 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6305 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6306 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6307 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6308 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6309 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6310 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6311 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6312 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6313 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6314 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6315 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6316 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6317 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6318 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6319 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6320 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6321 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6322 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6323 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6324 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6325 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6326 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6327 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6328 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6329 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6330 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6331 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6332 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6333 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6334 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6335 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6336 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6337 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6338 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6339 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6340 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6341 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6342 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6343 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6344 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6345 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6346 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6347 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6348 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6349 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6350 };
6351 #endif
6352
/* Calls upb_msgdef_ref(m, owner) and then returns "m" unchanged, so that a
 * getter can take the ref and return the def in a single expression. */
static const upb_msgdef *refm(const upb_msgdef *m, const void *owner) {
  const upb_msgdef *result = m;
  upb_msgdef_ref(result, owner);
  return result;
}
6357
/* Calls upb_enumdef_ref(e, owner) and then returns "e" unchanged; the enum
 * counterpart of refm(). */
static const upb_enumdef *refe(const upb_enumdef *e, const void *owner) {
  const upb_enumdef *result = e;
  upb_enumdef_ref(result, owner);
  return result;
}
6362
6363 /* Public API. */
upbdefs_google_protobuf_DescriptorProto_get(const void * owner)6364 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_get(const void *owner) { return refm(&msgs[0], owner); }
upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void * owner)6365 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void *owner) { return refm(&msgs[1], owner); }
upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void * owner)6366 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void *owner) { return refm(&msgs[2], owner); }
upbdefs_google_protobuf_EnumDescriptorProto_get(const void * owner)6367 const upb_msgdef *upbdefs_google_protobuf_EnumDescriptorProto_get(const void *owner) { return refm(&msgs[3], owner); }
upbdefs_google_protobuf_EnumOptions_get(const void * owner)6368 const upb_msgdef *upbdefs_google_protobuf_EnumOptions_get(const void *owner) { return refm(&msgs[4], owner); }
upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void * owner)6369 const upb_msgdef *upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void *owner) { return refm(&msgs[5], owner); }
upbdefs_google_protobuf_EnumValueOptions_get(const void * owner)6370 const upb_msgdef *upbdefs_google_protobuf_EnumValueOptions_get(const void *owner) { return refm(&msgs[6], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_get(const void * owner)6371 const upb_msgdef *upbdefs_google_protobuf_FieldDescriptorProto_get(const void *owner) { return refm(&msgs[7], owner); }
upbdefs_google_protobuf_FieldOptions_get(const void * owner)6372 const upb_msgdef *upbdefs_google_protobuf_FieldOptions_get(const void *owner) { return refm(&msgs[8], owner); }
upbdefs_google_protobuf_FileDescriptorProto_get(const void * owner)6373 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorProto_get(const void *owner) { return refm(&msgs[9], owner); }
upbdefs_google_protobuf_FileDescriptorSet_get(const void * owner)6374 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorSet_get(const void *owner) { return refm(&msgs[10], owner); }
upbdefs_google_protobuf_FileOptions_get(const void * owner)6375 const upb_msgdef *upbdefs_google_protobuf_FileOptions_get(const void *owner) { return refm(&msgs[11], owner); }
upbdefs_google_protobuf_MessageOptions_get(const void * owner)6376 const upb_msgdef *upbdefs_google_protobuf_MessageOptions_get(const void *owner) { return refm(&msgs[12], owner); }
upbdefs_google_protobuf_MethodDescriptorProto_get(const void * owner)6377 const upb_msgdef *upbdefs_google_protobuf_MethodDescriptorProto_get(const void *owner) { return refm(&msgs[13], owner); }
upbdefs_google_protobuf_MethodOptions_get(const void * owner)6378 const upb_msgdef *upbdefs_google_protobuf_MethodOptions_get(const void *owner) { return refm(&msgs[14], owner); }
upbdefs_google_protobuf_OneofDescriptorProto_get(const void * owner)6379 const upb_msgdef *upbdefs_google_protobuf_OneofDescriptorProto_get(const void *owner) { return refm(&msgs[15], owner); }
upbdefs_google_protobuf_ServiceDescriptorProto_get(const void * owner)6380 const upb_msgdef *upbdefs_google_protobuf_ServiceDescriptorProto_get(const void *owner) { return refm(&msgs[16], owner); }
upbdefs_google_protobuf_ServiceOptions_get(const void * owner)6381 const upb_msgdef *upbdefs_google_protobuf_ServiceOptions_get(const void *owner) { return refm(&msgs[17], owner); }
upbdefs_google_protobuf_SourceCodeInfo_get(const void * owner)6382 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_get(const void *owner) { return refm(&msgs[18], owner); }
upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void * owner)6383 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void *owner) { return refm(&msgs[19], owner); }
upbdefs_google_protobuf_UninterpretedOption_get(const void * owner)6384 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_get(const void *owner) { return refm(&msgs[20], owner); }
upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void * owner)6385 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void *owner) { return refm(&msgs[21], owner); }
6386
upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void * owner)6387 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void *owner) { return refe(&enums[0], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void * owner)6388 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void *owner) { return refe(&enums[1], owner); }
upbdefs_google_protobuf_FieldOptions_CType_get(const void * owner)6389 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_CType_get(const void *owner) { return refe(&enums[2], owner); }
upbdefs_google_protobuf_FieldOptions_JSType_get(const void * owner)6390 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_JSType_get(const void *owner) { return refe(&enums[3], owner); }
upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void * owner)6391 const upb_enumdef *upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void *owner) { return refe(&enums[4], owner); }
6392 /*
6393 ** XXX: The routines in this file that consume a string do not currently
6394 ** support having the string span buffers. In the future, as upb_sink and
6395 ** its buffering/sharing functionality evolve there should be an easy and
6396 ** idiomatic way of correctly handling this case. For now, we accept this
6397 ** limitation since we currently only parse descriptors from single strings.
6398 */
6399
6400
6401 #include <errno.h>
6402 #include <stdlib.h>
6403 #include <string.h>
6404
/* Returns true iff the NUL-terminated string "str" has exactly "n" bytes and
 * those bytes equal the first "n" bytes of "buf" ("buf" need not be
 * NUL-terminated). */
static bool upb_streq(const char *str, const char *buf, size_t n) {
  if (strlen(str) != n) {
    return false;
  }
  return memcmp(str, buf, n) == 0;
}
6409
/* We keep a stack of all the messages scopes we are currently in, as well as
 * the top-level file scope.  This is necessary to correctly qualify the
 * definitions that are contained inside.  "name" tracks the name of the
 * message or package (a bare name -- not qualified by any enclosing scopes).
 *
 * A frame is pushed by upb_descreader_startcontainer() (with name == NULL),
 * named by upb_descreader_setscopename(), and popped by
 * upb_descreader_endcontainer(), which qualifies every def from index "start"
 * onward with "name" and then releases "name" with upb_gfree(). */
typedef struct {
  /* Heap string installed by upb_descreader_setscopename(); NULL until a
   * name has been seen for this scope. */
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
} upb_descreader_frame;
6420
/* The maximum number of nested declarations that are allowed, e.g.:
6422 * message Foo {
6423 * message Bar {
6424 * message Baz {
6425 * }
6426 * }
6427 * }
6428 *
6429 * This is a resource limit that affects how big our runtime stack can grow.
6430 * TODO: make this a runtime-settable property of the Reader instance. */
6431 #define UPB_MAX_MESSAGE_NESTING 64
6432
/* State shared by all of the descriptor-reading handlers below; each handler
 * receives a pointer to this struct as its "closure". */
struct upb_descreader {
  upb_sink sink;
  /* Every filedef created by fileset_startfile() is pushed onto this table;
   * its address is also used as the owner passed to upb_filedef_new(). */
  upb_inttable files;
  upb_filedef *file;  /* The last file in files. */
  /* Scope frames: pushed by upb_descreader_startcontainer(), popped by
   * upb_descreader_endcontainer().  stack_len is the number of live frames. */
  upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
  int stack_len;

  /* Scratch for the enum value currently being read: enumval_onname() and
   * enumval_onnumber() fill these in, enumval_endmsg() consumes them. */
  uint32_t number;
  char *name;
  bool saw_number;
  bool saw_name;

  /* Text of the current field's default value, saved by field_ondefaultval()
   * and converted in field_endmsg() (NULL when no default was seen). */
  char *default_string;

  /* The fielddef that the field_* handlers are currently populating. */
  upb_fielddef *f;
};
6449
/* Copies the first "n" bytes of "buf" into a new (n + 1)-byte block obtained
 * from upb_gmalloc() and NUL-terminates it.  Returns NULL if the allocation
 * fails.  Callers in this file release the result with upb_gfree(). */
static char *upb_strndup(const char *buf, size_t n) {
  char *copy = upb_gmalloc(n + 1);
  if (copy != NULL) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
6457
/* Returns a newly allocated string that joins the input strings together with
 * a '.' separator, for example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 * A NULL "base" is treated like an empty one.  Returns NULL if allocation
 * fails.  The caller is responsible for releasing the result (callers in this
 * file use upb_gfree()). */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *joined;

  if (base == NULL || base[0] == '\0') {
    /* Nothing to prefix: the result is just a copy of "name". */
    return upb_gstrdup(name);
  }

  base_len = strlen(base);
  name_len = strlen(name);
  /* +2: one byte for the '.' separator, one for the terminating NUL. */
  joined = upb_gmalloc(base_len + name_len + 2);
  if (joined == NULL) {
    return NULL;
  }

  memcpy(joined, base, base_len);
  joined[base_len] = '.';
  /* name_len + 1 so that name's terminating NUL is copied as well. */
  memcpy(joined + base_len + 1, name, name_len + 1);
  return joined;
}
6478
6479 /* Qualify the defname for all defs starting with offset "start" with "str". */
upb_descreader_qualify(upb_filedef * f,char * str,int32_t start)6480 static bool upb_descreader_qualify(upb_filedef *f, char *str, int32_t start) {
6481 size_t i;
6482 for (i = start; i < upb_filedef_defcount(f); i++) {
6483 upb_def *def = upb_filedef_mutabledef(f, i);
6484 char *name = upb_join(str, upb_def_fullname(def));
6485 if (!name) {
6486 /* Need better logic here; at this point we've qualified some names but
6487 * not others. */
6488 return false;
6489 }
6490 upb_def_setfullname(def, name, NULL);
6491 upb_gfree(name);
6492 }
6493 return true;
6494 }
6495
6496
6497 /* upb_descreader ************************************************************/
6498
upb_descreader_top(upb_descreader * r)6499 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
6500 int index;
6501 assert(r->stack_len > 1);
6502 index = r->stack[r->stack_len-1].start - 1;
6503 assert(index >= 0);
6504 return upb_downcast_msgdef_mutable(upb_filedef_mutabledef(r->file, index));
6505 }
6506
upb_descreader_last(upb_descreader * r)6507 static upb_def *upb_descreader_last(upb_descreader *r) {
6508 return upb_filedef_mutabledef(r->file, upb_filedef_defcount(r->file) - 1);
6509 }
6510
6511 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
6512 * entities that have names and can contain sub-definitions. */
upb_descreader_startcontainer(upb_descreader * r)6513 void upb_descreader_startcontainer(upb_descreader *r) {
6514 upb_descreader_frame *f = &r->stack[r->stack_len++];
6515 f->start = upb_filedef_defcount(r->file);
6516 f->name = NULL;
6517 }
6518
/* Pops the innermost scope frame and qualifies every def that was added while
 * it was open with the frame's name (see upb_descreader_qualify()).
 *
 * Returns the result of the qualification.  The frame's name is released on
 * both the success and the failure path: the frame has already been popped,
 * and upb_descreader_startcontainer() overwrites a reused slot's name with
 * NULL, so skipping the free on failure would leak the string. */
bool upb_descreader_endcontainer(upb_descreader *r) {
  upb_descreader_frame *f;
  bool ok;

  assert(r->stack_len > 0);
  f = &r->stack[--r->stack_len];
  ok = upb_descreader_qualify(r->file, f->name, f->start);
  upb_gfree(f->name);
  f->name = NULL;
  return ok;
}
6528
/* Installs "str" as the name of the innermost scope frame, releasing any name
 * the frame held before.  The frame keeps the pointer itself (no copy is
 * made), and upb_descreader_endcontainer() later frees it. */
void upb_descreader_setscopename(upb_descreader *r, char *str) {
  upb_descreader_frame *innermost = r->stack + (r->stack_len - 1);

  upb_gfree(innermost->name);
  innermost->name = str;
}
6534
6535 /** Handlers for google.protobuf.FileDescriptorSet. ***************************/
6536
fileset_startfile(void * closure,const void * hd)6537 static void *fileset_startfile(void *closure, const void *hd) {
6538 upb_descreader *r = closure;
6539 UPB_UNUSED(hd);
6540 r->file = upb_filedef_new(&r->files);
6541 upb_inttable_push(&r->files, upb_value_ptr(r->file));
6542 return r;
6543 }
6544
6545 /** Handlers for google.protobuf.FileDescriptorProto. *************************/
6546
file_start(void * closure,const void * hd)6547 static bool file_start(void *closure, const void *hd) {
6548 upb_descreader *r = closure;
6549 UPB_UNUSED(hd);
6550 upb_descreader_startcontainer(r);
6551 return true;
6552 }
6553
/* Closes the file-level scope frame and returns whatever
 * upb_descreader_endcontainer() reports.  "status" is not written. */
static bool file_end(void *closure, const void *hd, upb_status *status) {
  UPB_UNUSED(hd);
  UPB_UNUSED(status);
  return upb_descreader_endcontainer((upb_descreader *)closure);
}
6560
file_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6561 static size_t file_onname(void *closure, const void *hd, const char *buf,
6562 size_t n, const upb_bufhandle *handle) {
6563 upb_descreader *r = closure;
6564 char *name;
6565 bool ok;
6566 UPB_UNUSED(hd);
6567 UPB_UNUSED(handle);
6568
6569 name = upb_strndup(buf, n);
6570 /* XXX: see comment at the top of the file. */
6571 ok = upb_filedef_setname(r->file, name, NULL);
6572 upb_gfree(name);
6573 UPB_ASSERT_VAR(ok, ok);
6574 return n;
6575 }
6576
file_onpackage(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6577 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6578 size_t n, const upb_bufhandle *handle) {
6579 upb_descreader *r = closure;
6580 char *package;
6581 bool ok;
6582 UPB_UNUSED(hd);
6583 UPB_UNUSED(handle);
6584
6585 package = upb_strndup(buf, n);
6586 /* XXX: see comment at the top of the file. */
6587 upb_descreader_setscopename(r, package);
6588 ok = upb_filedef_setpackage(r->file, package, NULL);
6589 UPB_ASSERT_VAR(ok, ok);
6590 return n;
6591 }
6592
file_onsyntax(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6593 static size_t file_onsyntax(void *closure, const void *hd, const char *buf,
6594 size_t n, const upb_bufhandle *handle) {
6595 upb_descreader *r = closure;
6596 bool ok;
6597 UPB_UNUSED(hd);
6598 UPB_UNUSED(handle);
6599 /* XXX: see comment at the top of the file. */
6600 if (upb_streq("proto2", buf, n)) {
6601 ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO2, NULL);
6602 } else if (upb_streq("proto3", buf, n)) {
6603 ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO3, NULL);
6604 } else {
6605 ok = false;
6606 }
6607
6608 UPB_ASSERT_VAR(ok, ok);
6609 return n;
6610 }
6611
file_startmsg(void * closure,const void * hd)6612 static void *file_startmsg(void *closure, const void *hd) {
6613 upb_descreader *r = closure;
6614 upb_msgdef *m = upb_msgdef_new(&m);
6615 bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
6616 UPB_UNUSED(hd);
6617 UPB_ASSERT_VAR(ok, ok);
6618 return r;
6619 }
6620
file_startenum(void * closure,const void * hd)6621 static void *file_startenum(void *closure, const void *hd) {
6622 upb_descreader *r = closure;
6623 upb_enumdef *e = upb_enumdef_new(&e);
6624 bool ok = upb_filedef_addenum(r->file, e, &e, NULL);
6625 UPB_UNUSED(hd);
6626 UPB_ASSERT_VAR(ok, ok);
6627 return r;
6628 }
6629
file_startext(void * closure,const void * hd)6630 static void *file_startext(void *closure, const void *hd) {
6631 upb_descreader *r = closure;
6632 bool ok;
6633 r->f = upb_fielddef_new(r);
6634 ok = upb_filedef_addext(r->file, r->f, r, NULL);
6635 UPB_UNUSED(hd);
6636 UPB_ASSERT_VAR(ok, ok);
6637 return r;
6638 }
6639
6640 /** Handlers for google.protobuf.EnumValueDescriptorProto. *********************/
6641
enumval_startmsg(void * closure,const void * hd)6642 static bool enumval_startmsg(void *closure, const void *hd) {
6643 upb_descreader *r = closure;
6644 UPB_UNUSED(hd);
6645 r->saw_number = false;
6646 r->saw_name = false;
6647 return true;
6648 }
6649
enumval_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6650 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6651 size_t n, const upb_bufhandle *handle) {
6652 upb_descreader *r = closure;
6653 UPB_UNUSED(hd);
6654 UPB_UNUSED(handle);
6655 /* XXX: see comment at the top of the file. */
6656 upb_gfree(r->name);
6657 r->name = upb_strndup(buf, n);
6658 r->saw_name = true;
6659 return n;
6660 }
6661
enumval_onnumber(void * closure,const void * hd,int32_t val)6662 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
6663 upb_descreader *r = closure;
6664 UPB_UNUSED(hd);
6665 r->number = val;
6666 r->saw_number = true;
6667 return true;
6668 }
6669
/* Finishes an enum value: adds the (name, number) pair collected by
 * enumval_onname() / enumval_onnumber() to the enumdef most recently added to
 * the current file.
 *
 * Returns false, with a message in "status", if either the name or the
 * number was never seen.  Otherwise returns the result of
 * upb_enumdef_addval(), so a rejected value fails the handler instead of
 * being reported as success while "status" may already carry an error.  The
 * saved name is released once the add has been attempted, whatever its
 * result. */
static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_enumdef *e;
  bool ok;
  UPB_UNUSED(hd);

  if (!r->saw_number || !r->saw_name) {
    upb_status_seterrmsg(status, "Enum value missing name or number.");
    return false;
  }
  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  ok = upb_enumdef_addval(e, r->name, r->number, status);
  upb_gfree(r->name);
  r->name = NULL;
  return ok;
}
6685
6686 /** Handlers for google.protobuf.EnumDescriptorProto. *************************/
6687
/* Validates the enumdef most recently added to the current file: it must
 * have a name and at least one value.  On failure a message is stored in
 * "status" and false is returned. */
static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_def *def;
  upb_enumdef *enumdef;
  UPB_UNUSED(hd);

  def = upb_descreader_last(reader);
  enumdef = upb_downcast_enumdef_mutable(def);

  if (upb_def_fullname(def) == NULL) {
    upb_status_seterrmsg(status, "Enum had no name.");
    return false;
  }
  if (upb_enumdef_numvals(enumdef) == 0) {
    upb_status_seterrmsg(status, "Enum had no values.");
    return false;
  }
  return true;
}
6704
enum_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6705 static size_t enum_onname(void *closure, const void *hd, const char *buf,
6706 size_t n, const upb_bufhandle *handle) {
6707 upb_descreader *r = closure;
6708 char *fullname = upb_strndup(buf, n);
6709 UPB_UNUSED(hd);
6710 UPB_UNUSED(handle);
6711 /* XXX: see comment at the top of the file. */
6712 upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6713 upb_gfree(fullname);
6714 return n;
6715 }
6716
6717 /** Handlers for google.protobuf.FieldDescriptorProto *************************/
6718
field_startmsg(void * closure,const void * hd)6719 static bool field_startmsg(void *closure, const void *hd) {
6720 upb_descreader *r = closure;
6721 UPB_UNUSED(hd);
6722 assert(r->f);
6723 upb_gfree(r->default_string);
6724 r->default_string = NULL;
6725
6726 /* fielddefs default to packed, but descriptors default to non-packed. */
6727 upb_fielddef_setpacked(r->f, false);
6728 return true;
6729 }
6730
6731 /* Converts the default value in string "str" into "d". Passes a ref on str.
6732 * Returns true on success. */
parse_default(char * str,upb_fielddef * f)6733 static bool parse_default(char *str, upb_fielddef *f) {
6734 bool success = true;
6735 char *end;
6736 switch (upb_fielddef_type(f)) {
6737 case UPB_TYPE_INT32: {
6738 long val = strtol(str, &end, 0);
6739 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6740 success = false;
6741 else
6742 upb_fielddef_setdefaultint32(f, val);
6743 break;
6744 }
6745 case UPB_TYPE_INT64: {
6746 /* XXX: Need to write our own strtoll, since it's not available in c89. */
6747 long long val = strtol(str, &end, 0);
6748 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6749 success = false;
6750 else
6751 upb_fielddef_setdefaultint64(f, val);
6752 break;
6753 }
6754 case UPB_TYPE_UINT32: {
6755 unsigned long val = strtoul(str, &end, 0);
6756 if (val > UINT32_MAX || errno == ERANGE || *end)
6757 success = false;
6758 else
6759 upb_fielddef_setdefaultuint32(f, val);
6760 break;
6761 }
6762 case UPB_TYPE_UINT64: {
6763 /* XXX: Need to write our own strtoull, since it's not available in c89. */
6764 unsigned long long val = strtoul(str, &end, 0);
6765 if (val > UINT64_MAX || errno == ERANGE || *end)
6766 success = false;
6767 else
6768 upb_fielddef_setdefaultuint64(f, val);
6769 break;
6770 }
6771 case UPB_TYPE_DOUBLE: {
6772 double val = strtod(str, &end);
6773 if (errno == ERANGE || *end)
6774 success = false;
6775 else
6776 upb_fielddef_setdefaultdouble(f, val);
6777 break;
6778 }
6779 case UPB_TYPE_FLOAT: {
6780 /* XXX: Need to write our own strtof, since it's not available in c89. */
6781 float val = strtod(str, &end);
6782 if (errno == ERANGE || *end)
6783 success = false;
6784 else
6785 upb_fielddef_setdefaultfloat(f, val);
6786 break;
6787 }
6788 case UPB_TYPE_BOOL: {
6789 if (strcmp(str, "false") == 0)
6790 upb_fielddef_setdefaultbool(f, false);
6791 else if (strcmp(str, "true") == 0)
6792 upb_fielddef_setdefaultbool(f, true);
6793 else
6794 success = false;
6795 break;
6796 }
6797 default: abort();
6798 }
6799 return success;
6800 }
6801
/* Finishes a field: applies the default value saved by field_ondefaultval(),
 * if there was one.
 *
 * - No saved default: nothing to do, returns true.
 * - Submessage field: error "Submessages cannot have defaults.", returns
 *   false.
 * - String or enum field: the text is stored as-is with
 *   upb_fielddef_setdefaultcstr() (its result is not checked).
 * - Anything else: the text is converted with parse_default(); a failed
 *   conversion stores "Error converting default value." and returns false. */
static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_fielddef *field = reader->f;
  char *default_text = reader->default_string;
  UPB_UNUSED(hd);

  /* TODO: verify that all required fields were present. */
  assert(upb_fielddef_number(field) != 0);
  assert(upb_fielddef_name(field) != NULL);
  assert((upb_fielddef_subdefname(field) != NULL) ==
         upb_fielddef_hassubdef(field));

  if (default_text == NULL) {
    return true;
  }

  if (upb_fielddef_issubmsg(field)) {
    upb_status_seterrmsg(status, "Submessages cannot have defaults.");
    return false;
  }

  if (upb_fielddef_isstring(field) ||
      upb_fielddef_type(field) == UPB_TYPE_ENUM) {
    upb_fielddef_setdefaultcstr(field, default_text, NULL);
    return true;
  }

  if (!parse_default(default_text, field)) {
    /* We don't worry too much about giving a great error message since the
     * compiler should have ensured this was correct. */
    upb_status_seterrmsg(status, "Error converting default value.");
    return false;
  }
  return true;
}
6830
field_onlazy(void * closure,const void * hd,bool val)6831 static bool field_onlazy(void *closure, const void *hd, bool val) {
6832 upb_descreader *r = closure;
6833 UPB_UNUSED(hd);
6834
6835 upb_fielddef_setlazy(r->f, val);
6836 return true;
6837 }
6838
field_onpacked(void * closure,const void * hd,bool val)6839 static bool field_onpacked(void *closure, const void *hd, bool val) {
6840 upb_descreader *r = closure;
6841 UPB_UNUSED(hd);
6842
6843 upb_fielddef_setpacked(r->f, val);
6844 return true;
6845 }
6846
field_ontype(void * closure,const void * hd,int32_t val)6847 static bool field_ontype(void *closure, const void *hd, int32_t val) {
6848 upb_descreader *r = closure;
6849 UPB_UNUSED(hd);
6850
6851 upb_fielddef_setdescriptortype(r->f, val);
6852 return true;
6853 }
6854
field_onlabel(void * closure,const void * hd,int32_t val)6855 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
6856 upb_descreader *r = closure;
6857 UPB_UNUSED(hd);
6858
6859 upb_fielddef_setlabel(r->f, val);
6860 return true;
6861 }
6862
field_onnumber(void * closure,const void * hd,int32_t val)6863 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
6864 upb_descreader *r = closure;
6865 bool ok;
6866 UPB_UNUSED(hd);
6867
6868 ok = upb_fielddef_setnumber(r->f, val, NULL);
6869 UPB_ASSERT_VAR(ok, ok);
6870 return true;
6871 }
6872
field_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6873 static size_t field_onname(void *closure, const void *hd, const char *buf,
6874 size_t n, const upb_bufhandle *handle) {
6875 upb_descreader *r = closure;
6876 char *name = upb_strndup(buf, n);
6877 UPB_UNUSED(hd);
6878 UPB_UNUSED(handle);
6879
6880 /* XXX: see comment at the top of the file. */
6881 upb_fielddef_setname(r->f, name, NULL);
6882 upb_gfree(name);
6883 return n;
6884 }
6885
field_ontypename(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6886 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6887 size_t n, const upb_bufhandle *handle) {
6888 upb_descreader *r = closure;
6889 char *name = upb_strndup(buf, n);
6890 UPB_UNUSED(hd);
6891 UPB_UNUSED(handle);
6892
6893 /* XXX: see comment at the top of the file. */
6894 upb_fielddef_setsubdefname(r->f, name, NULL);
6895 upb_gfree(name);
6896 return n;
6897 }
6898
field_onextendee(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6899 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6900 size_t n, const upb_bufhandle *handle) {
6901 upb_descreader *r = closure;
6902 char *name = upb_strndup(buf, n);
6903 UPB_UNUSED(hd);
6904 UPB_UNUSED(handle);
6905
6906 /* XXX: see comment at the top of the file. */
6907 upb_fielddef_setcontainingtypename(r->f, name, NULL);
6908 upb_gfree(name);
6909 return n;
6910 }
6911
field_ondefaultval(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6912 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6913 size_t n, const upb_bufhandle *handle) {
6914 upb_descreader *r = closure;
6915 UPB_UNUSED(hd);
6916 UPB_UNUSED(handle);
6917
6918 /* Have to convert from string to the correct type, but we might not know the
6919 * type yet, so we save it as a string until the end of the field.
6920 * XXX: see comment at the top of the file. */
6921 upb_gfree(r->default_string);
6922 r->default_string = upb_strndup(buf, n);
6923 return n;
6924 }
6925
6926 /** Handlers for google.protobuf.DescriptorProto ******************************/
6927
msg_start(void * closure,const void * hd)6928 static bool msg_start(void *closure, const void *hd) {
6929 upb_descreader *r = closure;
6930 UPB_UNUSED(hd);
6931
6932 upb_descreader_startcontainer(r);
6933 return true;
6934 }
6935
/* Finishes a message: fails with "Encountered message with no name." if the
 * message never received a name, otherwise closes its scope frame and returns
 * the result of upb_descreader_endcontainer(). */
static bool msg_end(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_msgdef *msg = upb_descreader_top(reader);
  UPB_UNUSED(hd);

  if (upb_def_fullname(upb_msgdef_upcast_mutable(msg)) == NULL) {
    upb_status_seterrmsg(status, "Encountered message with no name.");
    return false;
  }
  return upb_descreader_endcontainer(reader);
}
6947
msg_name(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6948 static size_t msg_name(void *closure, const void *hd, const char *buf,
6949 size_t n, const upb_bufhandle *handle) {
6950 upb_descreader *r = closure;
6951 upb_msgdef *m = upb_descreader_top(r);
6952 /* XXX: see comment at the top of the file. */
6953 char *name = upb_strndup(buf, n);
6954 UPB_UNUSED(hd);
6955 UPB_UNUSED(handle);
6956
6957 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6958 upb_descreader_setscopename(r, name); /* Passes ownership of name. */
6959 return n;
6960 }
6961
msg_startmsg(void * closure,const void * hd)6962 static void *msg_startmsg(void *closure, const void *hd) {
6963 upb_descreader *r = closure;
6964 upb_msgdef *m = upb_msgdef_new(&m);
6965 bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
6966 UPB_UNUSED(hd);
6967 UPB_ASSERT_VAR(ok, ok);
6968 return r;
6969 }
6970
msg_startext(void * closure,const void * hd)6971 static void *msg_startext(void *closure, const void *hd) {
6972 upb_descreader *r = closure;
6973 upb_fielddef *f = upb_fielddef_new(&f);
6974 bool ok = upb_filedef_addext(r->file, f, &f, NULL);
6975 UPB_UNUSED(hd);
6976 UPB_ASSERT_VAR(ok, ok);
6977 return r;
6978 }
6979
msg_startfield(void * closure,const void * hd)6980 static void *msg_startfield(void *closure, const void *hd) {
6981 upb_descreader *r = closure;
6982 r->f = upb_fielddef_new(&r->f);
6983 /* We can't add the new field to the message until its name/number are
6984 * filled in. */
6985 UPB_UNUSED(hd);
6986 return r;
6987 }
6988
msg_endfield(void * closure,const void * hd)6989 static bool msg_endfield(void *closure, const void *hd) {
6990 upb_descreader *r = closure;
6991 upb_msgdef *m = upb_descreader_top(r);
6992 UPB_UNUSED(hd);
6993
6994 upb_msgdef_addfield(m, r->f, &r->f, NULL);
6995 r->f = NULL;
6996 return true;
6997 }
6998
msg_onmapentry(void * closure,const void * hd,bool mapentry)6999 static bool msg_onmapentry(void *closure, const void *hd, bool mapentry) {
7000 upb_descreader *r = closure;
7001 upb_msgdef *m = upb_descreader_top(r);
7002 UPB_UNUSED(hd);
7003
7004 upb_msgdef_setmapentry(m, mapentry);
7005 r->f = NULL;
7006 return true;
7007 }
7008
7009
7010
7011 /** Code to register handlers *************************************************/
7012
/* Shorthand for the generated accessor of field `field` in
 * google.protobuf.<msg>.  Expands to a call that takes a variable named `m`,
 * so it may only be used where such a variable is in scope. */
#define F(msg, field) upbdefs_google_protobuf_ ## msg ## _f_ ## field(m)

/* Handler-registration callback (passed to upb_handlers_newfrozen() by
 * upb_descreader_newhandlers()).  Looks at which descriptor.proto message the
 * handlers object `h` is for and installs the matching descriptor-reader
 * callbacks; message types not listed here get no handlers.  `closure` is
 * unused. */
static void reghandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *m = upb_handlers_msgdef(h);
  UPB_UNUSED(closure);

  if (upbdefs_google_protobuf_FileDescriptorSet_is(m)) {
    upb_handlers_setstartsubmsg(h, F(FileDescriptorSet, file),
                                &fileset_startfile, NULL);
  } else if (upbdefs_google_protobuf_DescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &msg_start, NULL);
    upb_handlers_setendmsg(h, &msg_end, NULL);
    upb_handlers_setstring(h, F(DescriptorProto, name), &msg_name, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, extension), &msg_startext,
                                NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, nested_type),
                                &msg_startmsg, NULL);
    /* Fields are created on start and only attached to the message on end. */
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, field),
                                &msg_startfield, NULL);
    upb_handlers_setendsubmsg(h, F(DescriptorProto, field),
                              &msg_endfield, NULL);
    /* Nested enums reuse the file-level enum start handler. */
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, enum_type),
                                &file_startenum, NULL);
  } else if (upbdefs_google_protobuf_FileDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &file_start, NULL);
    upb_handlers_setendmsg(h, &file_end, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, name), &file_onname,
                           NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, package), &file_onpackage,
                           NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, syntax), &file_onsyntax,
                           NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, message_type),
                                &file_startmsg, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, enum_type),
                                &file_startenum, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, extension),
                                &file_startext, NULL);
  } else if (upbdefs_google_protobuf_EnumValueDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumValueDescriptorProto, name), &enumval_onname, NULL);
    upb_handlers_setint32(h, F(EnumValueDescriptorProto, number), &enumval_onnumber,
                          NULL);
  } else if (upbdefs_google_protobuf_EnumDescriptorProto_is(m)) {
    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumDescriptorProto, name), &enum_onname, NULL);
  } else if (upbdefs_google_protobuf_FieldDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
    upb_handlers_setendmsg(h, &field_endmsg, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, type), &field_ontype,
                          NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, label), &field_onlabel,
                          NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, number), &field_onnumber,
                          NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, name), &field_onname,
                           NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, type_name),
                           &field_ontypename, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, extendee),
                           &field_onextendee, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, default_value),
                           &field_ondefaultval, NULL);
  } else if (upbdefs_google_protobuf_FieldOptions_is(m)) {
    upb_handlers_setbool(h, F(FieldOptions, lazy), &field_onlazy, NULL);
    upb_handlers_setbool(h, F(FieldOptions, packed), &field_onpacked, NULL);
  } else if (upbdefs_google_protobuf_MessageOptions_is(m)) {
    upb_handlers_setbool(h, F(MessageOptions, map_entry), &msg_onmapentry, NULL);
  }

  /* The individual set* results are not checked; any registration failure is
   * only caught here, and only in builds with assertions enabled. */
  assert(upb_ok(upb_handlers_status(h)));
}

#undef F
7088
descreader_cleanup(void * _r)7089 void descreader_cleanup(void *_r) {
7090 upb_descreader *r = _r;
7091 size_t i;
7092
7093 for (i = 0; i < upb_descreader_filecount(r); i++) {
7094 upb_filedef_unref(upb_descreader_file(r, i), &r->files);
7095 }
7096
7097 upb_gfree(r->name);
7098 upb_inttable_uninit(&r->files);
7099 upb_gfree(r->default_string);
7100 while (r->stack_len > 0) {
7101 upb_descreader_frame *f = &r->stack[--r->stack_len];
7102 upb_gfree(f->name);
7103 }
7104 }
7105
7106
7107 /* Public API ****************************************************************/
7108
upb_descreader_create(upb_env * e,const upb_handlers * h)7109 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
7110 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
7111 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
7112 return NULL;
7113 }
7114
7115 upb_inttable_init(&r->files, UPB_CTYPE_PTR);
7116 upb_sink_reset(upb_descreader_input(r), h, r);
7117 r->stack_len = 0;
7118 r->name = NULL;
7119 r->default_string = NULL;
7120
7121 return r;
7122 }
7123
upb_descreader_filecount(const upb_descreader * r)7124 size_t upb_descreader_filecount(const upb_descreader *r) {
7125 return upb_inttable_count(&r->files);
7126 }
7127
upb_descreader_file(const upb_descreader * r,size_t i)7128 upb_filedef *upb_descreader_file(const upb_descreader *r, size_t i) {
7129 upb_value v;
7130 if (upb_inttable_lookup(&r->files, i, &v)) {
7131 return upb_value_getptr(v);
7132 } else {
7133 return NULL;
7134 }
7135 }
7136
upb_descreader_input(upb_descreader * r)7137 upb_sink *upb_descreader_input(upb_descreader *r) {
7138 return &r->sink;
7139 }
7140
upb_descreader_newhandlers(const void * owner)7141 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
7142 const upb_msgdef *m = upbdefs_google_protobuf_FileDescriptorSet_get(&m);
7143 const upb_handlers *h = upb_handlers_newfrozen(m, owner, reghandlers, NULL);
7144 upb_msgdef_unref(m, &m);
7145 return h;
7146 }
7147 /*
7148 ** protobuf decoder bytecode compiler
7149 **
7150 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
7151 ** according to that specific schema and destination handlers.
7152 **
7153 ** Compiling to bytecode is always the first step. If we are using the
7154 ** interpreted decoder we leave it as bytecode and interpret that. If we are
7155 ** using a JIT decoder we use a code generator to turn the bytecode into native
7156 ** code, LLVM IR, etc.
7157 **
7158 ** Bytecode definition is in decoder.int.h.
7159 */
7160
7161 #include <stdarg.h>
7162
7163 #ifdef UPB_DUMP_BYTECODE
7164 #include <stdio.h>
7165 #endif
7166
/* Size of the compiler's per-label tables; label numbers must be below it. */
#define MAXLABEL 5
/* Sentinel for a label slot with no offset recorded.  Parenthesized so the
 * negative literal always expands as a single operand. */
#define EMPTYLABEL (-1)
7169
7170 /* mgroup *********************************************************************/
7171
/* Refcounted "free" callback for an mgroup: releases the method table, any
 * JIT state (when built with UPB_USE_JIT_X64), the bytecode buffer and finally
 * the group itself. */
static void freegroup(upb_refcounted *r) {
  mgroup *group = (mgroup*)r;

  upb_inttable_uninit(&group->methods);
#ifdef UPB_USE_JIT_X64
  upb_pbdecoder_freejit(group);
#endif
  upb_gfree(group->bytecode);
  upb_gfree(group);
}
7181
/* Refcounted "visit" callback for an mgroup: reports every method stored in
 * the group's method table as an object referenced by the group. */
static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const mgroup *group = (const mgroup*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &group->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    visit(r, upb_pbdecodermethod_upcast(method), closure);
    upb_inttable_next(&it);
  }
}
7192
newgroup(const void * owner)7193 mgroup *newgroup(const void *owner) {
7194 mgroup *g = upb_gmalloc(sizeof(*g));
7195 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
7196 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
7197 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
7198 g->bytecode = NULL;
7199 g->bytecode_end = NULL;
7200 return g;
7201 }
7202
7203
7204 /* upb_pbdecodermethod ********************************************************/
7205
/* Refcounted "free" callback for a decoder method: drops the reference on the
 * destination handlers (if any), then frees the dispatch table and the method
 * itself. */
static void freemethod(upb_refcounted *r) {
  upb_pbdecodermethod *m = (upb_pbdecodermethod*)r;
  const upb_handlers *dest = m->dest_handlers_;

  if (dest) {
    upb_handlers_unref(dest, m);
  }

  upb_inttable_uninit(&m->dispatch);
  upb_gfree(m);
}
7216
/* Refcounted "visit" callback for a decoder method: the only object it
 * reports is the group it belongs to. */
static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
                        void *closure) {
  const upb_pbdecodermethod *method = (const upb_pbdecodermethod*)r;
  const upb_refcounted *group = method->group;

  visit(r, group, closure);
}
7222
newmethod(const upb_handlers * dest_handlers,mgroup * group)7223 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
7224 mgroup *group) {
7225 static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
7226 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
7227 upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
7228 upb_byteshandler_init(&ret->input_handler_);
7229
7230 /* The method references the group and vice-versa, in a circular reference. */
7231 upb_ref2(ret, group);
7232 upb_ref2(group, ret);
7233 upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
7234 upb_pbdecodermethod_unref(ret, &ret);
7235
7236 ret->group = mgroup_upcast_mutable(group);
7237 ret->dest_handlers_ = dest_handlers;
7238 ret->is_native_ = false; /* If we JIT, it will update this later. */
7239 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
7240
7241 if (ret->dest_handlers_) {
7242 upb_handlers_ref(ret->dest_handlers_, ret);
7243 }
7244 return ret;
7245 }
7246
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)7247 const upb_handlers *upb_pbdecodermethod_desthandlers(
7248 const upb_pbdecodermethod *m) {
7249 return m->dest_handlers_;
7250 }
7251
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)7252 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
7253 const upb_pbdecodermethod *m) {
7254 return &m->input_handler_;
7255 }
7256
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)7257 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
7258 return m->is_native_;
7259 }
7260
upb_pbdecodermethod_new(const upb_pbdecodermethodopts * opts,const void * owner)7261 const upb_pbdecodermethod *upb_pbdecodermethod_new(
7262 const upb_pbdecodermethodopts *opts, const void *owner) {
7263 const upb_pbdecodermethod *ret;
7264 upb_pbcodecache cache;
7265
7266 upb_pbcodecache_init(&cache);
7267 ret = upb_pbcodecache_getdecodermethod(&cache, opts);
7268 upb_pbdecodermethod_ref(ret, owner);
7269 upb_pbcodecache_uninit(&cache);
7270 return ret;
7271 }
7272
7273
7274 /* bytecode compiler **********************************************************/
7275
/* Data used only at compilation time. */
typedef struct {
  /* Group whose bytecode buffer is being written. */
  mgroup *group;

  /* Position in the group's bytecode where the next word will be written. */
  uint32_t *pc;
  /* Per-label bookkeeping, indexed by label number.  fwd_labels[i] is the
   * bytecode offset of the most recent not-yet-resolved forward reference to
   * label i; back_labels[i] is the offset at which label i was last defined.
   * A slot holds EMPTYLABEL when nothing is recorded. */
  int fwd_labels[MAXLABEL];
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
7287
newcompiler(mgroup * group,bool lazy)7288 static compiler *newcompiler(mgroup *group, bool lazy) {
7289 compiler *ret = upb_gmalloc(sizeof(*ret));
7290 int i;
7291
7292 ret->group = group;
7293 ret->lazy = lazy;
7294 for (i = 0; i < MAXLABEL; i++) {
7295 ret->fwd_labels[i] = EMPTYLABEL;
7296 ret->back_labels[i] = EMPTYLABEL;
7297 }
7298 return ret;
7299 }
7300
/* Releases a compiler allocated by newcompiler().  The group it points at is
 * not touched. */
static void freecompiler(compiler *c) { upb_gfree(c); }
7304
/* Number of 32-bit bytecode words occupied by one pointer operand (such as the
 * one following OP_SETDISPATCH). */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
7306
7307 /* How many words an instruction is. */
instruction_len(uint32_t instr)7308 static int instruction_len(uint32_t instr) {
7309 switch (getop(instr)) {
7310 case OP_SETDISPATCH: return 1 + ptr_words;
7311 case OP_TAGN: return 3;
7312 case OP_SETBIGGROUPNUM: return 2;
7313 default: return 1;
7314 }
7315 }
7316
op_has_longofs(int32_t instruction)7317 bool op_has_longofs(int32_t instruction) {
7318 switch (getop(instruction)) {
7319 case OP_CALL:
7320 case OP_BRANCH:
7321 case OP_CHECKDELIM:
7322 return true;
7323 /* The "tag" instructions only have 8 bytes available for the jump target,
7324 * but that is ok because these opcodes only require short jumps. */
7325 case OP_TAG1:
7326 case OP_TAG2:
7327 case OP_TAGN:
7328 return false;
7329 default:
7330 assert(false);
7331 return false;
7332 }
7333 }
7334
/* Extracts the signed jump offset stored in an instruction word: bits 8 and up
 * for long-offset opcodes, or the single byte at bits 8..15 otherwise. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    /* Short form: sign-extend the 8-bit field. */
    return (int8_t)(instruction >> 8);
  }
  /* Long form: shift the word as a signed value to drop the opcode byte. */
  return (int32_t)instruction >> 8;
}
7342
/* Stores jump offset `ofs` into *instruction, keeping the opcode (and, for
 * short-offset opcodes, everything outside bits 8..15).  Long-offset opcodes
 * get the offset in bits 8 and up; the others get only its low 8 bits. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    /* Shift as unsigned: ofs is negative for backward jumps, and left-shifting
     * a negative signed value is undefined behavior. */
    *instruction = (uint32_t)getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
7351
/* Returns the current write position as a word offset from the start of the
 * group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return c->pc - base;
}
7353
7354 /* Defines a local label at the current PC location. All previous forward
7355 * references are updated to point to this location. The location is noted
7356 * for any future backward references. */
label(compiler * c,unsigned int label)7357 static void label(compiler *c, unsigned int label) {
7358 int val;
7359 uint32_t *codep;
7360
7361 assert(label < MAXLABEL);
7362 val = c->fwd_labels[label];
7363 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
7364 while (codep) {
7365 int ofs = getofs(*codep);
7366 setofs(codep, c->pc - codep - instruction_len(*codep));
7367 codep = ofs ? codep + ofs : NULL;
7368 }
7369 c->fwd_labels[label] = EMPTYLABEL;
7370 c->back_labels[label] = pcofs(c);
7371 }
7372
7373 /* Creates a reference to a numbered label; either a forward reference
7374 * (positive arg) or backward reference (negative arg). For forward references
7375 * the value returned now is actually a "next" pointer into a linked list of all
7376 * instructions that use this label and will be patched later when the label is
7377 * defined with label().
7378 *
7379 * The returned value is the offset that should be written into the instruction.
7380 */
labelref(compiler * c,int label)7381 static int32_t labelref(compiler *c, int label) {
7382 assert(label < MAXLABEL);
7383 if (label == LABEL_DISPATCH) {
7384 /* No resolving required. */
7385 return 0;
7386 } else if (label < 0) {
7387 /* Backward local label. Relative to the next instruction. */
7388 uint32_t from = (c->pc + 1) - c->group->bytecode;
7389 return c->back_labels[-label] - from;
7390 } else {
7391 /* Forward local label: prepend to (possibly-empty) linked list. */
7392 int *lptr = &c->fwd_labels[label];
7393 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
7394 *lptr = pcofs(c);
7395 return ret;
7396 }
7397 }
7398
/* Appends one 32-bit word to the group's bytecode at the compiler's pc,
 * growing the buffer first when it is full. */
static void put32(compiler *c, uint32_t v) {
  mgroup *group = c->group;

  if (c->pc == group->bytecode_end) {
    /* Out of room (or nothing allocated yet): double the capacity, with a
     * floor of 64 words, and re-derive pc inside the new buffer. */
    int pos = pcofs(c);
    size_t cur_words = group->bytecode_end - group->bytecode;
    size_t new_words = UPB_MAX(cur_words * 2, 64);
    /* TODO(haberman): handle OOM. */
    group->bytecode = upb_grealloc(group->bytecode,
                                   cur_words * sizeof(uint32_t),
                                   new_words * sizeof(uint32_t));
    group->bytecode_end = group->bytecode + new_words;
    c->pc = group->bytecode + pos;
  }

  *c->pc = v;
  c->pc++;
}
7413
/* Emits one complete instruction (opcode word plus any operand words) at the
 * current pc.  The variadic arguments depend on the opcode:
 *
 *   OP_SETDISPATCH            void* pointer, written to the following word(s)
 *   OP_STARTMSG .. OP_DISPATCH (first group below): no operand is read
 *   OP_PARSE_*, sequence/submsg/string ops, OP_PUSHTAGDELIM:
 *                             upb_selector_t, stored in bits 8 and up
 *   OP_SETBIGGROUPNUM         int, written to the following word
 *   OP_CALL                   upb_pbdecodermethod* to call
 *   OP_CHECKDELIM, OP_BRANCH  int label (see labelref())
 *   OP_TAG1, OP_TAG2          int label, uint64_t tag (must fit in 16 bits)
 *   OP_TAGN                   int label, uint64_t tag
 *
 * Arguments must be passed with exactly these types, since they are fetched
 * with va_arg(). */
static void putop(compiler *c, opcode op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      /* Low 32 bits first; the high half only exists on wider pointers. */
      put32(c, ptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      /* Opcode in the low byte, operand in the bits above it. */
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      /* The target is encoded relative to the instruction after this one. */
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      /* The tag itself lives in the upper 16 bits of the instruction word. */
      uint32_t instruction = op | (tag << 16);
      assert(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      /* Only the tag's size fits in the instruction word; the tag is written
       * to the two words that follow, low half first. */
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(ap);
}
7501
7502 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
7503
upb_pbdecoder_getopname(unsigned int op)7504 const char *upb_pbdecoder_getopname(unsigned int op) {
7505 #define QUOTE(x) #x
7506 #define EXPAND_AND_QUOTE(x) QUOTE(x)
7507 #define OPNAME(x) OP_##x
7508 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
7509 #define T(x) OP(PARSE_##x)
7510 /* Keep in sync with list in decoder.int.h. */
7511 switch ((opcode)op) {
7512 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
7513 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
7514 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
7515 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
7516 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
7517 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
7518 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
7519 }
7520 return "<unknown op>";
7521 #undef OP
7522 #undef T
7523 }
7524
7525 #endif
7526
7527 #ifdef UPB_DUMP_BYTECODE
7528
dumpbc(uint32_t * p,uint32_t * end,FILE * f)7529 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
7530
7531 uint32_t *begin = p;
7532
7533 while (p < end) {
7534 fprintf(f, "%p %8tx", p, p - begin);
7535 uint32_t instr = *p++;
7536 uint8_t op = getop(instr);
7537 fprintf(f, " %s", upb_pbdecoder_getopname(op));
7538 switch ((opcode)op) {
7539 case OP_SETDISPATCH: {
7540 const upb_inttable *dispatch;
7541 memcpy(&dispatch, p, sizeof(void*));
7542 p += ptr_words;
7543 const upb_pbdecodermethod *method =
7544 (void *)((char *)dispatch -
7545 offsetof(upb_pbdecodermethod, dispatch));
7546 fprintf(f, " %s", upb_msgdef_fullname(
7547 upb_handlers_msgdef(method->dest_handlers_)));
7548 break;
7549 }
7550 case OP_DISPATCH:
7551 case OP_STARTMSG:
7552 case OP_ENDMSG:
7553 case OP_PUSHLENDELIM:
7554 case OP_POP:
7555 case OP_SETDELIM:
7556 case OP_HALT:
7557 case OP_RET:
7558 break;
7559 case OP_PARSE_DOUBLE:
7560 case OP_PARSE_FLOAT:
7561 case OP_PARSE_INT64:
7562 case OP_PARSE_UINT64:
7563 case OP_PARSE_INT32:
7564 case OP_PARSE_FIXED64:
7565 case OP_PARSE_FIXED32:
7566 case OP_PARSE_BOOL:
7567 case OP_PARSE_UINT32:
7568 case OP_PARSE_SFIXED32:
7569 case OP_PARSE_SFIXED64:
7570 case OP_PARSE_SINT32:
7571 case OP_PARSE_SINT64:
7572 case OP_STARTSEQ:
7573 case OP_ENDSEQ:
7574 case OP_STARTSUBMSG:
7575 case OP_ENDSUBMSG:
7576 case OP_STARTSTR:
7577 case OP_STRING:
7578 case OP_ENDSTR:
7579 case OP_PUSHTAGDELIM:
7580 fprintf(f, " %d", instr >> 8);
7581 break;
7582 case OP_SETBIGGROUPNUM:
7583 fprintf(f, " %d", *p++);
7584 break;
7585 case OP_CHECKDELIM:
7586 case OP_CALL:
7587 case OP_BRANCH:
7588 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
7589 break;
7590 case OP_TAG1:
7591 case OP_TAG2: {
7592 fprintf(f, " tag:0x%x", instr >> 16);
7593 if (getofs(instr)) {
7594 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
7595 }
7596 break;
7597 }
7598 case OP_TAGN: {
7599 uint64_t tag = *p++;
7600 tag |= (uint64_t)*p++ << 32;
7601 fprintf(f, " tag:0x%llx", (long long)tag);
7602 fprintf(f, " n:%d", instr >> 16);
7603 if (getofs(instr)) {
7604 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
7605 }
7606 break;
7607 }
7608 }
7609 fputs("\n", f);
7610 }
7611 }
7612
7613 #endif
7614
/* Returns the varint-encoded wire tag for field `f` with the given wire type,
 * packed into a 64-bit integer. */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  assert(encoded <= 0xffffffffff);
  return encoded;
}
7622
/* Emits a tag-check instruction for field `f` and the given wire type whose
 * jump target is label `dest`.  The opcode is picked from the size of the
 * encoded tag: OP_TAG1, OP_TAG2, or OP_TAGN for anything larger. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  opcode check_op;

  switch (upb_value_size(tag)) {
    case 1:
      check_op = OP_TAG1;
      break;
    case 2:
      check_op = OP_TAG2;
      break;
    default:
      check_op = OP_TAGN;
      break;
  }
  putop(c, check_op, dest, tag);
}
7638
getsel(const upb_fielddef * f,upb_handlertype_t type)7639 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
7640 upb_selector_t selector;
7641 bool ok = upb_handlers_getselector(f, type, &selector);
7642 UPB_ASSERT_VAR(ok, ok);
7643 return selector;
7644 }
7645
7646 /* Takes an existing, primary dispatch table entry and repacks it with a
7647 * different alternate wire type. Called when we are inserting a secondary
7648 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)7649 static uint64_t repack(uint64_t dispatch, int new_wt2) {
7650 uint64_t ofs;
7651 uint8_t wt1;
7652 uint8_t old_wt2;
7653 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
7654 assert(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
7655 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
7656 }
7657
7658 /* Marks the current bytecode position as the dispatch target for this message,
7659 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)7660 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
7661 const upb_fielddef *f, int wire_type) {
7662 /* Offset is relative to msg base. */
7663 uint64_t ofs = pcofs(c) - method->code_base.ofs;
7664 uint32_t fn = upb_fielddef_number(f);
7665 upb_inttable *d = &method->dispatch;
7666 upb_value v;
7667 if (upb_inttable_remove(d, fn, &v)) {
7668 /* TODO: prioritize based on packed setting in .proto file. */
7669 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
7670 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
7671 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
7672 } else {
7673 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7674 upb_inttable_insert(d, fn, upb_value_uint64(val));
7675 }
7676 }
7677
/* Emits the "push" instruction that opens a submessage field: a
 * length-delimited push for MESSAGE-typed fields, a tag-delimited push
 * carrying the field number otherwise. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  /* Numbers of 1 << 24 and above are not passed as the push operand; they are
   * supplied by a separate OP_SETBIGGROUPNUM instruction instead. */
  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
7691
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)7692 static upb_pbdecodermethod *find_submethod(const compiler *c,
7693 const upb_pbdecodermethod *method,
7694 const upb_fielddef *f) {
7695 const upb_handlers *sub =
7696 upb_handlers_getsubhandlers(method->dest_handlers_, f);
7697 upb_value v;
7698 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7699 ? upb_value_getptr(v)
7700 : NULL;
7701 }
7702
/* Emits `op` with selector `sel` as its operand, but only when handlers `h`
 * actually have a handler registered for that selector. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel)) {
    return;
  }
  putop(c, op, sel);
}
7709
7710 /* Puts an opcode to call a callback, but only if a callback actually exists for
7711 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)7712 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7713 const upb_fielddef *f, upb_handlertype_t type) {
7714 putsel(c, op, getsel(f, type), h);
7715 }
7716
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)7717 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7718 if (!upb_fielddef_lazy(f))
7719 return false;
7720
7721 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7722 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7723 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7724 }
7725
7726
7727 /* bytecode compiler code generation ******************************************/
7728
/* Symbolic names for our local labels.  Each value indexes the compiler's
 * fwd_labels/back_labels arrays, so it must stay below MAXLABEL.  Passing the
 * negated value to putop() requests a backward reference (see labelref()). */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
7734
/* Generates bytecode to parse a single non-lazy message field.
 *
 * c:      compiler state; instructions are appended at its current pc.
 * f:      the submessage (or group) field to generate code for.
 * method: the method being compiled; supplies the destination handlers and
 *         receives the dispatch-table entry for this field.
 *
 * Nothing is emitted when the group has no method for the field's
 * sub-handlers. */
static void generate_msgfield(compiler *c, const upb_fielddef *f,
                              upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
  int wire_type;

  if (!sub_m) {
    /* Don't emit any code for this field at all; it will be parsed as an
     * unknown field.
     *
     * TODO(haberman): we should change this to parse it as a string field
     * instead.  It will probably be faster, but more importantly, once we
     * start vending unknown fields, a field shouldn't be treated as unknown
     * just because it doesn't have subhandlers registered. */
    return;
  }

  label(c, LABEL_FIELD);

  /* MESSAGE-typed fields are length-delimited on the wire; anything else
   * reaching this function is handled as a group (start-group tag). */
  wire_type =
      (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
          ? UPB_WIRE_TYPE_DELIMITED
          : UPB_WIRE_TYPE_START_GROUP;

  if (upb_fielddef_isseq(f)) {
    /* Repeated field: check the tag once, open the sequence, then loop over
     * consecutive elements for as long as the same tag keeps appearing. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
    /* The dispatch target is the instruction right after the tag check. */
    dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
    label(c, LABEL_LOOPSTART);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    /* ENDSUBMSG is only emitted when a handler is registered for it. */
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
    /* Leave the loop at the delimiter or when the next tag differs. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    /* Singular field: same per-element sequence, without the loop. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
    dispatchtarget(c, method, f, wire_type);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
  }
}
7795
/* Generates bytecode to parse a single string or lazy submessage field.
 *
 * c:      compiler state; instructions are appended at its current pc.
 * f:      the field to generate code for; always handled as length-delimited.
 * method: the method being compiled; supplies the destination handlers and
 *         receives the dispatch-table entry for this field. */
static void generate_delimfield(compiler *c, const upb_fielddef *f,
                                upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);

  label(c, LABEL_FIELD);
  if (upb_fielddef_isseq(f)) {
    /* Repeated field: check the tag once, open the sequence, then loop over
     * consecutive elements for as long as the same tag keeps appearing. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
    /* The dispatch target is the instruction right after the tag check. */
    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
    label(c, LABEL_LOOPSTART);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
    /* Need to emit even if no handler to skip past the string. */
    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
    putop(c, OP_POP);
    /* ENDSTR is only emitted when a handler is registered for it. */
    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
    putop(c, OP_SETDELIM);
    /* Leave the loop at the delimiter or when the next tag differs. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    /* Singular field: same per-element sequence, without the loop. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
    putop(c, OP_POP);
    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
    putop(c, OP_SETDELIM);
  }
}
7834
7835 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)7836 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
7837 upb_pbdecodermethod *method) {
7838 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7839 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
7840 opcode parse_type;
7841 upb_selector_t sel;
7842 int wire_type;
7843
7844 label(c, LABEL_FIELD);
7845
7846 /* From a decoding perspective, ENUM is the same as INT32. */
7847 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
7848 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
7849
7850 parse_type = (opcode)descriptor_type;
7851
7852 /* TODO(haberman): generate packed or non-packed first depending on "packed"
7853 * setting in the fielddef. This will favor (in speed) whichever was
7854 * specified. */
7855
7856 assert((int)parse_type >= 0 && parse_type <= OP_MAX);
7857 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
7858 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
7859 if (upb_fielddef_isseq(f)) {
7860 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7861 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7862 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7863 putop(c, OP_PUSHLENDELIM);
7864 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
7865 label(c, LABEL_LOOPSTART);
7866 putop(c, parse_type, sel);
7867 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7868 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7869 dispatchtarget(c, method, f, wire_type);
7870 putop(c, OP_PUSHTAGDELIM, 0);
7871 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
7872 label(c, LABEL_LOOPSTART);
7873 putop(c, parse_type, sel);
7874 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7875 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7876 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7877 label(c, LABEL_LOOPBREAK);
7878 putop(c, OP_POP); /* Packed and non-packed join. */
7879 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7880 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
7881 } else {
7882 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7883 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7884 dispatchtarget(c, method, f, wire_type);
7885 putop(c, parse_type, sel);
7886 }
7887 }
7888
7889 /* Adds bytecode for parsing the given message to the given decoderplan,
7890 * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)7891 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
7892 const upb_handlers *h;
7893 const upb_msgdef *md;
7894 uint32_t* start_pc;
7895 upb_msg_field_iter i;
7896 upb_value val;
7897
7898 assert(method);
7899
7900 /* Clear all entries in the dispatch table. */
7901 upb_inttable_uninit(&method->dispatch);
7902 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
7903
7904 h = upb_pbdecodermethod_desthandlers(method);
7905 md = upb_handlers_msgdef(h);
7906
7907 method->code_base.ofs = pcofs(c);
7908 putop(c, OP_SETDISPATCH, &method->dispatch);
7909 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
7910 label(c, LABEL_FIELD);
7911 start_pc = c->pc;
7912 for(upb_msg_field_begin(&i, md);
7913 !upb_msg_field_done(&i);
7914 upb_msg_field_next(&i)) {
7915 const upb_fielddef *f = upb_msg_iter_field(&i);
7916 upb_fieldtype_t type = upb_fielddef_type(f);
7917
7918 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
7919 generate_msgfield(c, f, method);
7920 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
7921 type == UPB_TYPE_MESSAGE) {
7922 generate_delimfield(c, f, method);
7923 } else {
7924 generate_primitivefield(c, f, method);
7925 }
7926 }
7927
7928 /* If there were no fields, or if no handlers were defined, we need to
7929 * generate a non-empty loop body so that we can at least dispatch for unknown
7930 * fields and check for the end of the message. */
7931 if (c->pc == start_pc) {
7932 /* Check for end-of-message. */
7933 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7934 /* Unconditionally dispatch. */
7935 putop(c, OP_DISPATCH, 0);
7936 }
7937
7938 /* For now we just loop back to the last field of the message (or if none,
7939 * the DISPATCH opcode for the message). */
7940 putop(c, OP_BRANCH, -LABEL_FIELD);
7941
7942 /* Insert both a label and a dispatch table entry for this end-of-msg. */
7943 label(c, LABEL_ENDMSG);
7944 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
7945 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
7946
7947 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
7948 putop(c, OP_RET);
7949
7950 upb_inttable_compact(&method->dispatch);
7951 }
7952
7953 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7954 * Returns the method for these handlers.
7955 *
7956 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)7957 static void find_methods(compiler *c, const upb_handlers *h) {
7958 upb_value v;
7959 upb_msg_field_iter i;
7960 const upb_msgdef *md;
7961
7962 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7963 return;
7964 newmethod(h, c->group);
7965
7966 /* Find submethods. */
7967 md = upb_handlers_msgdef(h);
7968 for(upb_msg_field_begin(&i, md);
7969 !upb_msg_field_done(&i);
7970 upb_msg_field_next(&i)) {
7971 const upb_fielddef *f = upb_msg_iter_field(&i);
7972 const upb_handlers *sub_h;
7973 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7974 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
7975 /* We only generate a decoder method for submessages with handlers.
7976 * Others will be parsed as unknown fields. */
7977 find_methods(c, sub_h);
7978 }
7979 }
7980 }
7981
7982 /* (Re-)compile bytecode for all messages in "msgs."
7983 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)7984 static void compile_methods(compiler *c) {
7985 upb_inttable_iter i;
7986
7987 /* Start over at the beginning of the bytecode. */
7988 c->pc = c->group->bytecode;
7989
7990 upb_inttable_begin(&i, &c->group->methods);
7991 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7992 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7993 compile_method(c, method);
7994 }
7995 }
7996
/* Wires every method in the group to the bytecode interpreter: resolves the
 * method's code offset into a pointer into the group's bytecode and installs
 * the start/string/end callbacks on the method's input bytes handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    /* Offset (assigned during compilation) -> pointer into the bytecode. */
    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);

    upb_inttable_next(&it);
  }
}
8011
8012
8013 /* JIT setup. *****************************************************************/
8014
8015 #ifdef UPB_USE_JIT_X64
8016
sethandlers(mgroup * g,bool allowjit)8017 static void sethandlers(mgroup *g, bool allowjit) {
8018 g->jit_code = NULL;
8019 if (allowjit) {
8020 /* Compile byte-code into machine code, create handlers. */
8021 upb_pbdecoder_jit(g);
8022 } else {
8023 set_bytecode_handlers(g);
8024 }
8025 }
8026
8027 #else /* UPB_USE_JIT_X64 */
8028
sethandlers(mgroup * g,bool allowjit)8029 static void sethandlers(mgroup *g, bool allowjit) {
8030 /* No JIT compiled in; use bytecode handlers unconditionally. */
8031 UPB_UNUSED(allowjit);
8032 set_bytecode_handlers(g);
8033 }
8034
8035 #endif /* UPB_USE_JIT_X64 */
8036
8037
8038 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
8039 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool allowjit,bool lazy,const void * owner)8040 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
8041 const void *owner) {
8042 mgroup *g;
8043 compiler *c;
8044
8045 UPB_UNUSED(allowjit);
8046 assert(upb_handlers_isfrozen(dest));
8047
8048 g = newgroup(owner);
8049 c = newcompiler(g, lazy);
8050 find_methods(c, dest);
8051
8052 /* We compile in two passes:
8053 * 1. all messages are assigned relative offsets from the beginning of the
8054 * bytecode (saved in method->code_base).
8055 * 2. forwards OP_CALL instructions can be correctly linked since message
8056 * offsets have been previously assigned.
8057 *
8058 * Could avoid the second pass by linking OP_CALL instructions somehow. */
8059 compile_methods(c);
8060 compile_methods(c);
8061 g->bytecode_end = c->pc;
8062 freecompiler(c);
8063
8064 #ifdef UPB_DUMP_BYTECODE
8065 {
8066 FILE *f = fopen("/tmp/upb-bytecode", "w");
8067 assert(f);
8068 dumpbc(g->bytecode, g->bytecode_end, stderr);
8069 dumpbc(g->bytecode, g->bytecode_end, f);
8070 fclose(f);
8071
8072 f = fopen("/tmp/upb-bytecode.bin", "wb");
8073 assert(f);
8074 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
8075 fclose(f);
8076 }
8077 #endif
8078
8079 sethandlers(g, allowjit);
8080 return g;
8081 }
8082
8083
8084 /* upb_pbcodecache ************************************************************/
8085
/* Initializes a code cache: JIT use is allowed by default and the table of
 * owned method groups starts out empty. */
void upb_pbcodecache_init(upb_pbcodecache *c) {
  c->allow_jit_ = true;
  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
}
8090
/* Tears down a code cache: drops the cache's reference on every method group
 * it holds, then releases the group table itself. */
void upb_pbcodecache_uninit(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    const mgroup *g = upb_value_getconstptr(upb_inttable_iter_value(&it));
    mgroup_unref(g, c);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
}
8100
/* Returns the cache's current "allow JIT" setting. */
bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
  return c->allow_jit_;
}
8104
/* Changes the cache's "allow JIT" setting.  The setting can only be changed
 * while the cache holds no method groups; returns true if the setting was
 * applied, false if it was refused because groups already exist. */
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
  const bool has_groups = upb_inttable_count(&c->groups) > 0;

  if (!has_groups) {
    c->allow_jit_ = allow;
  }
  return !has_groups;
}
8111
upb_pbcodecache_getdecodermethod(upb_pbcodecache * c,const upb_pbdecodermethodopts * opts)8112 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
8113 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
8114 upb_value v;
8115 bool ok;
8116
8117 /* Right now we build a new DecoderMethod every time.
8118 * TODO(haberman): properly cache methods by their true key. */
8119 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
8120 upb_inttable_push(&c->groups, upb_value_constptr(g));
8121
8122 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
8123 UPB_ASSERT_VAR(ok, ok);
8124 return upb_value_getptr(v);
8125 }
8126
8127
8128 /* upb_pbdecodermethodopts ****************************************************/
8129
/* Initializes decoder-method options for handlers "h" with lazy mode turned
 * off. */
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
                                  const upb_handlers *h) {
  opts->lazy = false;
  opts->handlers = h;
}
8135
/* Sets the "lazy" flag of the options; the flag is later passed to the
 * bytecode compiler by upb_pbcodecache_getdecodermethod(). */
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
  opts->lazy = lazy;
}
8139 /*
8140 ** upb::Decoder (Bytecode Decoder VM)
8141 **
8142 ** Bytecode must previously have been generated using the bytecode compiler in
8143 ** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
8144 ** parse the input.
8145 **
** Decoding is fully resumable; we just keep a pointer to the current bytecode
** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
** sometimes requires keeping a few trailing bytes from the last buffer around
** in the "residual" buffer.
8152 */
8153
8154 #include <inttypes.h>
8155 #include <stddef.h>
8156
8157 #ifdef UPB_DUMP_BYTECODE
8158 #include <stdio.h>
8159 #endif
8160
/* If "x" evaluates false, suspends the decoder and returns the result of
 * upb_pbdecoder_suspend() from the enclosing function.  Note that this expands
 * to a bare if-statement (it is not wrapped in do/while) and that it expects
 * a variable named "d" to be in scope at the point of use. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);

/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";

/* upb_pbdecoder **************************************************************/

/* A standalone OP_HALT instruction.
 * NOTE(review): its users are outside this section -- presumably the decoder
 * points its program counter here to stop; confirm against the VM loop. */
static opcode halt = OP_HALT;

/* A dummy character we can point to when the user passes us a NULL buffer.
 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
 * behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
8179
8180 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)8181 static bool consumes_input(opcode op) {
8182 switch (op) {
8183 case OP_SETDISPATCH:
8184 case OP_STARTMSG:
8185 case OP_ENDMSG:
8186 case OP_STARTSEQ:
8187 case OP_ENDSEQ:
8188 case OP_STARTSUBMSG:
8189 case OP_ENDSUBMSG:
8190 case OP_STARTSTR:
8191 case OP_ENDSTR:
8192 case OP_PUSHTAGDELIM:
8193 case OP_POP:
8194 case OP_SETDELIM:
8195 case OP_SETBIGGROUPNUM:
8196 case OP_CHECKDELIM:
8197 case OP_CALL:
8198 case OP_RET:
8199 case OP_BRANCH:
8200 return false;
8201 default:
8202 return true;
8203 }
8204 }
8205
/* Size in bytes of a decoder frame stack holding "entries" frames.  The
 * decoder argument is not used. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);
  return sizeof(upb_pbdecoder_frame) * entries;
}
8210
/* Size in bytes of a call stack able to hold "entries" entries.
 *
 * For bytecode methods each entry is one uint32_t pointer.  When the JIT is
 * compiled in and the decoder's method is native, each entry instead needs
 * two pointers, plus ten extra pointer-sized slots for the enter/exit
 * trampolines. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);

#ifdef UPB_USE_JIT_X64
  if (d->method_->is_native_) {
    /* (2 pointers per frame) + (10 pointers of trampoline headroom). */
    return (entries * 2 + 10) * sizeof(void*);
  }
#endif

  return entries * sizeof(uint32_t*);
}
8226
8227
8228 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
8229
8230 /* It's unfortunate that we have to micro-manage the compiler with
8231 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
8232 * specific to one hardware configuration. But empirically on a Core i7,
8233 * performance increases 30-50% with these annotations. Every instance where
8234 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
8235 * benchmarks. */
8236
/* Reports "msg" as an error to the decoder's environment, using a temporary
 * status object to carry the message. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status st = UPB_STATUS_INIT;

  upb_status_seterrmsg(&st, msg);
  upb_env_reporterror(d->env, &st);
}
8242
/* Externally visible wrapper around the file-local seterr(). */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  seterr(d, msg);
}
8246
8247
8248 /* Buffering ******************************************************************/
8249
8250 /* We operate on one buffer at a time, which is either the user's buffer passed
8251 * to our "decode" callback or some residual bytes from the previous buffer. */
8252
8253 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
8254 * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)8255 static size_t curbufleft(const upb_pbdecoder *d) {
8256 assert(d->data_end >= d->ptr);
8257 return d->data_end - d->ptr;
8258 }
8259
8260 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)8261 static size_t bufleft(const upb_pbdecoder *d) {
8262 return d->end - d->ptr;
8263 }
8264
8265 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)8266 uint64_t offset(const upb_pbdecoder *d) {
8267 return d->bufstart_ofs + (d->ptr - d->buf);
8268 }
8269
8270 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)8271 size_t delim_remaining(const upb_pbdecoder *d) {
8272 return d->top->end_ofs - offset(d);
8273 }
8274
8275 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)8276 static void advance(upb_pbdecoder *d, size_t len) {
8277 assert(curbufleft(d) >= len);
8278 d->ptr += len;
8279 }
8280
/* True when "p" lies in the closed range [buf, end].  Note that "end" itself
 * counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) return false;
  if (p > end) return false;
  return true;
}
8284
/* True when "p" points into the decoder's residual buffer, i.e. anywhere from
 * d->residual up to and including d->residual_end. */
static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
  return in_buf(p, d->residual, d->residual_end);
}
8288
8289 /* Calculates the delim_end value, which is affected by both the current buffer
8290 * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)8291 static void set_delim_end(upb_pbdecoder *d) {
8292 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
8293 if (delim_ofs <= (size_t)(d->end - d->buf)) {
8294 d->delim_end = d->buf + delim_ofs;
8295 d->data_end = d->delim_end;
8296 } else {
8297 d->data_end = d->end;
8298 d->delim_end = NULL;
8299 }
8300 }
8301
/* Makes [buf, end) the decoder's current buffer, positions d->ptr at its
 * start and refreshes the delimiter bookkeeping for the new buffer. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
8308
/* Moves on from a fully consumed current buffer to a new buffer of "len"
 * bytes at "buf": the stream offset of the buffer start is advanced by the
 * length of the old buffer before switching. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const char *const buf_end = buf + len;

  assert(curbufleft(d) == 0);
  d->bufstart_ofs += (d->end - d->buf);
  switchtobuf(d, buf, buf_end);
}
8314
/* Records the current read position as the decoder's checkpoint; the suspend
 * functions use d->checkpoint to decide what has and has not been consumed. */
static void checkpoint(upb_pbdecoder *d) {
  /* The assertion here is in the interests of efficiency, not correctness.
   * We are trying to ensure that we don't checkpoint() more often than
   * necessary. */
  assert(d->checkpoint != d->ptr);
  d->checkpoint = d->ptr;
}
8322
8323 /* Skips "bytes" bytes in the stream, which may be more than available. If we
8324 * skip more bytes than are available, we return a long read count to the caller
8325 * indicating how many bytes can be skipped over before passing actual data
8326 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
8327 * won't actually be read.
8328 */
skip(upb_pbdecoder * d,size_t bytes)8329 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
8330 assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
8331 assert(d->skip == 0);
8332 if (bytes > delim_remaining(d)) {
8333 seterr(d, "Skipped value extended beyond enclosing submessage.");
8334 return upb_pbdecoder_suspend(d);
8335 } else if (bufleft(d) >= bytes) {
8336 /* Skipped data is all in current buffer, and more is still available. */
8337 advance(d, bytes);
8338 d->skip = 0;
8339 return DECODE_OK;
8340 } else {
8341 /* Skipped data extends beyond currently available buffers. */
8342 d->pc = d->last;
8343 d->skip = bytes - curbufleft(d);
8344 d->bufstart_ofs += (d->end - d->buf);
8345 d->residual_end = d->residual;
8346 switchtobuf(d, d->residual, d->residual_end);
8347 return d->size_param + d->skip;
8348 }
8349 }
8350
8351
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 * d      - the decoder.
 * p      - unused here.
 * buf    - the user's buffer; may be NULL only if the pending skip (d->skip)
 *          covers all "size" bytes.
 * size   - number of bytes in "buf"; remembered in d->size_param.
 * handle - buffer handle; stored in d->handle.
 *
 * Returns DECODE_OK when the decoder is positioned and ready to execute.
 * Otherwise returns whatever upb_pbdecoder_suspend(), skip() or
 * upb_pbdecoder_skipunknown() produced.
 * NOTE(review): CHECK_RETURN is defined outside this section; presumably it
 * returns early from this function when its argument is not DECODE_OK. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* d->skip and d->residual_end could probably elegantly be represented
   * as a single variable, to more easily represent this invariant. */
  assert(!(d->skip && d->residual_end > d->residual));

  /* We need to remember the original size_param, so that the value we return
   * is relative to it, even if we do some skipping first. */
  d->size_param = size;
  d->handle = handle;

  /* Have to handle this case specially (ie. not with skip()) because the user
   * is allowed to pass a NULL buffer here, which won't allow us to safely
   * calculate a d->end or use our normal functions like curbufleft(). */
  if (d->skip && d->skip >= size) {
    d->skip -= size;
    d->bufstart_ofs += size;
    /* Substitute an empty, non-NULL buffer for the fully skipped one. */
    buf = &dummy_char;
    size = 0;

    /* We can't just return now, because we might need to execute some ops
     * like CHECKDELIM, which could call some callbacks and pop the stack. */
  }

  /* We need to pretend that this was the actual buffer param, since some of the
   * calculations assume that d->ptr/d->buf is relative to this. */
  d->buf_param = buf;

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return upb_pbdecoder_suspend(d);
  }

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    assert(d->ptr == d->residual);
  } else {
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  /* Handle skips that don't cover the whole buffer (as above). */
  if (d->skip) {
    /* skip() asserts that d->skip is zero on entry, so clear it first. */
    size_t skip_bytes = d->skip;
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    checkpoint(d);
  }

  /* If we're inside an unknown group, continue to parse unknown values. */
  if (d->top->groupnum < 0) {
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    checkpoint(d);
  }

  return DECODE_OK;
}
8415
8416 /* Suspends the decoder at the last checkpoint, without saving any residual
8417 * bytes. If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)8418 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
8419 d->pc = d->last;
8420 if (d->checkpoint == d->residual) {
8421 /* Checkpoint was in residual buf; no user bytes were consumed. */
8422 d->ptr = d->residual;
8423 return 0;
8424 } else {
8425 size_t ret = d->size_param - (d->end - d->checkpoint);
8426 assert(!in_residual_buf(d, d->checkpoint));
8427 assert(d->buf == d->buf_param || d->buf == &dummy_char);
8428
8429 d->bufstart_ofs += (d->checkpoint - d->buf);
8430 d->residual_end = d->residual;
8431 switchtobuf(d, d->residual, d->residual_end);
8432 return ret;
8433 }
8434 }
8435
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
 * bytes in our residual buffer.  This is necessary if we need more user
 * bytes to form a complete value, which might not be contiguous in the
 * user's buffers.  Always consumes all user bytes.
 *
 * Returns d->size_param, i.e. reports the whole user buffer as consumed.
 * On return the residual buffer is the decoder's current buffer. */
static size_t suspend_save(upb_pbdecoder *d) {
  /* We hit end-of-buffer before we could parse a full value.
   * Save any unconsumed bytes (if any) to the residual buffer. */
  d->pc = d->last;

  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
    assert((d->residual_end - d->residual) + d->size_param <=
           sizeof(d->residual));
    if (!in_residual_buf(d, d->ptr)) {
      /* d->ptr is in the user's buffer here, so back bufstart_ofs up by the
       * number of bytes already held in the residual buffer. */
      d->bufstart_ofs -= (d->residual_end - d->residual);
    }
    memcpy(d->residual_end, d->buf_param, d->size_param);
    d->residual_end += d->size_param;
  } else {
    /* Checkpoint was in user buf; old residual bytes not needed. */
    size_t save;
    assert(!in_residual_buf(d, d->checkpoint));

    /* Rewind to the checkpoint and keep everything from there to data_end. */
    d->ptr = d->checkpoint;
    save = curbufleft(d);
    assert(save <= sizeof(d->residual));
    memcpy(d->residual, d->ptr, save);
    d->residual_end = d->residual + save;
    /* The residual buffer now begins at the checkpoint's stream offset. */
    d->bufstart_ofs = offset(d);
  }

  switchtobuf(d, d->residual, d->residual_end);
  return d->size_param;
}
8470
/* Copies the next "bytes" bytes into "buf" and advances the stream.
 * Requires that this many bytes are available in the current buffer (checked
 * by assertion only). */
UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
                                         size_t bytes) {
  assert(bytes <= curbufleft(d));
  memcpy(buf, d->ptr, bytes);
  advance(d, bytes);
}
8479
8480 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
8481 * available in the current buffer or not. Returns a status code as described
8482 * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)8483 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
8484 size_t bytes) {
8485 const size_t avail = curbufleft(d);
8486 consumebytes(d, buf, avail);
8487 bytes -= avail;
8488 assert(bytes > 0);
8489 if (in_residual_buf(d, d->ptr)) {
8490 advancetobuf(d, d->buf_param, d->size_param);
8491 }
8492 if (curbufleft(d) >= bytes) {
8493 consumebytes(d, (char *)buf + avail, bytes);
8494 return DECODE_OK;
8495 } else if (d->data_end == d->delim_end) {
8496 seterr(d, "Submessage ended in the middle of a value or group");
8497 return upb_pbdecoder_suspend(d);
8498 } else {
8499 return suspend_save(d);
8500 }
8501 }
8502
8503 /* Gets the next "bytes" bytes, regardless of whether they are available in the
8504 * current buffer or not. Returns a status code as described in decoder.int.h.
8505 */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)8506 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
8507 size_t bytes) {
8508 if (curbufleft(d) >= bytes) {
8509 /* Buffer has enough data to satisfy. */
8510 consumebytes(d, buf, bytes);
8511 return DECODE_OK;
8512 } else {
8513 return getbytes_slow(d, buf, bytes);
8514 }
8515 }
8516
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)8517 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
8518 size_t bytes) {
8519 size_t ret = curbufleft(d);
8520 memcpy(buf, d->ptr, ret);
8521 if (in_residual_buf(d, d->ptr)) {
8522 size_t copy = UPB_MIN(bytes - ret, d->size_param);
8523 memcpy((char *)buf + ret, d->buf_param, copy);
8524 ret += copy;
8525 }
8526 return ret;
8527 }
8528
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)8529 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
8530 size_t bytes) {
8531 if (curbufleft(d) >= bytes) {
8532 memcpy(buf, d->ptr, bytes);
8533 return bytes;
8534 } else {
8535 return peekbytes_slow(d, buf, bytes);
8536 }
8537 }
8538
8539
8540 /* Decoding of wire types *****************************************************/
8541
8542 /* Slow path for decoding a varint from the current buffer position.
8543 * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)8544 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
8545 uint64_t *u64) {
8546 uint8_t byte = 0x80;
8547 int bitpos;
8548 *u64 = 0;
8549 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
8550 CHECK_RETURN(getbytes(d, &byte, 1));
8551 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
8552 }
8553 if(bitpos == 70 && (byte & 0x80)) {
8554 seterr(d, kUnterminatedVarint);
8555 return upb_pbdecoder_suspend(d);
8556 }
8557 return DECODE_OK;
8558 }
8559
8560 /* Decodes a varint from the current buffer position.
8561 * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)8562 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
8563 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
8564 *u64 = *d->ptr;
8565 advance(d, 1);
8566 return DECODE_OK;
8567 } else if (curbufleft(d) >= 10) {
8568 /* Fast case. */
8569 upb_decoderet r = upb_vdecode_fast(d->ptr);
8570 if (r.p == NULL) {
8571 seterr(d, kUnterminatedVarint);
8572 return upb_pbdecoder_suspend(d);
8573 }
8574 advance(d, r.p - d->ptr);
8575 *u64 = r.val;
8576 return DECODE_OK;
8577 } else {
8578 /* Slow case -- varint spans buffer seam. */
8579 return upb_pbdecoder_decode_varint_slow(d, u64);
8580 }
8581 }
8582
8583 /* Decodes a 32-bit varint from the current buffer position.
8584 * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)8585 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
8586 uint64_t u64;
8587 int32_t ret = decode_varint(d, &u64);
8588 if (ret >= 0) return ret;
8589 if (u64 > UINT32_MAX) {
8590 seterr(d, "Unterminated 32-bit varint");
8591 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
8592 * so we know this path will always be treated as error by our caller.
8593 * Right now the size_t -> int32_t can overflow and produce negative values.
8594 */
8595 *u32 = 0;
8596 return upb_pbdecoder_suspend(d);
8597 }
8598 *u32 = u64;
8599 return DECODE_OK;
8600 }
8601
/* Decodes a fixed32 from the current buffer position by copying four raw
 * bytes into *u32 (host byte order, no swapping).
 * Returns a status code as described in decoder.int.h.
 * TODO: proper byte swapping for big-endian machines. */
UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
  return getbytes(d, u32, 4);
}
8608
/* Decodes a fixed64 from the current buffer position by copying eight raw
 * bytes into *u64 (host byte order, no swapping).
 * Returns a status code as described in decoder.int.h.
 * TODO: proper byte swapping for big-endian machines. */
UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
  return getbytes(d, u64, 8);
}
8615
8616 /* Non-static versions of the above functions.
8617 * These are called by the JIT for fallback paths. */
/* Externally visible entry point for decode_fixed32(). */
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
  return decode_fixed32(d, u32);
}
8621
/* Externally visible entry point for decode_fixed64(). */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  return decode_fixed64(d, u64);
}
8625
/* Reinterprets the eight bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, sizeof(n));
  return result;
}
/* Reinterprets the four bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, sizeof(n));
  return result;
}
8628
8629 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)8630 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
8631 upb_pbdecoder_frame *fr = d->top;
8632
8633 if (end > fr->end_ofs) {
8634 seterr(d, kPbDecoderSubmessageTooLong);
8635 return false;
8636 } else if (fr == d->limit) {
8637 seterr(d, kPbDecoderStackOverflow);
8638 return false;
8639 }
8640
8641 fr++;
8642 fr->end_ofs = end;
8643 fr->dispatch = NULL;
8644 fr->groupnum = 0;
8645 d->top = fr;
8646 return true;
8647 }
8648
/* Pushes a frame for a tag-delimited region and records "arg" as its group
 * number.  Returns false if the push failed. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
   * field number) prior to hitting any enclosing submessage end, pushing our
   * existing delim end prevents us from continuing to parse values from a
   * corrupt proto that doesn't give us an END tag in time. */
  if (decoder_push(d, d->top->end_ofs)) {
    d->top->groupnum = arg;
    return true;
  }
  return false;
}
8659
8660 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)8661 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
8662
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)8663 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
8664 uint64_t expected) {
8665 uint64_t data = 0;
8666 size_t bytes = upb_value_size(expected);
8667 size_t read = peekbytes(d, &data, bytes);
8668 if (read == bytes && data == expected) {
8669 /* Advance past matched bytes. */
8670 int32_t ok = getbytes(d, &data, read);
8671 UPB_ASSERT_VAR(ok, ok < 0);
8672 return DECODE_OK;
8673 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
8674 return suspend_save(d);
8675 } else {
8676 return DECODE_MISMATCH;
8677 }
8678 }
8679
/* Skips one unknown field, or a whole unknown group, in the input.
 *
 * If |fieldnum| is non-negative the caller has already decoded the tag and
 * passes its field number and |wire_type|.  If |fieldnum| is negative, the
 * tag is read from the input first.
 *
 * Unknown groups are tracked by pushing a frame whose groupnum is the negated
 * field number.  While the top frame's groupnum is negative the function
 * keeps consuming fields until the matching ENDGROUP pops that frame.
 *
 * Returns DECODE_OK once the field has been skipped, DECODE_ENDGROUP when an
 * ENDGROUP tag matching the current (known) group is seen, or whatever
 * CHECK_RETURN / CHECK_SUSPEND / upb_pbdecoder_suspend() yield on suspension
 * or error. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* Caller already parsed a tag: jump straight into the loop body. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    /* Low three bits are the wire type, the rest is the field number. */
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    /* TODO: deliver to unknown field callback. */
    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* Decoded only to advance past it; the value is discarded. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Negative groupnum marks the new frame as an unknown group. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* Closes the unknown group we were skipping. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* Closes the known group the caller is parsing. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    /* Not (or no longer) inside an unknown group: one field was skipped. */
    if (d->top->groupnum >= 0) {
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
8742
/* Redirects the bytecode program counter to the end-of-message code of the
 * current frame.  The target is the DISPATCH_ENDMSG entry of the frame's
 * dispatch table, taken as an offset from the frame's base; the entry is
 * asserted to exist. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_value entry;
  bool found =
      upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &entry);
  UPB_ASSERT_VAR(found, found);
  d->pc = d->top->base + upb_value_getuint64(entry);
}
8749
/* Parses a tag and jumps to the corresponding bytecode instruction for this
 * field.
 *
 * If the tag is unknown (or the wire type doesn't match), parses the field as
 * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
 * instruction for the end of message.
 *
 * Returns DECODE_OK once d->pc has been set, or the non-negative code
 * propagated by CHECK_RETURN when tag decoding or unknown-field skipping
 * suspends or fails. */
static int32_t dispatch(upb_pbdecoder *d) {
  upb_inttable *dispatch = d->top->dispatch;
  uint32_t tag;
  uint8_t wire_type;
  uint32_t fieldnum;
  upb_value val;
  int32_t retval;

  /* Decode tag. */
  CHECK_RETURN(decode_v32(d, &tag));
  wire_type = tag & 0x7;
  fieldnum = tag >> 3;

  /* Lookup tag.  Because of packed/non-packed compatibility, we have to
   * check the wire type against two possibilities. */
  if (fieldnum != DISPATCH_ENDMSG &&
      upb_inttable_lookup32(dispatch, fieldnum, &val)) {
    /* Table value layout as used below: bits 0-7 hold the first accepted wire
     * type, bits 8-15 the second, and the bits from 16 up the bytecode offset
     * for the first. */
    uint64_t v = upb_value_getuint64(val);
    if (wire_type == (v & 0xff)) {
      d->pc = d->top->base + (v >> 16);
      return DECODE_OK;
    } else if (wire_type == ((v >> 8) & 0xff)) {
      /* The offset for the second wire type lives in a separate entry keyed
       * by fieldnum + UPB_MAX_FIELDNUMBER. */
      bool found =
          upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
      UPB_ASSERT_VAR(found, found);
      d->pc = d->top->base + upb_value_getuint64(val);
      return DECODE_OK;
    }
  }

  /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
   * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
   * we need to back up to, so that when we're done skipping unknown data we
   * can re-check the delimited end. */
  d->last--;  /* Necessary if we get suspended */
  d->pc = d->last;
  assert(getop(*d->last) == OP_CHECKDELIM);

  /* Unknown field or ENDGROUP. */
  retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);

  /* Propagates non-negative codes (suspend/error) to the caller. */
  CHECK_RETURN(retval);

  if (retval == DECODE_ENDGROUP) {
    goto_endmsg(d);
    return DECODE_OK;
  }

  return DECODE_OK;
}
8806
8807 /* Callers know that the stack is more than one deep because the opcodes that
8808 * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)8809 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
8810 assert(d->top != d->stack);
8811 return d->top - 1;
8812 }
8813
8814
8815 /* The main decoding loop *****************************************************/
8816
/* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
 * switch() statement.
 *
 * Each iteration fetches one instruction word from d->pc, splits it into an
 * opcode (getop()) and an argument (the bits above the low 8), and executes
 * the matching case.  d->last records the instruction being executed so it
 * can be revisited after a suspend.
 *
 * The loop only exits through a return: OP_HALT returns d->size_param, and
 * the CHECK_RETURN / CHECK_SUSPEND macros and the explicit
 * upb_pbdecoder_suspend() calls return early on suspension or error.
 *
 * |group| is only used when UPB_DUMP_BYTECODE is defined; |handle| is passed
 * through to upb_sink_putstring(). */
size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
                      const upb_bufhandle* handle) {

/* Wraps one opcode handler: runs |code|, then records a checkpoint if
 * consumes_input() reports that the opcode consumes input. */
#define VMCASE(op, code) \
  case op: { code; if (consumes_input(op)) checkpoint(d); break; }
/* Handler for a scalar field: decodes a value of wire representation |wt|
 * into a |ctype|, converts it with |convfunc| and delivers it to the sink. */
#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
  VMCASE(OP_PARSE_ ## type, { \
    ctype val; \
    CHECK_RETURN(decode_ ## wt(d, &val)); \
    upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
  })

  while(1) {
    int32_t instruction;
    opcode op;
    uint32_t arg;
    int32_t longofs;

    d->last = d->pc;
    instruction = *d->pc++;
    op = getop(instruction);
    arg = instruction >> 8;
    /* Same bits as |arg|, but signed, for use as a relative pc offset. */
    longofs = arg;
    assert(d->ptr != d->residual_end);
    UPB_UNUSED(group);
#ifdef UPB_DUMP_BYTECODE
    fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
                    "%x %s (%d)\n",
            (int)offset(d),
            (int)(d->ptr - d->buf),
            (int)(d->data_end - d->ptr),
            (int)(d->end - d->ptr),
            (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
            (int)(d->pc - 1 - group->bytecode),
            upb_pbdecoder_getopname(op),
            arg);
#endif
    switch (op) {
      /* Technically, we are losing data if we see a 32-bit varint that is not
       * properly sign-extended.  We could detect this and error about the data
       * loss, but proto2 does not do this, so we pass. */
      PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
      PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
      PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
      PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
      PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
      PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
      PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
      PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
      PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
      PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
      PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
      PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
      PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)

      /* The dispatch table pointer is stored inline in the bytecode stream,
       * directly after the SETDISPATCH instruction word. */
      VMCASE(OP_SETDISPATCH,
        d->top->base = d->pc - 1;
        memcpy(&d->top->dispatch, d->pc, sizeof(void*));
        d->pc += sizeof(void*) / sizeof(uint32_t);
      )
      VMCASE(OP_STARTMSG,
        CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
      )
      VMCASE(OP_ENDMSG,
        CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
      )
      VMCASE(OP_STARTSEQ,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
      )
      VMCASE(OP_ENDSEQ,
        CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
      )
      VMCASE(OP_STARTSUBMSG,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
      )
      VMCASE(OP_ENDSUBMSG,
        CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
      )
      VMCASE(OP_STARTSTR,
        uint32_t len = delim_remaining(d);
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
        if (len == 0) {
          d->pc++;  /* Skip OP_STRING. */
        }
      )
      VMCASE(OP_STRING,
        uint32_t len = curbufleft(d);
        size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
        /* The sink may claim more bytes than were offered; the excess is
         * skipped as long as it stays inside the delimited region. */
        if (n > len) {
          if (n > delim_remaining(d)) {
            seterr(d, "Tried to skip past end of string.");
            return upb_pbdecoder_suspend(d);
          } else {
            int32_t ret = skip(d, n);
            /* This shouldn't return DECODE_OK, because n > len. */
            assert(ret >= 0);
            return ret;
          }
        }
        advance(d, n);
        if (n < len || d->delim_end == NULL) {
          /* We aren't finished with this string yet. */
          d->pc--;  /* Repeat OP_STRING. */
          if (n > 0) checkpoint(d);
          return upb_pbdecoder_suspend(d);
        }
      )
      VMCASE(OP_ENDSTR,
        CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
      )
      VMCASE(OP_PUSHTAGDELIM,
        CHECK_SUSPEND(pushtagdelim(d, arg));
      )
      /* The group number is stored in the word following the instruction. */
      VMCASE(OP_SETBIGGROUPNUM,
        d->top->groupnum = *d->pc++;
      )
      VMCASE(OP_POP,
        assert(d->top > d->stack);
        decoder_pop(d);
      )
      VMCASE(OP_PUSHLENDELIM,
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_SUSPEND(decoder_push(d, offset(d) + len));
        set_delim_end(d);
      )
      VMCASE(OP_SETDELIM,
        set_delim_end(d);
      )
      VMCASE(OP_CHECKDELIM,
        /* We are guaranteed of this assert because we never allow ourselves to
         * consume bytes beyond data_end, which covers delim_end when non-NULL.
         */
        assert(!(d->delim_end && d->ptr > d->delim_end));
        if (d->ptr == d->delim_end)
          d->pc += longofs;
      )
      /* NOTE(review): no bounds check on callstack here; presumably the
       * bytecode and the nesting limit keep call_len in range -- confirm. */
      VMCASE(OP_CALL,
        d->callstack[d->call_len++] = d->pc;
        d->pc += longofs;
      )
      VMCASE(OP_RET,
        assert(d->call_len > 0);
        d->pc = d->callstack[--d->call_len];
      )
      VMCASE(OP_BRANCH,
        d->pc += longofs;
      )
      /* TAG1/TAG2/TAGN compare the upcoming input against a pre-encoded tag of
       * one, two, or up to eight bytes.  On mismatch they share the "badtag"
       * path, which either falls back to dispatch() or branches by the short
       * offset held in the low byte of |arg|. */
      VMCASE(OP_TAG1,
        uint8_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xff;
        if (*d->ptr == expected) {
          advance(d, 1);
        } else {
          int8_t shortofs;
         badtag:
          shortofs = arg;
          if (shortofs == LABEL_DISPATCH) {
            CHECK_RETURN(dispatch(d));
          } else {
            d->pc += shortofs;
            break;  /* Avoid checkpoint(). */
          }
        }
      )
      VMCASE(OP_TAG2,
        uint16_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xffff;
        if (curbufleft(d) >= 2) {
          uint16_t actual;
          memcpy(&actual, d->ptr, 2);
          if (expected == actual) {
            advance(d, 2);
          } else {
            goto badtag;
          }
        } else {
          /* Only one byte buffered: let the slow path decide. */
          int32_t result = upb_pbdecoder_checktag_slow(d, expected);
          if (result == DECODE_MISMATCH) goto badtag;
          if (result >= 0) return result;
        }
      )
      VMCASE(OP_TAGN, {
        uint64_t expected;
        int32_t result;
        /* The expected tag occupies the two words after the instruction. */
        memcpy(&expected, d->pc, 8);
        d->pc += 2;
        result = upb_pbdecoder_checktag_slow(d, expected);
        if (result == DECODE_MISMATCH) goto badtag;
        if (result >= 0) return result;
      })
      VMCASE(OP_DISPATCH, {
        CHECK_RETURN(dispatch(d));
      })
      VMCASE(OP_HALT, {
        return d->size_param;
      })
    }
  }
}
9024
9025
9026 /* BytesHandler handlers ******************************************************/
9027
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)9028 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
9029 upb_pbdecoder *d = closure;
9030 UPB_UNUSED(size_hint);
9031 d->top->end_ofs = UINT64_MAX;
9032 d->bufstart_ofs = 0;
9033 d->call_len = 1;
9034 d->callstack[0] = &halt;
9035 d->pc = pc;
9036 d->skip = 0;
9037 return d;
9038 }
9039
upb_pbdecoder_startjit(void * closure,const void * hd,size_t size_hint)9040 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
9041 upb_pbdecoder *d = closure;
9042 UPB_UNUSED(hd);
9043 UPB_UNUSED(size_hint);
9044 d->top->end_ofs = UINT64_MAX;
9045 d->bufstart_ofs = 0;
9046 d->call_len = 0;
9047 d->skip = 0;
9048 return d;
9049 }
9050
/* End-of-input handler for the decoder.
 *
 * Rejects (with an error set via seterr() and a false return) input that ends
 * while unparsed bytes are still buffered, while skipped data is outstanding,
 * or while the top frame has a bounded end offset.  Otherwise marks the
 * current offset as the end of the message and runs the decoder once more on
 * an empty buffer so that it can process the end of the message.  If call
 * frames remain afterwards, the input ended inside a submessage or group and
 * false is returned. */
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
  upb_pbdecoder *d = closure;
  const upb_pbdecodermethod *method = handler_data;
  uint64_t end;
  /* Backing storage for the zero-length buffer passed to the final run. */
  char dummy;

  if (d->residual_end > d->residual) {
    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
    return false;
  }

  if (d->skip) {
    seterr(d, "Unexpected EOF inside skipped data");
    return false;
  }

  if (d->top->end_ofs != UINT64_MAX) {
    seterr(d, "Unexpected EOF inside delimited string");
    return false;
  }

  /* The user's end() call indicates that the message ends here. */
  end = offset(d);
  d->top->end_ofs = end;

#ifdef UPB_USE_JIT_X64
  if (method->is_native_) {
    const mgroup *group = (const mgroup*)method->group;
    if (d->top != d->stack)
      d->stack->end_ofs = 0;
    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
  } else
#endif
  {
    const uint32_t *p = d->pc;
    d->stack->end_ofs = end;
    /* Check the previous bytecode, but guard against beginning. */
    if (p != method->code_base.ptr) p--;
    if (getop(*p) == OP_CHECKDELIM) {
      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
      assert(getop(*d->pc) == OP_TAG1 ||
             getop(*d->pc) == OP_TAG2 ||
             getop(*d->pc) == OP_TAGN ||
             getop(*d->pc) == OP_DISPATCH);
      d->pc = p;
    }
    /* Final run on an empty buffer; its return value is not inspected. */
    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
  }

  /* Call frames still pending here mean the input stopped mid-submessage. */
  if (d->call_len != 0) {
    seterr(d, "Unexpected EOF inside submessage or group");
    return false;
  }

  return true;
}
9107
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)9108 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
9109 size_t size, const upb_bufhandle *handle) {
9110 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
9111
9112 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
9113 CHECK_RETURN(result);
9114
9115 return run_decoder_vm(decoder, group, handle);
9116 }
9117
9118
9119 /* Public API *****************************************************************/
9120
/* Returns the decoder to its initial parsing state: the frame stack is
 * emptied down to its base frame (group number 0), and all buffer cursors are
 * pointed at the start of the (now empty) residual buffer. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* Buffer cursors: nothing buffered, nothing left over. */
  d->residual_end = d->residual;
  d->buf = d->residual;
  d->ptr = d->residual;
  d->end = d->residual;

  /* Frame stack: back to the base frame. */
  d->top = d->stack;
  d->top->groupnum = 0;
}
9129
upb_pbdecoder_create(upb_env * e,const upb_pbdecodermethod * m,upb_sink * sink)9130 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
9131 upb_sink *sink) {
9132 const size_t default_max_nesting = 64;
9133 #ifndef NDEBUG
9134 size_t size_before = upb_env_bytesallocated(e);
9135 #endif
9136
9137 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
9138 if (!d) return NULL;
9139
9140 d->method_ = m;
9141 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
9142 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
9143 if (!d->stack || !d->callstack) {
9144 return NULL;
9145 }
9146
9147 d->env = e;
9148 d->limit = d->stack + default_max_nesting - 1;
9149 d->stack_size = default_max_nesting;
9150 d->status = NULL;
9151
9152 upb_pbdecoder_reset(d);
9153 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
9154
9155 assert(sink);
9156 if (d->method_->dest_handlers_) {
9157 if (sink->handlers != d->method_->dest_handlers_)
9158 return NULL;
9159 }
9160 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
9161
9162 /* If this fails, increase the value in decoder.h. */
9163 assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
9164 return d;
9165 }
9166
/* Returns the decoder's current stream offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  const uint64_t parsed = offset(d);
  return parsed;
}
9170
upb_pbdecoder_method(const upb_pbdecoder * d)9171 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
9172 return d->method_;
9173 }
9174
upb_pbdecoder_input(upb_pbdecoder * d)9175 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
9176 return &d->input_;
9177 }
9178
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)9179 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
9180 return d->stack_size;
9181 }
9182
/* Sets the maximum nesting depth of the decoder to |max| frames.
 *
 * Returns false, leaving the limit unchanged, if |max| is smaller than the
 * current depth or if growing either stack fails.  When the stacks have to
 * grow they are reallocated.  Reallocation may move the frame array, so
 * d->top and d->limit are rebased onto the new allocation right away;
 * otherwise they would dangle into the old block.  This function may
 * therefore be called while the decoder is mid-parse. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  size_t depth;

  assert(d->top >= d->stack);
  depth = (size_t)(d->top - d->stack);

  if (max < depth) {
    /* Can't set a limit smaller than what we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    ptrdiff_t limit_ofs = d->limit - d->stack;
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;
    /* Rebase the pointers into the frame array.  Doing it here keeps the
     * decoder consistent even if the callstack reallocation below fails. */
    d->top = d->stack + depth;
    d->limit = d->stack + limit_ofs;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
    if (!p) {
      /* NOTE(review): the frame stack has already grown but stack_size still
       * records the old capacity; a later call will pass that smaller size as
       * old_size.  Confirm upb_env_realloc tolerates this. */
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
9215 /*
9216 ** upb::Encoder
9217 **
9218 ** Since we are implementing pure handlers (ie. without any out-of-band access
9219 ** to pre-computed lengths), we have to buffer all submessages before we can
9220 ** emit even their first byte.
9221 **
9222 ** Not knowing the size of submessages also means we can't write a perfect
9223 ** zero-copy implementation, even with buffering. Lengths are stored as
9224 ** varints, which means that we don't know how many bytes to reserve for the
9225 ** length until we know what the length is.
9226 **
9227 ** This leaves us with three main choices:
9228 **
9229 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
9230 ** once into the output buffer.
9231 **
9232 ** 2. attempt to buffer data directly into the output buffer, estimating how
9233 ** many bytes each length will take. When our guesses are wrong, use
9234 ** memmove() to grow or shrink the allotted space.
9235 **
9236 ** 3. buffer directly into the output buffer, allocating a max length
9237 ** ahead-of-time for each submessage length. If we overallocated, we waste
9238 ** space, but no memcpy() or memmove() is required. This approach requires
9239 ** defining a maximum size for submessages and rejecting submessages that
9240 ** exceed that size.
9241 **
9242 ** (2) and (3) have the potential to have better performance, but they are more
9243 ** complicated and subtle to implement:
9244 **
9245 ** (3) requires making an arbitrary choice of the maximum message size; it
9246 ** wastes space when submessages are shorter than this and fails
9247 ** completely when they are longer. This makes it more finicky and
9248 ** requires configuration based on the input. It also makes it impossible
9249 ** to perfectly match the output of reference encoders that always use the
9250 ** optimal amount of space for each length.
9251 **
**   (2) requires guessing the size upfront, and if multiple lengths are
9253 ** guessed wrong the minimum required number of memmove() operations may
9254 ** be complicated to compute correctly. Implemented properly, it may have
9255 ** a useful amortized or average cost, but more investigation is required
9256 ** to determine this and what the optimal algorithm is to achieve it.
9257 **
9258 ** (1) makes you always pay for exactly one copy, but its implementation is
9259 ** the simplest and its performance is predictable.
9260 **
9261 ** So for now, we implement (1) only. If we wish to optimize later, we should
9262 ** be able to do it without affecting users.
9263 **
9264 ** The strategy is to buffer the segments of data that do *not* depend on
9265 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
9266 ** and lengths. When the top-level submessage ends, we can go beginning to end,
9267 ** alternating the writing of lengths with memcpy() of the rest of the data.
9268 ** At the top level though, no buffering is required.
9269 */
9270
9271
9272
9273 /* The output buffer is divided into segments; a segment is a string of data
9274 * that is "ready to go" -- it does not need any varint lengths inserted into
9275 * the middle. The seams between segments are where varints will be inserted
9276 * once they are known.
9277 *
9278 * We also use the concept of a "run", which is a range of encoded bytes that
9279 * occur at a single submessage level. Every segment contains one or more runs.
9280 *
9281 * A segment can span messages. Consider:
9282 *
9283 * .--Submessage lengths---------.
9284 * | | |
9285 * | V V
9286 * V | |--------------- | |-----------------
9287 * Submessages: | |-----------------------------------------------
9288 * Top-level msg: ------------------------------------------------------------
9289 *
9290 * Segments: ----- ------------------- -----------------
9291 * Runs: *---- *--------------*--- *----------------
9292 * (* marks the start)
9293 *
 * Note that the top-level message is not in any segment because it does not
9295 * have any length preceding it.
9296 *
9297 * A segment is only interrupted when another length needs to be inserted. So
9298 * observe how the second segment spans both the inner submessage and part of
9299 * the next enclosing message. */
/* Bookkeeping for one segment of buffered output.  When the buffered data is
 * finally emitted, each segment is written as the varint encoding of msglen
 * followed by seglen bytes of buffered data. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment, in bytes of buffered data. */
} upb_pb_encoder_segment;
9304
/* State of a protobuf encoder: where output goes, the byte buffer that holds
 * data whose enclosing lengths are not yet known, and the segment and
 * submessage stacks used to compute those lengths. */
struct upb_pb_encoder {
  /* Environment used for all (re)allocations of the buffers below. */
  upb_env *env;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink *output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf"
   * initially points to "initbuf", but is dynamically allocated if we need to
   * grow beyond the initial size.
   * NOTE(review): no "initbuf" member exists in this struct -- the sentence
   * above may be stale; confirm against the constructor. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating: start of the array, the segment
   * currently being filled, and one past the last allocated slot. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry is an index into segbuf
   * naming the segment where this submessage's length is being accumulated.
   * "top" is NULL while we are not inside any delimited region. */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
9335
9336 /* low-level buffering ********************************************************/
9337
9338 /* Low-level functions for interacting with the output buffer. */
9339
9340 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)9341 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
9342 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
9343 UPB_ASSERT_VAR(n, n == len);
9344 }
9345
top(upb_pb_encoder * e)9346 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
9347 return &e->segbuf[*e->top];
9348 }
9349
9350 /* Call to ensure that at least "bytes" bytes are available for writing at
9351 * e->ptr. Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)9352 static bool reserve(upb_pb_encoder *e, size_t bytes) {
9353 if ((size_t)(e->limit - e->ptr) < bytes) {
9354 /* Grow buffer. */
9355 char *new_buf;
9356 size_t needed = bytes + (e->ptr - e->buf);
9357 size_t old_size = e->limit - e->buf;
9358
9359 size_t new_size = old_size;
9360
9361 while (new_size < needed) {
9362 new_size *= 2;
9363 }
9364
9365 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
9366
9367 if (new_buf == NULL) {
9368 return false;
9369 }
9370
9371 e->ptr = new_buf + (e->ptr - e->buf);
9372 e->runbegin = new_buf + (e->runbegin - e->buf);
9373 e->limit = new_buf + new_size;
9374 e->buf = new_buf;
9375 }
9376
9377 return true;
9378 }
9379
9380 /* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
9381 * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)9382 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
9383 assert((size_t)(e->limit - e->ptr) >= bytes);
9384 e->ptr += bytes;
9385 }
9386
9387 /* Call when all of the bytes for a handler have been written. Flushes the
9388 * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)9389 static bool commit(upb_pb_encoder *e) {
9390 if (!e->top) {
9391 /* We aren't inside a delimited region. Flush our accumulated bytes to
9392 * the output.
9393 *
9394 * TODO(haberman): in the future we may want to delay flushing for
9395 * efficiency reasons. */
9396 putbuf(e, e->buf, e->ptr - e->buf);
9397 e->ptr = e->buf;
9398 }
9399
9400 return true;
9401 }
9402
9403 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)9404 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
9405 if (!reserve(e, len)) {
9406 return false;
9407 }
9408
9409 memcpy(e->ptr, data, len);
9410 encoder_advance(e, len);
9411 return true;
9412 }
9413
9414 /* Finish the current run by adding the run totals to the segment and message
9415 * length. */
accumulate(upb_pb_encoder * e)9416 static void accumulate(upb_pb_encoder *e) {
9417 size_t run_len;
9418 assert(e->ptr >= e->runbegin);
9419 run_len = e->ptr - e->runbegin;
9420 e->segptr->seglen += run_len;
9421 top(e)->msglen += run_len;
9422 e->runbegin = e->ptr;
9423 }
9424
/* Call to indicate the start of delimited region for which the full length is
 * not yet known.  All data will be buffered until the length is known.
 * Delimited regions may be nested; their lengths will all be tracked properly.
 *
 * Returns false if the submessage stack is full or the segment buffer could
 * not be grown.
 * NOTE(review): on those failure paths e->top (and possibly e->segptr) have
 * already been advanced; presumably the encoder is abandoned after a failure
 * -- confirm. */
static bool start_delim(upb_pb_encoder *e) {
  if (e->top) {
    /* We are already buffering, advance to the next segment and push it on the
     * stack. */
    accumulate(e);

    if (++e->top == e->stacklimit) {
      /* TODO(haberman): grow stack? */
      return false;
    }

    if (++e->segptr == e->seglimit) {
      /* Grow segment buffer (capacity doubles). */
      size_t old_size =
          (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
      size_t new_size = old_size * 2;
      upb_pb_encoder_segment *new_buf =
          upb_env_realloc(e->env, e->segbuf, old_size, new_size);

      if (new_buf == NULL) {
        return false;
      }

      /* Re-point the cursors into the new array. */
      e->segptr = new_buf + (e->segptr - e->segbuf);
      e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
      e->segbuf = new_buf;
    }
  } else {
    /* We were previously at the top level, start buffering. */
    e->segptr = e->segbuf;
    e->top = e->stack;
    e->runbegin = e->ptr;
  }

  /* The new stack entry records the index of the fresh, empty segment. */
  *e->top = e->segptr - e->segbuf;
  e->segptr->seglen = 0;
  e->segptr->msglen = 0;

  return true;
}
9468
/* Call to indicate the end of a delimited region.  We now know the length of
 * the delimited region.  If we are not nested inside any other delimited
 * regions, we can now emit all of the buffered data we accumulated.
 *
 * Currently always returns true. */
static bool end_delim(upb_pb_encoder *e) {
  size_t msglen;
  accumulate(e);
  msglen = top(e)->msglen;

  if (e->top == e->stack) {
    /* All lengths are now available, emit all buffered data. */
    char buf[UPB_PB_VARINT_MAX_LEN];
    upb_pb_encoder_segment *s;
    const char *ptr = e->buf;
    /* Each segment goes out as its varint length followed by its bytes. */
    for (s = e->segbuf; s <= e->segptr; s++) {
      size_t lenbytes = upb_vencode64(s->msglen, buf);
      putbuf(e, buf, lenbytes);
      putbuf(e, ptr, s->seglen);
      ptr += s->seglen;
    }

    /* Buffer is drained; we are back at the top level. */
    e->ptr = e->buf;
    e->top = NULL;
  } else {
    /* Need to keep buffering; propagate length info into enclosing
     * submessages.  The parent's length grows by this message's bytes plus
     * the size of the varint that will encode this message's length. */
    --e->top;
    top(e)->msglen += msglen + upb_varint_size(msglen);
  }

  return true;
}
9500
9501
9502 /* tag_t **********************************************************************/
9503
/* A precomputed (pre-encoded) tag and length. */

typedef struct {
  uint8_t bytes;  /* Number of valid bytes in tag[]. */
  char tag[7];    /* The varint-encoded tag, ready to be copied to output. */
} tag_t;
9510
/* Allocates a new tag for this field, and sets it in these handlerattr.
 *
 * The tag is the varint encoding of (field number << 3) | wire type.  It is
 * stored as the handler data of |attr|, which is initialized here, and is
 * registered with |h| so that it is released with upb_gfree during the
 * handlers' cleanup. */
static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
                    upb_handlerattr *attr) {
  uint32_t n = upb_fielddef_number(f);

  /* NOTE(review): the allocation result is not checked before use; a failed
   * upb_gmalloc would be dereferenced on the next line.  This function
   * returns void, so reporting the failure needs an interface change. */
  tag_t *tag = upb_gmalloc(sizeof(tag_t));
  tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);

  upb_handlerattr_init(attr);
  upb_handlerattr_sethandlerdata(attr, tag);
  upb_handlers_addcleanup(h, tag, upb_gfree);
}
9523
/* Appends the pre-encoded bytes of |tag| to the buffer.  Returns false if
 * the bytes could not be written. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  const size_t tag_len = tag->bytes;
  return encode_bytes(e, tag->tag, tag_len);
}
9527
9528
9529 /* encoding of wire types *****************************************************/
9530
/* Appends the 8 bytes of |val| to the buffer in host byte order.  Returns
 * false if the bytes could not be written. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const bool ok = encode_bytes(e, &val, sizeof(val));
  return ok;
}
9535
/* Appends the 4 bytes of |val| to the buffer in host byte order.  Returns
 * false if the bytes could not be written. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const bool ok = encode_bytes(e, &val, sizeof(val));
  return ok;
}
9540
/* Appends |val| to the buffer as a varint.  Space for the longest possible
 * varint is reserved up front, then the write position advances by the number
 * of bytes actually produced.  Returns false if the reservation fails. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  size_t written;

  if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  written = upb_vencode64(val, e->ptr);
  encoder_advance(e, written);
  return true;
}
9549
/* Returns the bit pattern of the double |d| as a 64-bit integer (bit copy,
 * not a numeric conversion). */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
9555
/* Returns the bit pattern of the float |d| as a 32-bit integer (bit copy,
 * not a numeric conversion). */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
9561
9562
9563 /* encoding of proto types ****************************************************/
9564
startmsg(void * c,const void * hd)9565 static bool startmsg(void *c, const void *hd) {
9566 upb_pb_encoder *e = c;
9567 UPB_UNUSED(hd);
9568 if (e->depth++ == 0) {
9569 upb_bytessink_start(e->output_, 0, &e->subc);
9570 }
9571 return true;
9572 }
9573
/* End-of-message handler.  Decrements the nesting count, and when the
 * outermost message has ended, ends the output bytes sink.  |status| is not
 * used.  Always returns true. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *e = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);

  e->depth -= 1;
  if (e->depth == 0) {
    upb_bytessink_end(e->output_);
  }
  return true;
}
9583
encode_startdelimfield(void * c,const void * hd)9584 static void *encode_startdelimfield(void *c, const void *hd) {
9585 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
9586 return ok ? c : UPB_BREAK;
9587 }
9588
/* End handler for a delimited field: forwards to end_delim() and returns its
 * result.  The handler data is not needed. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed = end_delim(c);
  UPB_UNUSED(hd);
  return closed;
}
9593
encode_startgroup(void * c,const void * hd)9594 static void *encode_startgroup(void *c, const void *hd) {
9595 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
9596 }
9597
/* End handler for a group field: encodes the tag held in |hd| and commits.
 * Returns true only if both steps succeed. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
9601
/* Start handler for string/bytes fields.  The size hint is ignored; the work
 * is delegated to encode_startdelimfield(). */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure = encode_startdelimfield(c, hd);
  UPB_UNUSED(size_hint);
  return subclosure;
}
9606
/* String-data handler: appends |len| bytes from |buf| via encode_bytes().
 * Returns |len| when the bytes were accepted and 0 otherwise. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(hd);
  UPB_UNUSED(h);
  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
9613
9614 #define T(type, ctype, convert, encode) \
9615 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
9616 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
9617 } \
9618 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
9619 UPB_UNUSED(hd); \
9620 return encode(e, (convert)(val)); \
9621 }
9622
T(double,double,dbl2uint64,encode_fixed64)9623 T(double, double, dbl2uint64, encode_fixed64)
9624 T(float, float, flt2uint32, encode_fixed32)
9625 T(int64, int64_t, uint64_t, encode_varint)
9626 T(int32, int32_t, uint32_t, encode_varint)
9627 T(fixed64, uint64_t, uint64_t, encode_fixed64)
9628 T(fixed32, uint32_t, uint32_t, encode_fixed32)
9629 T(bool, bool, bool, encode_varint)
9630 T(uint32, uint32_t, uint32_t, encode_varint)
9631 T(uint64, uint64_t, uint64_t, encode_varint)
9632 T(enum, int32_t, uint32_t, encode_varint)
9633 T(sfixed32, int32_t, uint32_t, encode_fixed32)
9634 T(sfixed64, int64_t, uint64_t, encode_fixed64)
9635 T(sint32, int32_t, upb_zzenc_32, encode_varint)
9636 T(sint64, int64_t, upb_zzenc_64, encode_varint)
9637
9638 #undef T
9639
9640
9641 /* code to build the handlers *************************************************/
9642
9643 static void newhandlers_callback(const void *closure, upb_handlers *h) {
9644 const upb_msgdef *m;
9645 upb_msg_field_iter i;
9646
9647 UPB_UNUSED(closure);
9648
9649 upb_handlers_setstartmsg(h, startmsg, NULL);
9650 upb_handlers_setendmsg(h, endmsg, NULL);
9651
9652 m = upb_handlers_msgdef(h);
9653 for(upb_msg_field_begin(&i, m);
9654 !upb_msg_field_done(&i);
9655 upb_msg_field_next(&i)) {
9656 const upb_fielddef *f = upb_msg_iter_field(&i);
9657 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
9658 upb_fielddef_packed(f);
9659 upb_handlerattr attr;
9660 upb_wiretype_t wt =
9661 packed ? UPB_WIRE_TYPE_DELIMITED
9662 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
9663
9664 /* Pre-encode the tag for this field. */
9665 new_tag(h, f, wt, &attr);
9666
9667 if (packed) {
9668 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
9669 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
9670 }
9671
9672 #define T(upper, lower, upbtype) \
9673 case UPB_DESCRIPTOR_TYPE_##upper: \
9674 if (packed) { \
9675 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
9676 } else { \
9677 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
9678 } \
9679 break;
9680
9681 switch (upb_fielddef_descriptortype(f)) {
9682 T(DOUBLE, double, double);
9683 T(FLOAT, float, float);
9684 T(INT64, int64, int64);
9685 T(INT32, int32, int32);
9686 T(FIXED64, fixed64, uint64);
9687 T(FIXED32, fixed32, uint32);
9688 T(BOOL, bool, bool);
9689 T(UINT32, uint32, uint32);
9690 T(UINT64, uint64, uint64);
9691 T(ENUM, enum, int32);
9692 T(SFIXED32, sfixed32, int32);
9693 T(SFIXED64, sfixed64, int64);
9694 T(SINT32, sint32, int32);
9695 T(SINT64, sint64, int64);
9696 case UPB_DESCRIPTOR_TYPE_STRING:
9697 case UPB_DESCRIPTOR_TYPE_BYTES:
9698 upb_handlers_setstartstr(h, f, encode_startstr, &attr);
9699 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
9700 upb_handlers_setstring(h, f, encode_strbuf, &attr);
9701 break;
9702 case UPB_DESCRIPTOR_TYPE_MESSAGE:
9703 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
9704 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
9705 break;
9706 case UPB_DESCRIPTOR_TYPE_GROUP: {
9707 /* Endgroup takes a different tag (wire_type = END_GROUP). */
9708 upb_handlerattr attr2;
9709 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
9710
9711 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
9712 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
9713
9714 upb_handlerattr_uninit(&attr2);
9715 break;
9716 }
9717 }
9718
9719 #undef T
9720
9721 upb_handlerattr_uninit(&attr);
9722 }
9723 }
9724
/* Returns the encoder to its initial state: nesting depth zero and no current
 * segment or stack-top pointer. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
9730
9731
9732 /* public API *****************************************************************/
9733
upb_pb_encoder_newhandlers(const upb_msgdef * m,const void * owner)9734 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
9735 const void *owner) {
9736 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9737 }
9738
upb_pb_encoder_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)9739 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9740 upb_bytessink *output) {
9741 const size_t initial_bufsize = 256;
9742 const size_t initial_segbufsize = 16;
9743 /* TODO(haberman): make this configurable. */
9744 const size_t stack_size = 64;
9745 #ifndef NDEBUG
9746 const size_t size_before = upb_env_bytesallocated(env);
9747 #endif
9748
9749 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9750 if (!e) return NULL;
9751
9752 e->buf = upb_env_malloc(env, initial_bufsize);
9753 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9754 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9755
9756 if (!e->buf || !e->segbuf || !e->stack) {
9757 return NULL;
9758 }
9759
9760 e->limit = e->buf + initial_bufsize;
9761 e->seglimit = e->segbuf + initial_segbufsize;
9762 e->stacklimit = e->stack + stack_size;
9763
9764 upb_pb_encoder_reset(e);
9765 upb_sink_reset(&e->input_, h, e);
9766
9767 e->env = env;
9768 e->output_ = output;
9769 e->subc = output->closure;
9770 e->ptr = e->buf;
9771
9772 /* If this fails, increase the value in encoder.h. */
9773 assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9774 return e;
9775 }
9776
upb_pb_encoder_input(upb_pb_encoder * e)9777 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
9778
9779
9780
upb_loaddescriptor(const char * buf,size_t n,const void * owner,upb_status * status)9781 upb_filedef **upb_loaddescriptor(const char *buf, size_t n, const void *owner,
9782 upb_status *status) {
9783 /* Create handlers. */
9784 const upb_pbdecodermethod *decoder_m;
9785 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
9786 upb_env env;
9787 upb_pbdecodermethodopts opts;
9788 upb_pbdecoder *decoder;
9789 upb_descreader *reader;
9790 bool ok;
9791 size_t i;
9792 upb_filedef **ret = NULL;
9793
9794 upb_pbdecodermethodopts_init(&opts, reader_h);
9795 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9796
9797 upb_env_init(&env);
9798 upb_env_reporterrorsto(&env, status);
9799
9800 reader = upb_descreader_create(&env, reader_h);
9801 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
9802
9803 /* Push input data. */
9804 ok = upb_bufsrc_putbuf(buf, n, upb_pbdecoder_input(decoder));
9805
9806 if (!ok) {
9807 goto cleanup;
9808 }
9809
9810 ret = upb_gmalloc(sizeof (*ret) * (upb_descreader_filecount(reader) + 1));
9811
9812 if (!ret) {
9813 goto cleanup;
9814 }
9815
9816 for (i = 0; i < upb_descreader_filecount(reader); i++) {
9817 ret[i] = upb_descreader_file(reader, i);
9818 upb_filedef_ref(ret[i], owner);
9819 }
9820
9821 ret[i] = NULL;
9822
9823 cleanup:
9824 upb_env_uninit(&env);
9825 upb_handlers_unref(reader_h, &reader_h);
9826 upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9827 return ret;
9828 }
9829 /*
9830 * upb::pb::TextPrinter
9831 *
9832 * OPT: This is not optimized at all. It uses printf() which parses the format
9833 * string every time, and it allocates memory for every put.
9834 */
9835
9836
9837 #include <ctype.h>
9838 #include <float.h>
9839 #include <inttypes.h>
9840 #include <stdarg.h>
9841 #include <stdio.h>
9842 #include <string.h>
9843
9844
9845 struct upb_textprinter {
9846 upb_sink input_;
9847 upb_bytessink *output_;
9848 int indent_depth_;
9849 bool single_line_;
9850 void *subc;
9851 };
9852
/* Jumps to the enclosing function's `err` label when |x| evaluates to a
 * negative value.  The do/while(0) wrapper makes the expansion a single
 * statement, so `CHECK(...);` stays correct inside an unbraced if/else. */
#define CHECK(x)           \
  do {                     \
    if ((x) < 0) goto err; \
  } while (0)
9854
/* Returns the part of |longname| after its last '.', or |longname| itself when
 * it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
9859
/* Writes the indentation for the current nesting depth: two spaces per level.
 * Nothing is written in single-line mode.  Always returns 0.
 *
 * The byte count is derived from the literal so the two cannot disagree; a
 * count larger than the visible text would also emit the literal's
 * terminating NUL byte. */
static int indent(upb_textprinter *p) {
  static const char indent_unit[] = "  ";
  int i;
  if (p->single_line_) {
    return 0;
  }
  for (i = 0; i < p->indent_depth_; i++) {
    upb_bytessink_putbuf(p->output_, p->subc, indent_unit,
                         sizeof(indent_unit) - 1, NULL);
  }
  return 0;
}
9867
/* Writes the field terminator: a space in single-line mode, a newline
 * otherwise.  Always returns 0. */
static int endfield(upb_textprinter *p) {
  char sep = '\n';
  if (p->single_line_) {
    sep = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &sep, 1, NULL);
  return 0;
}
9873
/* Writes |len| bytes from |buf| to the output with C-style escaping.
 *
 * Newline, carriage return, tab, both quote characters and backslash become
 * two-character escapes.  Any other byte that is not printable becomes a
 * numeric escape (octal, since |use_hex| is false).  When |preserve_utf8| is
 * true, bytes >= 0x80 are copied through unchanged.  Output is staged in a
 * local buffer and flushed to the sink when it fills up.  Always returns 0. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  /* Based on CEscapeInternal() from Google's protobuf release. */
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behavior. */
    const unsigned char uc = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* The longest expansion is a 4-character numeric escape, and sprintf()
     * also stores a terminating NUL after it, so 5 bytes must be free. */
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (*buf) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || uc < 0x80) &&
            (!isprint(uc) || (last_hex_escape && isxdigit(uc)))) {
          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned int)uc);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
9920
/* printf-style output: formats |fmt| and its arguments into a temporary heap
 * buffer and writes the result to the printer's output sink.
 *
 * Returns false if the formatted length cannot be determined or the buffer
 * cannot be allocated; otherwise returns the result of the sink write
 * converted to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* A negative length signals a formatting error; it must not be used to
   * size the buffer. */
  if (len < 0) {
    va_end(args);
    return false;
  }

  /* + 1 for the NUL terminator (vsprintf() requires it even if we don't). */
  str = upb_gmalloc((size_t)len + 1);
  if (!str) {
    /* Every va_start() needs a matching va_end(), on the failure path too. */
    va_end(args);
    return false;
  }
  written = vsprintf(str, fmt, args);
  va_end(args);
  UPB_ASSERT_VAR(written, written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  upb_gfree(str);
  return ok;
}
9947
9948
9949 /* handlers *******************************************************************/
9950
textprinter_startmsg(void * c,const void * hd)9951 static bool textprinter_startmsg(void *c, const void *hd) {
9952 upb_textprinter *p = c;
9953 UPB_UNUSED(hd);
9954 if (p->indent_depth_ == 0) {
9955 upb_bytessink_start(p->output_, 0, &p->subc);
9956 }
9957 return true;
9958 }
9959
/* End-of-message handler: ends the output sink when called at indent depth
 * zero (the top-level message).  Always returns true; |s| is not used. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(s);
  if (printer->indent_depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
9969
/* Generates textprinter_put<name>(): a value handler that prints
 * "<field name>: <value>" using printf conversion |fmt|, preceded by the
 * current indentation and followed by the field terminator.  The handler data
 * is the field's fielddef.  Returns false if indent() or endfield() reports a
 * negative result. */
#define TYPE(name, ctype, fmt)                                                \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                            \
    upb_textprinter *p = closure;                                             \
    const upb_fielddef *f = handler_data;                                     \
    if (indent(p) < 0) return false;                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                           \
    if (endfield(p) < 0) return false;                                        \
    return true;                                                              \
  }
9982
textprinter_putbool(void * closure,const void * handler_data,bool val)9983 static bool textprinter_putbool(void *closure, const void *handler_data,
9984 bool val) {
9985 upb_textprinter *p = closure;
9986 const upb_fielddef *f = handler_data;
9987 CHECK(indent(p));
9988 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9989 CHECK(endfield(p));
9990 return true;
9991 err:
9992 return false;
9993 }
9994
9995 #define STRINGIFY_HELPER(x) #x
9996 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
9997
9998 TYPE(int32, int32_t, "%" PRId32)
9999 TYPE(int64, int64_t, "%" PRId64)
10000 TYPE(uint32, uint32_t, "%" PRIu32)
10001 TYPE(uint64, uint64_t, "%" PRIu64)
STRINGIFY_MACROVAL(FLT_DIG)10002 TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
10003 TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
10004
10005 #undef TYPE
10006
10007 /* Output a symbolic value from the enum if found, else just print as int32. */
10008 static bool textprinter_putenum(void *closure, const void *handler_data,
10009 int32_t val) {
10010 upb_textprinter *p = closure;
10011 const upb_fielddef *f = handler_data;
10012 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
10013 const char *label = upb_enumdef_iton(enum_def, val);
10014 if (label) {
10015 indent(p);
10016 putf(p, "%s: %s", upb_fielddef_name(f), label);
10017 endfield(p);
10018 } else {
10019 if (!textprinter_putint32(closure, handler_data, val))
10020 return false;
10021 }
10022 return true;
10023 }
10024
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)10025 static void *textprinter_startstr(void *closure, const void *handler_data,
10026 size_t size_hint) {
10027 upb_textprinter *p = closure;
10028 const upb_fielddef *f = handler_data;
10029 UPB_UNUSED(size_hint);
10030 indent(p);
10031 putf(p, "%s: \"", upb_fielddef_name(f));
10032 return p;
10033 }
10034
textprinter_endstr(void * closure,const void * handler_data)10035 static bool textprinter_endstr(void *closure, const void *handler_data) {
10036 upb_textprinter *p = closure;
10037 UPB_UNUSED(handler_data);
10038 putf(p, "\"");
10039 endfield(p);
10040 return true;
10041 }
10042
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)10043 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
10044 size_t len, const upb_bufhandle *handle) {
10045 upb_textprinter *p = closure;
10046 const upb_fielddef *f = hd;
10047 UPB_UNUSED(handle);
10048 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
10049 return len;
10050 err:
10051 return 0;
10052 }
10053
textprinter_startsubmsg(void * closure,const void * handler_data)10054 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
10055 upb_textprinter *p = closure;
10056 const char *name = handler_data;
10057 CHECK(indent(p));
10058 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
10059 p->indent_depth_++;
10060 return p;
10061 err:
10062 return UPB_BREAK;
10063 }
10064
textprinter_endsubmsg(void * closure,const void * handler_data)10065 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
10066 upb_textprinter *p = closure;
10067 UPB_UNUSED(handler_data);
10068 p->indent_depth_--;
10069 CHECK(indent(p));
10070 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
10071 CHECK(endfield(p));
10072 return true;
10073 err:
10074 return false;
10075 }
10076
/* Callback passed to upb_handlers_newfrozen(): registers the text printer's
 * start/end-message handlers on |h| and, for each field of the message, the
 * handlers matching the field's type.  The handler data is the fielddef,
 * except for submessage fields, where it is the name to print.  |c| is
 * unused. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *m = upb_handlers_msgdef(h);
  upb_msg_field_iter i;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&attr, f);
    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        /* Tag-delimited fields are printed under the short name of their
         * message type; other submessages under the field name. */
        const char *name =
            upb_fielddef_istagdelim(f)
                ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
                : upb_fielddef_name(f);
        upb_handlerattr_sethandlerdata(&attr, name);
        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
        break;
    }
    /* Release the attr once the handlers are registered, as the pb encoder's
     * newhandlers_callback() does for its attrs. */
    upb_handlerattr_uninit(&attr);
  }
}
10135
/* Puts the printer at indent depth zero and selects single- or multi-line
 * output. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
10140
10141
10142 /* Public API *****************************************************************/
10143
upb_textprinter_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)10144 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
10145 upb_bytessink *output) {
10146 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
10147 if (!p) return NULL;
10148
10149 p->output_ = output;
10150 upb_sink_reset(&p->input_, h, p);
10151 textprinter_reset(p, false);
10152
10153 return p;
10154 }
10155
upb_textprinter_newhandlers(const upb_msgdef * m,const void * owner)10156 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
10157 const void *owner) {
10158 return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
10159 }
10160
upb_textprinter_input(upb_textprinter * p)10161 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
10162
/* Selects single-line (true) or multi-line (false) output.  The indent depth
 * is left untouched. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  p->single_line_ = single_line;
}
10166
10167
10168 /* Index is descriptor type. */
10169 const uint8_t upb_pb_native_wire_types[] = {
10170 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
10171 UPB_WIRE_TYPE_64BIT, /* DOUBLE */
10172 UPB_WIRE_TYPE_32BIT, /* FLOAT */
10173 UPB_WIRE_TYPE_VARINT, /* INT64 */
10174 UPB_WIRE_TYPE_VARINT, /* UINT64 */
10175 UPB_WIRE_TYPE_VARINT, /* INT32 */
10176 UPB_WIRE_TYPE_64BIT, /* FIXED64 */
10177 UPB_WIRE_TYPE_32BIT, /* FIXED32 */
10178 UPB_WIRE_TYPE_VARINT, /* BOOL */
10179 UPB_WIRE_TYPE_DELIMITED, /* STRING */
10180 UPB_WIRE_TYPE_START_GROUP, /* GROUP */
10181 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
10182 UPB_WIRE_TYPE_DELIMITED, /* BYTES */
10183 UPB_WIRE_TYPE_VARINT, /* UINT32 */
10184 UPB_WIRE_TYPE_VARINT, /* ENUM */
10185 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
10186 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
10187 UPB_WIRE_TYPE_VARINT, /* SINT32 */
10188 UPB_WIRE_TYPE_VARINT, /* SINT64 */
10189 };
10190
10191 /* A basic branch-based decoder, uses 32-bit values to get good performance
10192 * on 32-bit architectures (but performs well on 64-bits also).
10193 * This scheme comes from the original Google Protobuf implementation
10194 * (proto2). */
upb_vdecode_max8_branch32(upb_decoderet r)10195 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
10196 upb_decoderet err = {NULL, 0};
10197 const char *p = r.p;
10198 uint32_t low = (uint32_t)r.val;
10199 uint32_t high = 0;
10200 uint32_t b;
10201 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
10202 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
10203 b = *(p++); low |= (b & 0x7fU) << 28;
10204 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done;
10205 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done;
10206 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
10207 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
10208 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
10209 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
10210 return err;
10211
10212 done:
10213 r.val = ((uint64_t)high << 32) | low;
10214 r.p = p;
10215 return r;
10216 }
10217
10218 /* Like the previous, but uses 64-bit values. */
upb_vdecode_max8_branch64(upb_decoderet r)10219 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
10220 const char *p = r.p;
10221 uint64_t val = r.val;
10222 uint64_t b;
10223 upb_decoderet err = {NULL, 0};
10224 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
10225 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
10226 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
10227 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
10228 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
10229 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
10230 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
10231 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
10232 return err;
10233
10234 done:
10235 r.val = val;
10236 r.p = p;
10237 return r;
10238 }
10239
/* Given an encoded varint v, returns an integer with a single bit set that
 * indicates the end of the varint: the continuation-bit position of the first
 * byte whose continuation bit is clear.  Subtracting one from this value
 * yields a mask that leaves only bits that are part of the varint.  Returns 0
 * if the varint is unterminated. */
static uint64_t upb_get_vstopbit(uint64_t v) {
  const uint64_t payload_mask = 0x7f7f7f7f7f7f7f7fULL;
  /* With every payload bit forced to 1, adding one carries through each byte
   * whose continuation bit is set and stops at the first clear one. */
  uint64_t filled = v | payload_mask;
  uint64_t carried = filled + 1;
  return carried & ~filled;
}
10248
10249 /* A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. */
upb_vdecode_max8_massimino(upb_decoderet r)10250 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
10251 uint64_t b;
10252 uint64_t stop_bit;
10253 upb_decoderet my_r;
10254 memcpy(&b, r.p, sizeof(b));
10255 stop_bit = upb_get_vstopbit(b);
10256 b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
10257 b += b & 0x007f007f007f007fULL;
10258 b += 3 * (b & 0x0000ffff0000ffffULL);
10259 b += 15 * (b & 0x00000000ffffffffULL);
10260 if (stop_bit == 0) {
10261 /* Error: unterminated varint. */
10262 upb_decoderet err_r = {(void*)0, 0};
10263 return err_r;
10264 }
10265 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
10266 r.val | (b << 7));
10267 return my_r;
10268 }
10269
10270 /* A branchless decoder. Credit to Daniel Wright for the bit-twiddling. */
upb_vdecode_max8_wright(upb_decoderet r)10271 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
10272 uint64_t b;
10273 uint64_t stop_bit;
10274 upb_decoderet my_r;
10275 memcpy(&b, r.p, sizeof(b));
10276 stop_bit = upb_get_vstopbit(b);
10277 b &= (stop_bit - 1);
10278 b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
10279 b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
10280 b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
10281 if (stop_bit == 0) {
10282 /* Error: unterminated varint. */
10283 upb_decoderet err_r = {(void*)0, 0};
10284 return err_r;
10285 }
10286 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
10287 r.val | (b << 14));
10288 return my_r;
10289 }
10290
10291 #line 1 "upb/json/parser.rl"
10292 /*
10293 ** upb::json::Parser (upb_json_parser)
10294 **
10295 ** A parser that uses the Ragel State Machine Compiler to generate
10296 ** the finite automata.
10297 **
10298 ** Ragel only natively handles regular languages, but we can manually
10299 ** program it a bit to handle context-free languages like JSON, by using
10300 ** the "fcall" and "fret" constructs.
10301 **
10302 ** This parser can handle the basics, but needs several things to be fleshed
10303 ** out:
10304 **
10305 ** - handling of unicode escape sequences (including high surrogate pairs).
10306 ** - properly check and report errors for unknown fields, stack overflow,
10307 ** improper array nesting (or lack of nesting).
10308 ** - handling of base64 sequences with padding characters.
10309 ** - handling of push-back (non-success returns from sink functions).
10310 ** - handling of keys/escape-sequences/etc that span input buffers.
10311 */
10312
10313 #include <assert.h>
10314 #include <errno.h>
10315 #include <stdint.h>
10316 #include <stdlib.h>
10317 #include <string.h>
10318
10319
10320 #define UPB_JSON_MAX_DEPTH 64
10321
10322 typedef struct {
10323 upb_sink sink;
10324
10325 /* The current message in which we're parsing, and the field whose value we're
10326 * expecting next. */
10327 const upb_msgdef *m;
10328 const upb_fielddef *f;
10329
10330 /* The table mapping json name to fielddef for this message. */
10331 upb_strtable *name_table;
10332
10333 /* We are in a repeated-field context, ready to emit mapentries as
10334 * submessages. This flag alters the start-of-object (open-brace) behavior to
10335 * begin a sequence of mapentry messages rather than a single submessage. */
10336 bool is_map;
10337
10338 /* We are in a map-entry message context. This flag is set when parsing the
10339 * value field of a single map entry and indicates to all value-field parsers
10340 * (subobjects, strings, numbers, and bools) that the map-entry submessage
10341 * should end as soon as the value is parsed. */
10342 bool is_mapentry;
10343
10344 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
10345 * message's map field that we're currently parsing. This differs from |f|
10346 * because |f| is the field in the *current* message (i.e., the map-entry
10347 * message itself), not the parent's field that leads to this map. */
10348 const upb_fielddef *mapfield;
10349 } upb_jsonparser_frame;
10350
10351 struct upb_json_parser {
10352 upb_env *env;
10353 const upb_json_parsermethod *method;
10354 upb_bytessink input_;
10355
10356 /* Stack to track the JSON scopes we are in. */
10357 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
10358 upb_jsonparser_frame *top;
10359 upb_jsonparser_frame *limit;
10360
10361 upb_status status;
10362
10363 /* Ragel's internal parsing stack for the parsing state machine. */
10364 int current_state;
10365 int parser_stack[UPB_JSON_MAX_DEPTH];
10366 int parser_top;
10367
10368 /* The handle for the current buffer. */
10369 const upb_bufhandle *handle;
10370
10371 /* Accumulate buffer. See details in parser.rl. */
10372 const char *accumulated;
10373 size_t accumulated_len;
10374 char *accumulate_buf;
10375 size_t accumulate_buf_size;
10376
10377 /* Multi-part text data. See details in parser.rl. */
10378 int multipart_state;
10379 upb_selector_t string_selector;
10380
10381 /* Input capture. See details in parser.rl. */
10382 const char *capture;
10383
10384 /* Intermediate result of parsing a unicode escape sequence. */
10385 uint32_t digit;
10386 };
10387
10388 struct upb_json_parsermethod {
10389 upb_refcounted base;
10390
10391 upb_byteshandler input_handler_;
10392
10393 /* Mainly for the purposes of refcounting, so all the fielddefs we point
10394 * to stay alive. */
10395 const upb_msgdef *msg;
10396
10397 /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */
10398 upb_inttable name_tables;
10399 };
10400
/* Returns false from the enclosing function when |x| evaluates to false.  The
 * expansion has no trailing semicolon; callers supply it. */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false

/* Used to signal that a capture has been suspended. */
static char suspend_capture;
10405
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)10406 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
10407 upb_handlertype_t type) {
10408 upb_selector_t sel;
10409 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
10410 UPB_ASSERT_VAR(ok, ok);
10411 return sel;
10412 }
10413
parser_getsel(upb_json_parser * p)10414 static upb_selector_t parser_getsel(upb_json_parser *p) {
10415 return getsel_for_handlertype(
10416 p, upb_handlers_getprimitivehandlertype(p->top->f));
10417 }
10418
/* Returns true if one more frame can be pushed on the scope stack.  Otherwise
 * records "Nesting too deep" in the parser's status, reports it to the
 * environment and returns false. */
static bool check_stack(upb_json_parser *p) {
  if ((p->top + 1) != p->limit) {
    return true;
  }

  upb_status_seterrmsg(&p->status, "Nesting too deep");
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10428
/* Sets |frame|'s name table to the one the parser method stores for the
 * frame's message.  The lookup is asserted to succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  upb_value entry;
  bool found = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &entry);
  UPB_ASSERT_VAR(found, found);
  frame->name_table = upb_value_getptr(entry);
}
10435
10436 /* There are GCC/Clang built-ins for overflow checking which we could start
10437 * using if there was any performance benefit to it. */
10438
/* Stores a + b in *c and returns true, unless the sum would exceed SIZE_MAX;
 * in that case *c is left untouched and false is returned. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
10444
/* Returns a * b, or SIZE_MAX when the product does not fit in size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* The product overflows exactly when a exceeds SIZE_MAX / b. */
  if (b != 0 && a > SIZE_MAX / b) {
    return SIZE_MAX;
  }
  return a * b;
}
10453
10454
10455 /* Base64 decoding ************************************************************/
10456
10457 /* TODO(haberman): make this streaming. */
10458
/* Base64 decoding table, indexed by byte value.  Characters of the base64
 * alphabet map to their 6-bit value; every other byte maps to -1. */
static const signed char b64table[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x28 */ -1, -1, -1, 62, -1, -1, -1, 63,  /* '+' and '/' */
  /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59,  /* '0'..'7' */
  /* 0x38 */ 60, 61, -1, -1, -1, -1, -1, -1,  /* '8', '9' */
  /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  /* 'A'..'G' */
  /* 0x48 */  7,  8,  9, 10, 11, 12, 13, 14,  /* 'H'..'O' */
  /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22,  /* 'P'..'W' */
  /* 0x58 */ 23, 24, 25, -1, -1, -1, -1, -1,  /* 'X'..'Z' */
  /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32,  /* 'a'..'g' */
  /* 0x68 */ 33, 34, 35, 36, 37, 38, 39, 40,  /* 'h'..'o' */
  /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48,  /* 'p'..'w' */
  /* 0x78 */ 49, 50, 51, -1, -1, -1, -1, -1,  /* 'x'..'z' */
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x88 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x98 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf8 */ -1, -1, -1, -1, -1, -1, -1, -1
};
10493
10494 /* Returns the table value sign-extended to 32 bits. Knowing that the upper
10495 * bits will be 1 for unrecognized characters makes it easier to check for
10496 * this error condition later (see below). */
b64lookup(unsigned char ch)10497 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
10498
/* Returns true if |ch| is neither a character of the base64 alphabet nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
10502
/* Decodes |len| bytes of base64 text starting at |ptr| and pushes the decoded
 * bytes to the current frame's sink using selector |sel|.
 *
 * The input is consumed in groups of four characters.  A group made only of
 * alphabet characters yields three output bytes.  A group ending in "==" or
 * "=" yields one or two bytes respectively and must be the last group of the
 * input.
 *
 * Returns true on success.  On failure an error is recorded in p->status and
 * reported to the environment, and false is returned.  Failures are: length
 * not a multiple of four, characters outside the base64 alphabet, and
 * misplaced padding (including padding that is followed by more input). */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(&p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    /* Shift as unsigned: b64lookup() returns -1 for unrecognized characters
     * and left-shifting a negative signed value is undefined behavior.  The
     * conversion keeps all upper bits set for -1, so the check below still
     * detects any unrecognized character in the group. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6 |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; set if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3])) {
    upb_status_seterrf(&p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Every character is alphabet or '=', and at least one is '='.  Padding is
   * only meaningful in the final group; accepting it earlier would silently
   * discard the rest of the input. */
  if (limit - ptr != 4) {
    goto badpadding;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    assert(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(&p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10586
10587
10588 /* Accumulate buffer **********************************************************/
10589
10590 /* Functionality for accumulating a buffer.
10591 *
10592 * Some parts of the parser need an entire value as a contiguous string. For
10593 * example, to look up a member name in a hash table, or to turn a string into
10594 * a number, the relevant library routines need the input string to be in
10595 * contiguous memory, even if the value spanned two or more buffers in the
10596 * input. These routines handle that.
10597 *
10598 * In the common case we can just point to the input buffer to get this
10599 * contiguous string and avoid any actual copy. So we optimistically begin
10600 * this way. But there are a few cases where we must instead copy into a
10601 * separate buffer:
10602 *
10603 * 1. The string was not contiguous in the input (it spanned buffers).
10604 *
10605 * 2. The string included escape sequences that need to be interpreted to get
10606 * the true value in a contiguous buffer. */
10607
/* Debug check: asserts that nothing is currently accumulated (no pointer and
 * zero length). */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_UNUSED(p);
  assert(!p->accumulated);
  assert(!p->accumulated_len);
}
10613
/* Resets the accumulated region to empty.  Only the pointer and length are
 * reset; accumulate_buf itself is not touched here. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
10618
10619 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)10620 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
10621 void *mem;
10622 size_t old_size = p->accumulate_buf_size;
10623 size_t new_size = UPB_MAX(old_size, 128);
10624 while (new_size < need) {
10625 new_size = saturating_multiply(new_size, 2);
10626 }
10627
10628 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
10629 if (!mem) {
10630 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
10631 upb_env_reporterror(p->env, &p->status);
10632 return false;
10633 }
10634
10635 p->accumulate_buf = mem;
10636 p->accumulate_buf_size = new_size;
10637 return true;
10638 }
10639
10640 /* Logically appends the given data to the append buffer.
10641 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
10642 * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)10643 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
10644 bool can_alias) {
10645 size_t need;
10646
10647 if (!p->accumulated && can_alias) {
10648 p->accumulated = buf;
10649 p->accumulated_len = len;
10650 return true;
10651 }
10652
10653 if (!checked_add(p->accumulated_len, len, &need)) {
10654 upb_status_seterrmsg(&p->status, "Integer overflow.");
10655 upb_env_reporterror(p->env, &p->status);
10656 return false;
10657 }
10658
10659 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
10660 return false;
10661 }
10662
10663 if (p->accumulated != p->accumulate_buf) {
10664 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
10665 p->accumulated = p->accumulate_buf;
10666 }
10667
10668 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
10669 p->accumulated_len += len;
10670 return true;
10671 }
10672
10673 /* Returns a pointer to the data accumulated since the last accumulate_clear()
10674 * call, and writes the length to *len. This with point either to the input
10675 * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)10676 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
10677 assert(p->accumulated);
10678 *len = p->accumulated_len;
10679 return p->accumulated;
10680 }
10681
10682
/* Multi-part text data *******************************************************/
10684
10685 /* When we have text data in the input, it can often come in multiple segments.
10686 * For example, there may be some raw string data followed by an escape
10687 * sequence. The two segments are processed with different logic. Also buffer
10688 * seams in the input can cause multiple segments.
10689 *
10690 * As we see segments, there are two main cases for how we want to process them:
10691 *
10692 * 1. we want to push the captured input directly to string handlers.
10693 *
10694 * 2. we need to accumulate all the parts into a contiguous buffer for further
10695 * processing (field name lookup, string->number conversion, etc). */
10696
/* This is the set of states for p->multipart_state.  multipart_text()
 * dispatches on this value to decide what to do with each text segment. */
enum {
  /* We are not currently processing multipart data.  Receiving text in this
   * state is reported as an internal error. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
10710
10711 /* Start a multi-part text value where we accumulate the data for processing at
10712 * the end. */
multipart_startaccum(upb_json_parser * p)10713 static void multipart_startaccum(upb_json_parser *p) {
10714 assert_accumulate_empty(p);
10715 assert(p->multipart_state == MULTIPART_INACTIVE);
10716 p->multipart_state = MULTIPART_ACCUMULATE;
10717 }
10718
10719 /* Start a multi-part text value where we immediately push text data to a string
10720 * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)10721 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10722 assert_accumulate_empty(p);
10723 assert(p->multipart_state == MULTIPART_INACTIVE);
10724 p->multipart_state = MULTIPART_PUSHEAGERLY;
10725 p->string_selector = sel;
10726 }
10727
/* Delivers one segment of text for the multi-part value in progress.
 *
 * In MULTIPART_ACCUMULATE the segment is appended to the accumulate buffer
 * (possibly by aliasing |buf| when |can_alias| is true).  In
 * MULTIPART_PUSHEAGERLY it is pushed to the current frame's sink with
 * p->string_selector; the buffer handle is passed along only when |can_alias|
 * is true.  Calling this in MULTIPART_INACTIVE is an internal error.
 *
 * Returns false if an error was recorded, true otherwise. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  if (p->multipart_state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (p->multipart_state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (p->multipart_state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
10752
10753 /* Note: this invalidates the accumulate buffer! Call only after reading its
10754 * contents. */
multipart_end(upb_json_parser * p)10755 static void multipart_end(upb_json_parser *p) {
10756 assert(p->multipart_state != MULTIPART_INACTIVE);
10757 p->multipart_state = MULTIPART_INACTIVE;
10758 accumulate_clear(p);
10759 }
10760
10761
10762 /* Input capture **************************************************************/
10763
10764 /* Functionality for capturing a region of the input as text. Gracefully
10765 * handles the case where a buffer seam occurs in the middle of the captured
10766 * region. */
10767
/* Marks |ptr| as the start of a captured input region.  A multipart value
 * must be active and no capture may already be in progress (both asserted). */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  assert(p->capture == NULL);
  assert(p->multipart_state != MULTIPART_INACTIVE);
  p->capture = ptr;
}
10773
/* Ends the current capture at |ptr|, handing the bytes from the capture start
 * up to |ptr| to multipart_text() as aliasable data.  On success the capture
 * is cleared and true is returned; on failure the capture pointer is left in
 * place and false is returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  assert(p->capture);
  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }
  p->capture = NULL;
  return true;
}
10783
10784 /* This is called at the end of each input buffer (ie. when we have hit a
10785 * buffer seam). If we are in the middle of capturing the input, this
10786 * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)10787 static void capture_suspend(upb_json_parser *p, const char **ptr) {
10788 if (!p->capture) return;
10789
10790 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
10791 /* We use this as a signal that we were in the middle of capturing, and
10792 * that capturing should resume at the beginning of the next buffer.
10793 *
10794 * We can't use *ptr here, because we have no guarantee that this pointer
10795 * will be valid when we resume (if the underlying memory is freed, then
10796 * using the pointer at all, even to compare to NULL, is likely undefined
10797 * behavior). */
10798 p->capture = &suspend_capture;
10799 } else {
10800 /* Need to back up the pointer to the beginning of the capture, since
10801 * we were not able to actually preserve it. */
10802 *ptr = p->capture;
10803 }
10804 }
10805
/* Resumes a capture previously suspended by capture_suspend(), restarting it
 * at |ptr|.  Does nothing when no capture was in progress. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }
  assert(p->capture == &suspend_capture);
  p->capture = ptr;
}
10812
10813
10814 /* Callbacks from the parser **************************************************/
10815
10816 /* These are the functions called directly from the parser itself.
10817 * We define these in the same order as their declarations in the parser. */
10818
/* Maps the character following a backslash in a JSON escape sequence to the
 * character it denotes (for example 'n' -> newline).  Any other input trips
 * an assertion; with assertions disabled 'x' is returned. */
static char escape_char(char in) {
  /* Parallel tables: escapes[i] is decoded as decoded[i]. */
  static const char escapes[] = "rtnfb/\"\\";
  static const char decoded[] = "\r\t\n\f\b/\"\\";
  size_t i;

  /* sizeof - 1 skips the terminating NUL so '\0' never matches. */
  for (i = 0; i < sizeof(escapes) - 1; i++) {
    if (escapes[i] == in) {
      return decoded[i];
    }
  }

  assert(0);
  return 'x';
}
10834
/* Handles a single-character escape: decodes the escape character at *ptr and
 * emits the resulting byte as (non-aliasable) multipart text. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(*ptr);
  return multipart_text(p, &unescaped, 1, false);
}
10839
/* Begins a hex escape: resets the accumulator that hexdigit() builds up. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
10843
/* Folds the hex digit at *ptr into p->digit: the accumulator is shifted left
 * by four bits and the digit's value is added.  Accepts 0-9, a-f and A-F;
 * anything else trips an assertion. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char ch = *ptr;
  int nibble;

  if (ch >= '0' && ch <= '9') {
    nibble = ch - '0';
  } else if (ch >= 'a' && ch <= 'f') {
    nibble = (ch - 'a') + 10;
  } else {
    assert(ch >= 'A' && ch <= 'F');
    nibble = (ch - 'A') + 10;
  }

  p->digit <<= 4;
  p->digit += nibble;
}
10858
/* Finishes a hex escape: encodes the code point collected in p->digit as
 * UTF-8 (one to three bytes) and emits it as non-aliasable multipart text.
 *
 * TODO(haberman): Handle high surrogates: if the code point is a high
 * surrogate we have to wait for the next escape to get the full code point. */
static bool end_hex(upb_json_parser *p) {
  uint32_t cp = p->digit;
  char utf8[3]; /* Three bytes are enough for values up to 0xFFFF. */
  int length;

  if (cp <= 0x7F) {
    /* One byte: 0xxxxxxx */
    utf8[0] = cp;
    length = 1;
  } else if (cp <= 0x07FF) {
    /* Two bytes: 110xxxxx 10xxxxxx */
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    length = 2;
  } else {
    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    length = 3;
  }

  return multipart_text(p, utf8, length, false);
}
10886
/* Parser callback: a run of literal text starts at |ptr|; begin capturing. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10890
/* Parser callback: the run of literal text ends at |ptr|; finish the capture
 * and return whether its contents were delivered successfully. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  bool delivered = capture_end(p, ptr);
  return delivered;
}
10894
/* Parser callback: a number starts at |ptr|.  Numbers are accumulated in full
 * before conversion, so this enters accumulate mode and begins capturing. */
static void start_number(upb_json_parser *p, const char *ptr) {
  multipart_startaccum(p);
  capture_begin(p, ptr);
}
10899
10900 static bool parse_number(upb_json_parser *p);
10901
end_number(upb_json_parser * p,const char * ptr)10902 static bool end_number(upb_json_parser *p, const char *ptr) {
10903 if (!capture_end(p, ptr)) {
10904 return false;
10905 }
10906
10907 return parse_number(p);
10908 }
10909
/* Converts the accumulated text to a number of the current field's type and
 * pushes it to the current frame's sink.
 *
 * A NUL byte is appended to the accumulated data first, because the strto*()
 * family needs a terminated string.  The conversion must consume the whole
 * text and must not report a range error; otherwise an "error parsing number"
 * status is recorded and reported and false is returned.  In both outcomes
 * the multipart value is ended (which clears the accumulated data).  If
 * appending the NUL fails, false is returned without ending the multipart
 * value. */
static bool parse_number(upb_json_parser *p) {
  size_t len;
  const char *buf;
  const char *myend;
  char *end;

  /* strtol() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NULL-terminated buffer. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);
  myend = buf + len - 1;  /* One for NULL. */

  /* The strto*() functions set errno on failure but never clear it, so a
   * stale ERANGE left by earlier code would make a valid number look out of
   * range.  Clear it before converting. */
  errno = 0;

  /* XXX: We are using strtol to parse integers, but this is wrong as even
   * integers can be represented as 1e6 (for example), which strtol can't
   * handle correctly.
   *
   * XXX: Also, we can't handle large integers properly because strto[u]ll
   * isn't in C89.
   *
   * XXX: Also, we don't properly check floats for overflow, since strtof
   * isn't in C89. */
  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32: {
      long val = strtol(buf, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_INT64: {
      long long val = strtol(buf, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(buf, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT64: {
      unsigned long long val = strtoul(buf, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_FLOAT: {
      float val = strtod(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
      break;
    }
    default:
      assert(false);
  }

  multipart_end(p);

  return true;

err:
  /* Format the message before multipart_end(): it clears the accumulated
   * region that |buf| refers to. */
  upb_status_seterrf(&p->status, "error parsing number: %s", buf);
  upb_env_reporterror(p->env, &p->status);
  multipart_end(p);
  return false;
}
10998
/* Pushes the boolean |val| to the current frame's sink.  If the current field
 * is not of bool type, records and reports an error and returns false. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool ok;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
    UPB_ASSERT_VAR(ok, ok);
    return true;
  }

  upb_status_seterrf(&p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
11015
/* Parser callback: a quoted string value begins for the current field.
 *
 * - String-typed fields (as reported by upb_fielddef_isstring()) get a new
 *   parser frame and a STARTSTR event.  STRING fields then push text eagerly;
 *   other string-like fields accumulate, because the base64 decoder is not
 *   streaming.
 * - ENUM fields accumulate the symbolic name in the current frame.
 * - Any other field type is an error.
 *
 * Returns false if an error was recorded (including a full parser stack). */
static bool start_stringval(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  assert(p->top->f);

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
      /* No need to push a frame -- symbolic enum names in quotes remain in
       * the current parser frame.
       *
       * Enum string values must accumulate so we can look up the value in a
       * table once it is complete. */
      multipart_startaccum(p);
      return true;
    }

    upb_status_seterrf(&p->status,
                       "String specified for non-string/non-enum field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) {
    return false;
  }

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  inner = p->top + 1;
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->name_table = NULL;
  inner->is_map = false;
  inner->is_mapentry = false;
  p->top = inner;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
    /* For STRING fields we push data directly to the handlers as it is
     * parsed.  We don't do this yet for BYTES fields, because our base64
     * decoder is not streaming.
     *
     * TODO(haberman): make base64 decoding streaming also. */
    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  } else {
    multipart_startaccum(p);
  }

  return true;
}
11065
/* Parser callback: the quoted string value for the current field has ended.
 *
 * - BYTES: base64-decodes the accumulated text and pushes it, then continues
 *   as for STRING.  If decoding fails, returns false immediately.
 * - STRING: sends ENDSTR and pops the string frame.
 * - ENUM: resolves the accumulated symbolic name to its integer value and
 *   pushes it; an unknown name is recorded and reported as an error.
 *
 * Except for the BYTES decode failure, the multipart value is ended before
 * returning.  Returns true on success, false if an error was recorded. */
static bool end_stringval(upb_json_parser *p) {
  bool ok = true;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        /* The "%.*s" precision is read as an int; a size_t passed through
         * varargs here is undefined behavior where the two types differ in
         * size, so convert explicitly. */
        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
        upb_env_reporterror(p->env, &p->status);
      }

      break;
    }

    default:
      assert(false);
      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
      upb_env_reporterror(p->env, &p->status);
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
11118
/* Parser callback: an object member name begins.  No field may be selected in
 * the current frame yet (asserted); the name is accumulated so it can be
 * looked up once complete. */
static void start_member(upb_json_parser *p) {
  assert(p->top->f == NULL);
  multipart_startaccum(p);
}
11123
11124 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
11125 * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)11126 static bool parse_mapentry_key(upb_json_parser *p) {
11127
11128 size_t len;
11129 const char *buf = accumulate_getptr(p, &len);
11130
11131 /* Emit the key field. We do a bit of ad-hoc parsing here because the
11132 * parser state machine has already decided that this is a string field
11133 * name, and we are reinterpreting it as some arbitrary key type. In
11134 * particular, integer and bool keys are quoted, so we need to parse the
11135 * quoted string contents here. */
11136
11137 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
11138 if (p->top->f == NULL) {
11139 upb_status_seterrmsg(&p->status, "mapentry message has no key");
11140 upb_env_reporterror(p->env, &p->status);
11141 return false;
11142 }
11143 switch (upb_fielddef_type(p->top->f)) {
11144 case UPB_TYPE_INT32:
11145 case UPB_TYPE_INT64:
11146 case UPB_TYPE_UINT32:
11147 case UPB_TYPE_UINT64:
11148 /* Invoke end_number. The accum buffer has the number's text already. */
11149 if (!parse_number(p)) {
11150 return false;
11151 }
11152 break;
11153 case UPB_TYPE_BOOL:
11154 if (len == 4 && !strncmp(buf, "true", 4)) {
11155 if (!parser_putbool(p, true)) {
11156 return false;
11157 }
11158 } else if (len == 5 && !strncmp(buf, "false", 5)) {
11159 if (!parser_putbool(p, false)) {
11160 return false;
11161 }
11162 } else {
11163 upb_status_seterrmsg(&p->status,
11164 "Map bool key not 'true' or 'false'");
11165 upb_env_reporterror(p->env, &p->status);
11166 return false;
11167 }
11168 multipart_end(p);
11169 break;
11170 case UPB_TYPE_STRING:
11171 case UPB_TYPE_BYTES: {
11172 upb_sink subsink;
11173 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
11174 upb_sink_startstr(&p->top->sink, sel, len, &subsink);
11175 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
11176 upb_sink_putstring(&subsink, sel, buf, len, NULL);
11177 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
11178 upb_sink_endstr(&subsink, sel);
11179 multipart_end(p);
11180 break;
11181 }
11182 default:
11183 upb_status_seterrmsg(&p->status, "Invalid field type for map key");
11184 upb_env_reporterror(p->env, &p->status);
11185 return false;
11186 }
11187
11188 return true;
11189 }
11190
11191 /* Helper: emit one map entry (as a submessage in the map field sequence). This
11192 * is invoked from end_membername(), at the end of the map entry's key string,
11193 * with the map key in the accumulate buffer. It parses the key from that
11194 * buffer, emits the handler calls to start the mapentry submessage (setting up
11195 * its subframe in the process), and sets up state in the subframe so that the
11196 * value parser (invoked next) will emit the mapentry's value field and then
11197 * end the mapentry message. */
11198
handle_mapentry(upb_json_parser * p)11199 static bool handle_mapentry(upb_json_parser *p) {
11200 const upb_fielddef *mapfield;
11201 const upb_msgdef *mapentrymsg;
11202 upb_jsonparser_frame *inner;
11203 upb_selector_t sel;
11204
11205 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
11206 * for the mapentry itself, and then set |f| in that frame so that the map
11207 * value field is parsed, and also set a flag to end the frame after the
11208 * map-entry value is parsed. */
11209 if (!check_stack(p)) return false;
11210
11211 mapfield = p->top->mapfield;
11212 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
11213
11214 inner = p->top + 1;
11215 p->top->f = mapfield;
11216 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
11217 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
11218 inner->m = mapentrymsg;
11219 inner->name_table = NULL;
11220 inner->mapfield = mapfield;
11221 inner->is_map = false;
11222
11223 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
11224 * the key field value to the sink, and these handlers will pop the frame
11225 * if they see is_mapentry (when invoked by the parser state machine, they
11226 * would have just seen the map-entry value, not key). */
11227 inner->is_mapentry = false;
11228 p->top = inner;
11229
11230 /* send STARTMSG in submsg frame. */
11231 upb_sink_startmsg(&p->top->sink);
11232
11233 parse_mapentry_key(p);
11234
11235 /* Set up the value field to receive the map-entry value. */
11236 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
11237 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
11238 p->top->mapfield = mapfield;
11239 if (p->top->f == NULL) {
11240 upb_status_seterrmsg(&p->status, "mapentry message has no value");
11241 upb_env_reporterror(p->env, &p->status);
11242 return false;
11243 }
11244
11245 return true;
11246 }
11247
/* Parser callback: an object member name has been fully read.
 *
 * In a map frame the name is a map key and is handed to handle_mapentry().
 * Otherwise the accumulated name is looked up in the frame's name table; on a
 * hit the field is selected and the multipart value is ended.  An unknown
 * name is recorded and reported as an error and false is returned. */
static bool end_membername(upb_json_parser *p) {
  size_t name_len;
  const char *name;
  upb_value entry;

  assert(!p->top->f);

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  name = accumulate_getptr(p, &name_len);
  if (!upb_strtable_lookup2(p->top->name_table, name, name_len, &entry)) {
    /* TODO(haberman): Ignore unknown fields if requested/configured to do
     * so. */
    upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)name_len,
                       name);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  p->top->f = upb_value_getconstptr(entry);
  multipart_end(p);
  return true;
}
11272
/* Parser callback: an object member (name and value) is complete.
 *
 * If the current frame is a map-entry frame, the entry's message is ended,
 * the frame is popped and ENDSUBMSG is sent on the enclosing frame.  In every
 * case the (now current) frame's field selection is cleared. */
static void end_member(upb_json_parser *p) {
  upb_status entry_status = UPB_STATUS_INIT;
  upb_selector_t endsubmsg_sel;
  const upb_fielddef *mapfield;
  bool have_sel;

  if (!p->top->is_mapentry) {
    p->top->f = NULL;
    return;
  }

  /* We just parsed a map-entry value, so end that frame too. */
  assert(p->top > p->stack);

  /* send ENDMSG on submsg. */
  upb_sink_endmsg(&p->top->sink, &entry_status);
  mapfield = p->top->mapfield;

  /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
  p->top--;
  have_sel =
      upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &endsubmsg_sel);
  UPB_ASSERT_VAR(have_sel, have_sel);
  upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);

  p->top->f = NULL;
}
11295
/* Parser callback: a JSON object begins as the value of the current field.
 *
 * For a map field a new frame is pushed in repeated-field context (STARTSEQ);
 * for a submessage field a new frame is pushed in submessage context
 * (STARTSUBMSG) with that message's name table.  Any other field type is an
 * error.  Returns false if an error was recorded (including a full parser
 * stack). */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;
  bool is_map;

  assert(p->top->f);

  is_map = upb_fielddef_ismap(p->top->f);
  if (!is_map && !upb_fielddef_issubmsg(p->top->f)) {
    upb_status_seterrf(&p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) {
    return false;
  }

  inner = p->top + 1;

  if (is_map) {
    /* Beginning of a map.  Start a new parser frame in a repeated-field
     * context. */
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    inner->name_table = NULL;
    inner->mapfield = p->top->f;
    inner->f = NULL;
    inner->is_map = true;
    inner->is_mapentry = false;
  } else {
    /* Beginning of a subobject.  Start a new parser frame in the submsg
     * context. */
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    set_name_table(p, inner);
    inner->f = NULL;
    inner->is_map = false;
    inner->is_mapentry = false;
  }

  p->top = inner;
  return true;
}
11347
/* Parser callback: the JSON object for the current frame has ended.  Pops the
 * frame and sends ENDSEQ (if the popped frame was a map) or ENDSUBMSG
 * (otherwise) on the enclosing frame. */
static void end_subobject(upb_json_parser *p) {
  const bool was_map = p->top->is_map;
  upb_selector_t sel;

  p->top--;

  if (was_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);
  } else {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }
}
11361
/* Parser callback: a JSON array begins as the value of the current field.
 *
 * The field must be a sequence (per upb_fielddef_isseq()); otherwise an error
 * is recorded and reported.  On success STARTSEQ is sent and a new frame for
 * the same message and field is pushed.  Returns false if an error was
 * recorded (including a full parser stack). */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *seq_frame;
  upb_selector_t startseq_sel;

  assert(p->top->f);

  if (upb_fielddef_isseq(p->top->f)) {
    if (!check_stack(p)) {
      return false;
    }

    seq_frame = p->top + 1;
    startseq_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, startseq_sel, &seq_frame->sink);
    seq_frame->m = p->top->m;
    seq_frame->f = p->top->f;
    seq_frame->name_table = NULL;
    seq_frame->is_map = false;
    seq_frame->is_mapentry = false;
    p->top = seq_frame;

    return true;
  }

  upb_status_seterrf(&p->status,
                     "Array specified for non-repeated field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
11390
/* Ends a JSON array: pops the element frame, then emits ENDSEQ on the sink of
 * the frame that is now on top. */
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq_sel;

  /* There must be a frame above the root to pop. */
  assert(p->top > p->stack);

  --p->top;
  endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, endseq_sel);
}
11400
/* Emits startmsg on the current frame's sink, except for frames flagged
 * is_map, which get no startmsg. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map) return;
  upb_sink_startmsg(&p->top->sink);
}
11406
/* Emits endmsg on the current frame's sink (skipped for frames flagged
 * is_map).  A non-OK status produced by endmsg is reported through the env. */
static void end_object(upb_json_parser *p) {
  upb_status endmsg_status;

  if (p->top->is_map) return;

  upb_status_clear(&endmsg_status);
  upb_sink_endmsg(&p->top->sink, &endmsg_status);
  if (upb_ok(&endmsg_status)) return;

  upb_env_reporterror(p->env, &endmsg_status);
}
11417
11418
/* Jumps to the enclosing function's "error" label when (x) evaluates to
 * false.  The do/while(0) wrapper makes the expansion a single statement, so
 * it cannot capture an "else" that follows the macro invocation. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
11420
11421
11422 /* The actual parser **********************************************************/
11423
11424 /* What follows is the Ragel parser itself. The language is specified in Ragel
11425 * and the actions call our C functions above.
11426 *
11427 * Ragel has an extensive set of functionality, and we use only a small part of
11428 * it. There are many action types but we only use a few:
11429 *
11430 * ">" -- transition into a machine
11431 * "%" -- transition out of a machine
11432 * "@" -- transition into a final state of a machine.
11433 *
11434 * "@" transitions are tricky because a machine can transition into a final
11435 * state repeatedly. But in some cases we know this can't happen, for example
11436 * a string which is delimited by a final '"' can only transition into its
11437 * final state once, when the closing '"' is seen. */
11438
11439
11440 #line 1245 "upb/json/parser.rl"
11441
11442
11443
11444 #line 1157 "upb/json/parser.c"
/* Ragel-generated action lists.  parse() enters this table at the offset
 * given by _json_trans_actions[]; the byte at that offset is the number of
 * actions and the bytes that follow are the action ids dispatched by the
 * switch statement in parse(). */
static const char _json_actions[] = {
  0, 1, 0, 1, 2, 1, 3, 1,
  5, 1, 6, 1, 7, 1, 8, 1,
  10, 1, 12, 1, 13, 1, 14, 1,
  15, 1, 16, 1, 17, 1, 21, 1,
  25, 1, 27, 2, 3, 8, 2, 4,
  5, 2, 6, 2, 2, 6, 8, 2,
  11, 9, 2, 13, 15, 2, 14, 15,
  2, 18, 1, 2, 19, 27, 2, 20,
  9, 2, 22, 27, 2, 23, 27, 2,
  24, 27, 2, 26, 27, 3, 14, 11,
  9
};
11458
/* Ragel-generated.  Indexed by state: offset into _json_trans_keys[] at which
 * that state's keys begin. */
static const unsigned char _json_key_offsets[] = {
  0, 0, 4, 9, 14, 15, 19, 24,
  29, 34, 38, 42, 45, 48, 50, 54,
  58, 60, 62, 67, 69, 71, 80, 86,
  92, 98, 104, 106, 115, 116, 116, 116,
  121, 126, 131, 132, 133, 134, 135, 135,
  136, 137, 138, 138, 139, 140, 141, 141,
  146, 151, 152, 156, 161, 166, 171, 175,
  175, 178, 178, 178
};
11469
/* Ragel-generated.  Input bytes each state matches against.  For a state,
 * parse() first binary-searches _json_single_lengths[state] single keys and
 * then _json_range_lengths[state] (low, high) pairs stored right after them. */
static const char _json_trans_keys[] = {
  32, 123, 9, 13, 32, 34, 125, 9,
  13, 32, 34, 125, 9, 13, 34, 32,
  58, 9, 13, 32, 93, 125, 9, 13,
  32, 44, 125, 9, 13, 32, 44, 125,
  9, 13, 32, 34, 9, 13, 45, 48,
  49, 57, 48, 49, 57, 46, 69, 101,
  48, 57, 69, 101, 48, 57, 43, 45,
  48, 57, 48, 57, 48, 57, 46, 69,
  101, 48, 57, 34, 92, 34, 92, 34,
  47, 92, 98, 102, 110, 114, 116, 117,
  48, 57, 65, 70, 97, 102, 48, 57,
  65, 70, 97, 102, 48, 57, 65, 70,
  97, 102, 48, 57, 65, 70, 97, 102,
  34, 92, 34, 45, 91, 102, 110, 116,
  123, 48, 57, 34, 32, 93, 125, 9,
  13, 32, 44, 93, 9, 13, 32, 93,
  125, 9, 13, 97, 108, 115, 101, 117,
  108, 108, 114, 117, 101, 32, 34, 125,
  9, 13, 32, 34, 125, 9, 13, 34,
  32, 58, 9, 13, 32, 93, 125, 9,
  13, 32, 44, 125, 9, 13, 32, 44,
  125, 9, 13, 32, 34, 9, 13, 32,
  9, 13, 0
};
11495
/* Ragel-generated.  Indexed by state: number of single-byte keys that state
 * has in _json_trans_keys[]. */
static const char _json_single_lengths[] = {
  0, 2, 3, 3, 1, 2, 3, 3,
  3, 2, 2, 1, 3, 0, 2, 2,
  0, 0, 3, 2, 2, 9, 0, 0,
  0, 0, 2, 7, 1, 0, 0, 3,
  3, 3, 1, 1, 1, 1, 0, 1,
  1, 1, 0, 1, 1, 1, 0, 3,
  3, 1, 2, 3, 3, 3, 2, 0,
  1, 0, 0, 0
};
11506
/* Ragel-generated.  Indexed by state: number of (low, high) range pairs that
 * state has in _json_trans_keys[], stored after its single keys. */
static const char _json_range_lengths[] = {
  0, 1, 1, 1, 0, 1, 1, 1,
  1, 1, 1, 1, 0, 1, 1, 1,
  1, 1, 1, 0, 0, 0, 3, 3,
  3, 3, 0, 1, 0, 0, 0, 1,
  1, 1, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 1,
  1, 0, 1, 1, 1, 1, 1, 0,
  1, 0, 0, 0
};
11517
/* Ragel-generated.  Indexed by state: base position in _json_indicies[] for
 * that state; parse() adds the position of the matched key to it. */
static const short _json_index_offsets[] = {
  0, 0, 4, 9, 14, 16, 20, 25,
  30, 35, 39, 43, 46, 50, 52, 56,
  60, 62, 64, 69, 72, 75, 85, 89,
  93, 97, 101, 104, 113, 115, 116, 117,
  122, 127, 132, 134, 136, 138, 140, 141,
  143, 145, 147, 148, 150, 152, 154, 155,
  160, 165, 167, 171, 176, 181, 186, 190,
  191, 194, 195, 196
};
11528
/* Ragel-generated.  Maps the per-state key position computed in parse() to a
 * transition id, which then indexes _json_trans_targs[] and
 * _json_trans_actions[]. */
static const char _json_indicies[] = {
  0, 2, 0, 1, 3, 4, 5, 3,
  1, 6, 7, 8, 6, 1, 9, 1,
  10, 11, 10, 1, 11, 1, 1, 11,
  12, 13, 14, 15, 13, 1, 16, 17,
  8, 16, 1, 17, 7, 17, 1, 18,
  19, 20, 1, 19, 20, 1, 22, 23,
  23, 21, 24, 1, 23, 23, 24, 21,
  25, 25, 26, 1, 26, 1, 26, 21,
  22, 23, 23, 20, 21, 28, 29, 27,
  31, 32, 30, 33, 33, 33, 33, 33,
  33, 33, 33, 34, 1, 35, 35, 35,
  1, 36, 36, 36, 1, 37, 37, 37,
  1, 38, 38, 38, 1, 40, 41, 39,
  42, 43, 44, 45, 46, 47, 48, 43,
  1, 49, 1, 50, 51, 53, 54, 1,
  53, 52, 55, 56, 54, 55, 1, 56,
  1, 1, 56, 52, 57, 1, 58, 1,
  59, 1, 60, 1, 61, 62, 1, 63,
  1, 64, 1, 65, 66, 1, 67, 1,
  68, 1, 69, 70, 71, 72, 70, 1,
  73, 74, 75, 73, 1, 76, 1, 77,
  78, 77, 1, 78, 1, 1, 78, 79,
  80, 81, 82, 80, 1, 83, 84, 75,
  83, 1, 84, 74, 84, 1, 85, 86,
  86, 1, 1, 1, 1, 0
};
11556
/* Ragel-generated.  Indexed by transition id: the state entered by that
 * transition.  parse() stops as soon as the state becomes 0. */
static const char _json_trans_targs[] = {
  1, 0, 2, 3, 4, 56, 3, 4,
  56, 5, 5, 6, 7, 8, 9, 56,
  8, 9, 11, 12, 18, 57, 13, 15,
  14, 16, 17, 20, 58, 21, 20, 58,
  21, 19, 22, 23, 24, 25, 26, 20,
  58, 21, 28, 30, 31, 34, 39, 43,
  47, 29, 59, 59, 32, 31, 29, 32,
  33, 35, 36, 37, 38, 59, 40, 41,
  42, 59, 44, 45, 46, 59, 48, 49,
  55, 48, 49, 55, 50, 50, 51, 52,
  53, 54, 55, 53, 54, 59, 56
};
11570
/* Ragel-generated.  Indexed by transition id: offset into _json_actions[] of
 * the action list to run for that transition; 0 means no actions. */
static const char _json_trans_actions[] = {
  0, 0, 0, 21, 77, 53, 0, 47,
  23, 17, 0, 0, 15, 19, 19, 50,
  0, 0, 0, 0, 0, 1, 0, 0,
  0, 0, 0, 3, 13, 0, 0, 35,
  5, 11, 0, 38, 7, 7, 7, 41,
  44, 9, 62, 56, 25, 0, 0, 0,
  31, 29, 33, 59, 15, 0, 27, 0,
  0, 0, 0, 0, 0, 68, 0, 0,
  0, 71, 0, 0, 0, 65, 21, 77,
  53, 0, 47, 23, 17, 0, 0, 15,
  19, 19, 50, 0, 0, 74, 0
};
11584
/* Ragel-generated state numbers.  json_start is the state installed by
 * json_parser_reset(); the json_en_* values are the entry states of the named
 * machines.  parse() uses the literals 10, 19 and 27 when it calls into the
 * number, string and value machines. */
static const int json_start = 1;

static const int json_en_number_machine = 10;
static const int json_en_string_machine = 19;
static const int json_en_value_machine = 27;
static const int json_en_main = 1;
11591
11592
11593 #line 1248 "upb/json/parser.rl"
11594
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)11595 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11596 const upb_bufhandle *handle) {
11597 upb_json_parser *parser = closure;
11598
11599 /* Variables used by Ragel's generated code. */
11600 int cs = parser->current_state;
11601 int *stack = parser->parser_stack;
11602 int top = parser->parser_top;
11603
11604 const char *p = buf;
11605 const char *pe = buf + size;
11606
11607 parser->handle = handle;
11608
11609 UPB_UNUSED(hd);
11610 UPB_UNUSED(handle);
11611
11612 capture_resume(parser, buf);
11613
11614
11615 #line 1328 "upb/json/parser.c"
11616 {
11617 int _klen;
11618 unsigned int _trans;
11619 const char *_acts;
11620 unsigned int _nacts;
11621 const char *_keys;
11622
11623 if ( p == pe )
11624 goto _test_eof;
11625 if ( cs == 0 )
11626 goto _out;
11627 _resume:
11628 _keys = _json_trans_keys + _json_key_offsets[cs];
11629 _trans = _json_index_offsets[cs];
11630
11631 _klen = _json_single_lengths[cs];
11632 if ( _klen > 0 ) {
11633 const char *_lower = _keys;
11634 const char *_mid;
11635 const char *_upper = _keys + _klen - 1;
11636 while (1) {
11637 if ( _upper < _lower )
11638 break;
11639
11640 _mid = _lower + ((_upper-_lower) >> 1);
11641 if ( (*p) < *_mid )
11642 _upper = _mid - 1;
11643 else if ( (*p) > *_mid )
11644 _lower = _mid + 1;
11645 else {
11646 _trans += (unsigned int)(_mid - _keys);
11647 goto _match;
11648 }
11649 }
11650 _keys += _klen;
11651 _trans += _klen;
11652 }
11653
11654 _klen = _json_range_lengths[cs];
11655 if ( _klen > 0 ) {
11656 const char *_lower = _keys;
11657 const char *_mid;
11658 const char *_upper = _keys + (_klen<<1) - 2;
11659 while (1) {
11660 if ( _upper < _lower )
11661 break;
11662
11663 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
11664 if ( (*p) < _mid[0] )
11665 _upper = _mid - 2;
11666 else if ( (*p) > _mid[1] )
11667 _lower = _mid + 2;
11668 else {
11669 _trans += (unsigned int)((_mid - _keys)>>1);
11670 goto _match;
11671 }
11672 }
11673 _trans += _klen;
11674 }
11675
11676 _match:
11677 _trans = _json_indicies[_trans];
11678 cs = _json_trans_targs[_trans];
11679
11680 if ( _json_trans_actions[_trans] == 0 )
11681 goto _again;
11682
11683 _acts = _json_actions + _json_trans_actions[_trans];
11684 _nacts = (unsigned int) *_acts++;
11685 while ( _nacts-- > 0 )
11686 {
11687 switch ( *_acts++ )
11688 {
11689 case 0:
11690 #line 1160 "upb/json/parser.rl"
11691 { p--; {cs = stack[--top]; goto _again;} }
11692 break;
11693 case 1:
11694 #line 1161 "upb/json/parser.rl"
11695 { p--; {stack[top++] = cs; cs = 10; goto _again;} }
11696 break;
11697 case 2:
11698 #line 1165 "upb/json/parser.rl"
11699 { start_text(parser, p); }
11700 break;
11701 case 3:
11702 #line 1166 "upb/json/parser.rl"
11703 { CHECK_RETURN_TOP(end_text(parser, p)); }
11704 break;
11705 case 4:
11706 #line 1172 "upb/json/parser.rl"
11707 { start_hex(parser); }
11708 break;
11709 case 5:
11710 #line 1173 "upb/json/parser.rl"
11711 { hexdigit(parser, p); }
11712 break;
11713 case 6:
11714 #line 1174 "upb/json/parser.rl"
11715 { CHECK_RETURN_TOP(end_hex(parser)); }
11716 break;
11717 case 7:
11718 #line 1180 "upb/json/parser.rl"
11719 { CHECK_RETURN_TOP(escape(parser, p)); }
11720 break;
11721 case 8:
11722 #line 1186 "upb/json/parser.rl"
11723 { p--; {cs = stack[--top]; goto _again;} }
11724 break;
11725 case 9:
11726 #line 1189 "upb/json/parser.rl"
11727 { {stack[top++] = cs; cs = 19; goto _again;} }
11728 break;
11729 case 10:
11730 #line 1191 "upb/json/parser.rl"
11731 { p--; {stack[top++] = cs; cs = 27; goto _again;} }
11732 break;
11733 case 11:
11734 #line 1196 "upb/json/parser.rl"
11735 { start_member(parser); }
11736 break;
11737 case 12:
11738 #line 1197 "upb/json/parser.rl"
11739 { CHECK_RETURN_TOP(end_membername(parser)); }
11740 break;
11741 case 13:
11742 #line 1200 "upb/json/parser.rl"
11743 { end_member(parser); }
11744 break;
11745 case 14:
11746 #line 1206 "upb/json/parser.rl"
11747 { start_object(parser); }
11748 break;
11749 case 15:
11750 #line 1209 "upb/json/parser.rl"
11751 { end_object(parser); }
11752 break;
11753 case 16:
11754 #line 1215 "upb/json/parser.rl"
11755 { CHECK_RETURN_TOP(start_array(parser)); }
11756 break;
11757 case 17:
11758 #line 1219 "upb/json/parser.rl"
11759 { end_array(parser); }
11760 break;
11761 case 18:
11762 #line 1224 "upb/json/parser.rl"
11763 { start_number(parser, p); }
11764 break;
11765 case 19:
11766 #line 1225 "upb/json/parser.rl"
11767 { CHECK_RETURN_TOP(end_number(parser, p)); }
11768 break;
11769 case 20:
11770 #line 1227 "upb/json/parser.rl"
11771 { CHECK_RETURN_TOP(start_stringval(parser)); }
11772 break;
11773 case 21:
11774 #line 1228 "upb/json/parser.rl"
11775 { CHECK_RETURN_TOP(end_stringval(parser)); }
11776 break;
11777 case 22:
11778 #line 1230 "upb/json/parser.rl"
11779 { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11780 break;
11781 case 23:
11782 #line 1232 "upb/json/parser.rl"
11783 { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11784 break;
11785 case 24:
11786 #line 1234 "upb/json/parser.rl"
11787 { /* null value */ }
11788 break;
11789 case 25:
11790 #line 1236 "upb/json/parser.rl"
11791 { CHECK_RETURN_TOP(start_subobject(parser)); }
11792 break;
11793 case 26:
11794 #line 1237 "upb/json/parser.rl"
11795 { end_subobject(parser); }
11796 break;
11797 case 27:
11798 #line 1242 "upb/json/parser.rl"
11799 { p--; {cs = stack[--top]; goto _again;} }
11800 break;
11801 #line 1514 "upb/json/parser.c"
11802 }
11803 }
11804
11805 _again:
11806 if ( cs == 0 )
11807 goto _out;
11808 if ( ++p != pe )
11809 goto _resume;
11810 _test_eof: {}
11811 _out: {}
11812 }
11813
11814 #line 1269 "upb/json/parser.rl"
11815
11816 if (p != pe) {
11817 upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
11818 upb_env_reporterror(parser->env, &parser->status);
11819 } else {
11820 capture_suspend(parser, &p);
11821 }
11822
11823 error:
11824 /* Save parsing state back to parser. */
11825 parser->current_state = cs;
11826 parser->parser_top = top;
11827
11828 return p - buf;
11829 }
11830
end(void * closure,const void * hd)11831 bool end(void *closure, const void *hd) {
11832 UPB_UNUSED(closure);
11833 UPB_UNUSED(hd);
11834
11835 /* Prevent compile warning on unused static constants. */
11836 UPB_UNUSED(json_start);
11837 UPB_UNUSED(json_en_number_machine);
11838 UPB_UNUSED(json_en_string_machine);
11839 UPB_UNUSED(json_en_value_machine);
11840 UPB_UNUSED(json_en_main);
11841 return true;
11842 }
11843
/* Puts the parser back into its initial state: the frame stack holds only the
 * root frame, the Ragel machine is at json_start with an empty call stack,
 * and the text-accumulation, capture and status fields are cleared.  The root
 * frame's sink, m and name_table are not touched here. */
static void json_parser_reset(upb_json_parser *p) {
  upb_jsonparser_frame *root = p->stack;
  int cs;
  int top;

  root->f = NULL;
  root->is_map = false;
  root->is_mapentry = false;
  p->top = root;

  /* Emit Ragel initialization of the parser. */

#line 1568 "upb/json/parser.c"
  {
  cs = json_start;
  top = 0;
  }

#line 1309 "upb/json/parser.rl"
  p->parser_top = top;
  p->current_state = cs;
  accumulate_clear(p);
  p->accumulated = NULL;
  p->capture = NULL;
  p->multipart_state = MULTIPART_INACTIVE;
  upb_status_clear(&p->status);
}
11870
/* Refcounted "visit" callback for a parser method: reports the single
 * outgoing reference, from the method to the msgdef it parses. */
static void visit_json_parsermethod(const upb_refcounted *r,
                                    upb_refcounted_visit *visit,
                                    void *closure) {
  const upb_json_parsermethod *self = (const upb_json_parsermethod *)r;

  visit(r, upb_msgdef_upcast2(self->msg), closure);
}
11877
/* Refcounted "free" callback for a parser method: uninitializes and frees
 * every string table stored in name_tables, then the inttable itself, then
 * the method object. */
static void free_json_parsermethod(upb_refcounted *r) {
  upb_json_parsermethod *self = (upb_json_parsermethod *)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &self->name_tables);
  while (!upb_inttable_done(&it)) {
    upb_strtable *names = upb_value_getptr(upb_inttable_iter_value(&it));

    upb_strtable_uninit(names);
    upb_gfree(names);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&self->name_tables);

  upb_gfree(r);
}
11894
/* Builds the name -> fielddef lookup table for message type md and stores it
 * in m->name_tables, then does the same for every submessage type reachable
 * from md.  Each field is entered under its JSON name and, when that differs,
 * under its raw field name as well.  A message type that already has a table
 * is skipped, which also stops the recursion on cyclic message types. */
static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
  upb_msg_field_iter it;
  upb_strtable *names;

  /* Heap scratch buffer for JSON names, grown on demand: there is no
   * reasonable upper bound on the length of a field name. */
  char *scratch = NULL;
  size_t scratch_cap = 0;

  if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) return;

  /* TODO(haberman): handle malloc failure. */
  names = upb_gmalloc(sizeof(*names));
  upb_strtable_init(names, UPB_CTYPE_CONSTPTR);
  /* Registered before the fields are walked so that the recursive calls
   * below see this message type as already handled. */
  upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(names));

  upb_msg_field_begin(&it, md);
  while (!upb_msg_field_done(&it)) {
    const upb_fielddef *f = upb_msg_iter_field(&it);
    const char *raw_name;
    size_t needed = upb_fielddef_getjsonname(f, scratch, scratch_cap);

    /* Add an entry for the JSON name. */
    if (needed > scratch_cap) {
      size_t written;
      scratch = upb_grealloc(scratch, 0, needed);
      scratch_cap = needed;
      written = upb_fielddef_getjsonname(f, scratch, scratch_cap);
      UPB_ASSERT_VAR(written, scratch_cap == written);
    }
    upb_strtable_insert(names, scratch, upb_value_constptr(f));

    raw_name = upb_fielddef_name(f);
    if (strcmp(scratch, raw_name) != 0) {
      /* Since the JSON name is different from the regular field name, add an
       * entry for the raw name (compliant proto3 JSON parsers must accept
       * both). */
      upb_strtable_insert(names, raw_name, upb_value_constptr(f));
    }

    if (upb_fielddef_issubmsg(f)) {
      add_jsonname_table(m, upb_fielddef_msgsubdef(f));
    }

    upb_msg_field_next(&it);
  }

  upb_gfree(scratch);
}
11943
11944 /* Public API *****************************************************************/
11945
upb_json_parser_create(upb_env * env,const upb_json_parsermethod * method,upb_sink * output)11946 upb_json_parser *upb_json_parser_create(upb_env *env,
11947 const upb_json_parsermethod *method,
11948 upb_sink *output) {
11949 #ifndef NDEBUG
11950 const size_t size_before = upb_env_bytesallocated(env);
11951 #endif
11952 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11953 if (!p) return false;
11954
11955 p->env = env;
11956 p->method = method;
11957 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11958 p->accumulate_buf = NULL;
11959 p->accumulate_buf_size = 0;
11960 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
11961
11962 json_parser_reset(p);
11963 upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11964 p->top->m = upb_handlers_msgdef(output->handlers);
11965 set_name_table(p, p->top);
11966
11967 /* If this fails, uncomment and increase the value in parser.h. */
11968 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
11969 assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11970 return p;
11971 }
11972
upb_json_parser_input(upb_json_parser * p)11973 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11974 return &p->input_;
11975 }
11976
upb_json_parsermethod_new(const upb_msgdef * md,const void * owner)11977 upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
11978 const void* owner) {
11979 static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
11980 free_json_parsermethod};
11981 upb_json_parsermethod *ret = upb_gmalloc(sizeof(*ret));
11982 upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);
11983
11984 ret->msg = md;
11985 upb_ref2(md, ret);
11986
11987 upb_byteshandler_init(&ret->input_handler_);
11988 upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
11989 upb_byteshandler_setendstr(&ret->input_handler_, end, ret);
11990
11991 upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);
11992
11993 add_jsonname_table(ret, md);
11994
11995 return ret;
11996 }
11997
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)11998 const upb_byteshandler *upb_json_parsermethod_inputhandler(
11999 const upb_json_parsermethod *m) {
12000 return &m->input_handler_;
12001 }
12002 /*
12003 ** This currently uses snprintf() to format primitives, and could be optimized
12004 ** further.
12005 */
12006
12007
12008 #include <string.h>
12009 #include <stdint.h>
12010
/* State of one JSON printer: where the text goes, and the nesting and comma
 * bookkeeping needed while a message is being printed. */
struct upb_json_printer {
  /* NOTE(review): presumably the sink handed to producers of the message;
   * not used in this part of the file -- confirm. */
  upb_sink input_;
  /* BytesSink closure.  Filled in by upb_bytessink_start() and passed to
   * every upb_bytessink_putbuf() call. */
  void *subc_;
  /* Bytes sink that receives the generated JSON text. */
  upb_bytessink *output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
12031
/* StringPiece; a pointer plus a length.  Instances built by newstrpc() own
 * the buffer behind ptr; freestrpc() releases both the buffer and the
 * struct. */
typedef struct {
  char *ptr;   /* Character data. */
  size_t len;  /* Number of characters; newstrpc() excludes the final NUL. */
} strpc;
12037
freestrpc(void * ptr)12038 void freestrpc(void *ptr) {
12039 strpc *pc = ptr;
12040 upb_gfree(pc->ptr);
12041 upb_gfree(pc);
12042 }
12043
12044 /* Convert fielddef name to JSON name and return as a string piece. */
newstrpc(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames)12045 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
12046 bool preserve_fieldnames) {
12047 /* TODO(haberman): handle malloc failure. */
12048 strpc *ret = upb_gmalloc(sizeof(*ret));
12049 if (preserve_fieldnames) {
12050 ret->ptr = upb_gstrdup(upb_fielddef_name(f));
12051 ret->len = strlen(ret->ptr);
12052 } else {
12053 size_t len;
12054 ret->len = upb_fielddef_getjsonname(f, NULL, 0);
12055 ret->ptr = upb_gmalloc(ret->len);
12056 len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
12057 UPB_ASSERT_VAR(len, len == ret->len);
12058 ret->len--; /* NULL */
12059 }
12060
12061 upb_handlers_addcleanup(h, ret, freestrpc);
12062 return ret;
12063 }
12064
12065 /* ------------ JSON string printing: values, maps, arrays ------------------ */
12066
/* Writes len bytes of buf to the printer's output sink.  The sink is expected
 * to accept every byte; that is only checked through UPB_ASSERT_VAR. */
static void print_data(
    upb_json_printer *p, const char *buf, unsigned int len) {
  /* TODO: Will need to change if we support pushback from the sink. */
  size_t written = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT_VAR(written, written == len);
}
12073
/* Emits the "," separator before every element of the current frame except
 * the first, and records that the frame now has an element. */
static void print_comma(upb_json_printer *p) {
  bool *is_first = &p->first_elem_[p->depth_];

  if (!*is_first) {
    print_data(p, ",", 1);
  }
  *is_first = false;
}
12080
12081 /* Helpers that print properly formatted elements to the JSON output stream. */
12082
/* Used for escaping control chars in strings: is_json_escaped() treats every
 * byte below this value (0x20) as needing an escape. */
static const char kControlCharLimit = 0x20;
12085
is_json_escaped(char c)12086 UPB_INLINE bool is_json_escaped(char c) {
12087 /* See RFC 4627. */
12088 unsigned char uc = (unsigned char)c;
12089 return uc < kControlCharLimit || uc == '"' || uc == '\\';
12090 }
12091
json_nice_escape(char c)12092 UPB_INLINE const char* json_nice_escape(char c) {
12093 switch (c) {
12094 case '"': return "\\\"";
12095 case '\\': return "\\\\";
12096 case '\b': return "\\b";
12097 case '\f': return "\\f";
12098 case '\n': return "\\n";
12099 case '\r': return "\\r";
12100 case '\t': return "\\t";
12101 default: return NULL;
12102 }
12103 }
12104
12105 /* Write a properly escaped string chunk. The surrounding quotes are *not*
12106 * printed; this is so that the caller has the option of emitting the string
12107 * content in chunks. */
putstring(upb_json_printer * p,const char * buf,unsigned int len)12108 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
12109 const char* unescaped_run = NULL;
12110 unsigned int i;
12111 for (i = 0; i < len; i++) {
12112 char c = buf[i];
12113 /* Handle escaping. */
12114 if (is_json_escaped(c)) {
12115 /* Use a "nice" escape, like \n, if one exists for this character. */
12116 const char* escape = json_nice_escape(c);
12117 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
12118 * escape. */
12119 char escape_buf[8];
12120 if (!escape) {
12121 unsigned char byte = (unsigned char)c;
12122 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
12123 escape = escape_buf;
12124 }
12125
12126 /* N.B. that we assume that the input encoding is equal to the output
12127 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
12128 * can simply pass the bytes through. */
12129
12130 /* If there's a current run of unescaped chars, print that run first. */
12131 if (unescaped_run) {
12132 print_data(p, unescaped_run, &buf[i] - unescaped_run);
12133 unescaped_run = NULL;
12134 }
12135 /* Then print the escape code. */
12136 print_data(p, escape, strlen(escape));
12137 } else {
12138 /* Add to the current unescaped run of characters. */
12139 if (unescaped_run == NULL) {
12140 unescaped_run = &buf[i];
12141 }
12142 }
12143 }
12144
12145 /* If the string ended in a run of unescaped characters, print that last run. */
12146 if (unescaped_run) {
12147 print_data(p, unescaped_run, &buf[len] - unescaped_run);
12148 }
12149 }
12150
/* Returns -1 from the enclosing function when (x) is false.  The fmt_*()
 * helpers that use it return size_t, so their callers see (size_t)-1. */
#define CHKLENGTH(x) if (!(x)) return -1;
12152
12153 /* Helpers that format floating point values according to our custom formats.
12154 * Right now we use %.8g and %.17g for float/double, respectively, to match
12155 * proto2::util::JsonFormat's defaults. May want to change this later. */
12156
/* Formats val with "%.17g" into buf (capacity length).  Returns the number of
 * characters written, or (size_t)-1 if nothing was written or the text did
 * not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%.17g", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12162
/* Formats val with "%.8g" into buf (capacity length).  Returns the number of
 * characters written, or (size_t)-1 if nothing was written or the text did
 * not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%.8g", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12168
/* Writes "true" or "false" into buf (capacity length).  Returns the number of
 * characters written, or (size_t)-1 if nothing was written or the text did
 * not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  const size_t written = _upb_snprintf(buf, length, "%s", text);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12174
/* Formats a signed integer in decimal into buf (capacity length).  Returns
 * the number of characters written, or (size_t)-1 if nothing was written or
 * the text did not fit.
 *
 * The parameter is long long rather than long: this helper formats int32_t,
 * uint32_t and int64_t values, and long is only 32 bits wide on some
 * platforms, where int64_t values would be truncated and uint32_t values
 * above INT32_MAX would print as negative numbers. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
12180
/* Formats val in decimal with "%llu" into buf (capacity length).  Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * text did not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%llu", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12186
12187 /* Print a map key given a field name. Called by scalar field handlers and by
12188 * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)12189 static bool putkey(void *closure, const void *handler_data) {
12190 upb_json_printer *p = closure;
12191 const strpc *key = handler_data;
12192 print_comma(p);
12193 print_data(p, "\"", 1);
12194 putstring(p, key->ptr, key->len);
12195 print_data(p, "\":", 2);
12196 return true;
12197 }
12198
/* CHKFMT: returns false from the enclosing function when val is the
 * (size_t)-1 failure value produced by the fmt_*() helpers.
 * CHK: returns false from the enclosing function when val is false/zero.
 * Both expansions already end in ';'. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val)    if (!(val)) return false;
12201
/* Defines three value handlers for one primitive C type:
 *
 *   put<type>       formats val with fmt_func into a 64-byte stack buffer and
 *                   prints it; returns false if formatting fails
 *   scalar_<type>   prints the key via putkey(), then the value
 *   repeated_<type> prints the separating comma when needed, then the value
 */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
12225
/* Defines putmapkey_<type>, which prints val as a quoted key followed by ':'.
 * The value is printed by put<type>, so TYPE_HANDLERS must already have been
 * expanded for the same type.  The fmt_func argument is not used by the
 * expansion. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    print_data(p, "\"", 1);                                                  \
    CHK(put##type(closure, handler_data, val));                              \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
12235
/* Value handlers for every primitive type.  Each line defines put<type>,
 * scalar_<type> and repeated_<type>.  The three signed/32-bit integer types
 * are formatted by fmt_int64; uint64_t is formatted by fmt_uint64. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64)
TYPE_HANDLERS(uint32_t, fmt_int64)
TYPE_HANDLERS(int64_t,  fmt_int64)
TYPE_HANDLERS(uint64_t, fmt_uint64)

/* Map-key handlers (putmapkey_<type>).
 * double and float are not allowed to be map keys. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)

/* The generator macros are not needed past this point. */
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
12253
/* Handler data for the enum value handlers. */
typedef struct {
  /* Passed to putkey() as its handler data by scalar_enum(). */
  void *keyname;
  /* Enum definition used to turn a number into its symbolic name. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
12258
scalar_enum(void * closure,const void * handler_data,int32_t val)12259 static bool scalar_enum(void *closure, const void *handler_data,
12260 int32_t val) {
12261 const EnumHandlerData *hd = handler_data;
12262 upb_json_printer *p = closure;
12263 const char *symbolic_name;
12264
12265 CHK(putkey(closure, hd->keyname));
12266
12267 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
12268 if (symbolic_name) {
12269 print_data(p, "\"", 1);
12270 putstring(p, symbolic_name, strlen(symbolic_name));
12271 print_data(p, "\"", 1);
12272 } else {
12273 putint32_t(closure, NULL, val);
12274 }
12275
12276 return true;
12277 }
12278
/* Prints enum value val as its quoted symbolic name from def, or as a plain
 * number when def has no name for it. */
static void print_enum_symbolic_name(upb_json_printer *p,
                                     const upb_enumdef *def,
                                     int32_t val) {
  const char *name = upb_enumdef_iton(def, val);

  if (!name) {
    putint32_t(p, NULL, val);
    return;
  }

  print_data(p, "\"", 1);
  putstring(p, name, strlen(name));
  print_data(p, "\"", 1);
}
12291
repeated_enum(void * closure,const void * handler_data,int32_t val)12292 static bool repeated_enum(void *closure, const void *handler_data,
12293 int32_t val) {
12294 const EnumHandlerData *hd = handler_data;
12295 upb_json_printer *p = closure;
12296 print_comma(p);
12297
12298 print_enum_symbolic_name(p, hd->enumdef, val);
12299
12300 return true;
12301 }
12302
mapvalue_enum(void * closure,const void * handler_data,int32_t val)12303 static bool mapvalue_enum(void *closure, const void *handler_data,
12304 int32_t val) {
12305 const EnumHandlerData *hd = handler_data;
12306 upb_json_printer *p = closure;
12307
12308 print_enum_symbolic_name(p, hd->enumdef, val);
12309
12310 return true;
12311 }
12312
scalar_startsubmsg(void * closure,const void * handler_data)12313 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
12314 return putkey(closure, handler_data) ? closure : UPB_BREAK;
12315 }
12316
repeated_startsubmsg(void * closure,const void * handler_data)12317 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
12318 upb_json_printer *p = closure;
12319 UPB_UNUSED(handler_data);
12320 print_comma(p);
12321 return closure;
12322 }
12323
/* Opens a JSON object: enters one more nesting level, marks that level as
 * having no elements yet, and prints "{". */
static void start_frame(upb_json_printer *p) {
  const int level = ++p->depth_;

  p->first_elem_[level] = true;
  print_data(p, "{", 1);
}
12329
/* Closes a JSON object: prints "}" and leaves the current nesting level. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  --p->depth_;
}
12334
printer_startmsg(void * closure,const void * handler_data)12335 static bool printer_startmsg(void *closure, const void *handler_data) {
12336 upb_json_printer *p = closure;
12337 UPB_UNUSED(handler_data);
12338 if (p->depth_ == 0) {
12339 upb_bytessink_start(p->output_, 0, &p->subc_);
12340 }
12341 start_frame(p);
12342 return true;
12343 }
12344
/* endmsg handler: prints the closing "}" and, once the outermost message has
 * been closed (depth back at 0), ends the output bytes sink.  The status
 * argument is not used. */
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(handler_data);
  UPB_UNUSED(s);

  end_frame(printer);
  if (printer->depth_ != 0) return true;

  upb_bytessink_end(printer->output_);
  return true;
}
12355
startseq(void * closure,const void * handler_data)12356 static void *startseq(void *closure, const void *handler_data) {
12357 upb_json_printer *p = closure;
12358 CHK(putkey(closure, handler_data));
12359 p->depth_++;
12360 p->first_elem_[p->depth_] = true;
12361 print_data(p, "[", 1);
12362 return closure;
12363 }
12364
endseq(void * closure,const void * handler_data)12365 static bool endseq(void *closure, const void *handler_data) {
12366 upb_json_printer *p = closure;
12367 UPB_UNUSED(handler_data);
12368 print_data(p, "]", 1);
12369 p->depth_--;
12370 return true;
12371 }
12372
startmap(void * closure,const void * handler_data)12373 static void *startmap(void *closure, const void *handler_data) {
12374 upb_json_printer *p = closure;
12375 CHK(putkey(closure, handler_data));
12376 p->depth_++;
12377 p->first_elem_[p->depth_] = true;
12378 print_data(p, "{", 1);
12379 return closure;
12380 }
12381
endmap(void * closure,const void * handler_data)12382 static bool endmap(void *closure, const void *handler_data) {
12383 upb_json_printer *p = closure;
12384 UPB_UNUSED(handler_data);
12385 print_data(p, "}", 1);
12386 p->depth_--;
12387 return true;
12388 }
12389
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12390 static size_t putstr(void *closure, const void *handler_data, const char *str,
12391 size_t len, const upb_bufhandle *handle) {
12392 upb_json_printer *p = closure;
12393 UPB_UNUSED(handler_data);
12394 UPB_UNUSED(handle);
12395 putstring(p, str, len);
12396 return len;
12397 }
12398
12399 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12400 static size_t putbytes(void *closure, const void *handler_data, const char *str,
12401 size_t len, const upb_bufhandle *handle) {
12402 upb_json_printer *p = closure;
12403
12404 /* This is the regular base64, not the "web-safe" version. */
12405 static const char base64[] =
12406 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12407
12408 /* Base64-encode. */
12409 char data[16000];
12410 const char *limit = data + sizeof(data);
12411 const unsigned char *from = (const unsigned char*)str;
12412 char *to = data;
12413 size_t remaining = len;
12414 size_t bytes;
12415
12416 UPB_UNUSED(handler_data);
12417 UPB_UNUSED(handle);
12418
12419 while (remaining > 2) {
12420 /* TODO(haberman): handle encoded lengths > sizeof(data) */
12421 UPB_ASSERT_VAR(limit, (limit - to) >= 4);
12422
12423 to[0] = base64[from[0] >> 2];
12424 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12425 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
12426 to[3] = base64[from[2] & 0x3f];
12427
12428 remaining -= 3;
12429 to += 4;
12430 from += 3;
12431 }
12432
12433 switch (remaining) {
12434 case 2:
12435 to[0] = base64[from[0] >> 2];
12436 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12437 to[2] = base64[(from[1] & 0xf) << 2];
12438 to[3] = '=';
12439 to += 4;
12440 from += 2;
12441 break;
12442 case 1:
12443 to[0] = base64[from[0] >> 2];
12444 to[1] = base64[((from[0] & 0x3) << 4)];
12445 to[2] = '=';
12446 to[3] = '=';
12447 to += 4;
12448 from += 1;
12449 break;
12450 }
12451
12452 bytes = to - data;
12453 print_data(p, "\"", 1);
12454 putstring(p, data, bytes);
12455 print_data(p, "\"", 1);
12456 return len;
12457 }
12458
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)12459 static void *scalar_startstr(void *closure, const void *handler_data,
12460 size_t size_hint) {
12461 upb_json_printer *p = closure;
12462 UPB_UNUSED(handler_data);
12463 UPB_UNUSED(size_hint);
12464 CHK(putkey(closure, handler_data));
12465 print_data(p, "\"", 1);
12466 return p;
12467 }
12468
/* String-data handler for a non-repeated string field: forwards the chunk to
 * putstr(), applies CHK to its result, and returns LEN. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  const size_t consumed = putstr(closure, handler_data, str, len, handle);

  CHK(consumed);
  return len;
}
12475
scalar_endstr(void * closure,const void * handler_data)12476 static bool scalar_endstr(void *closure, const void *handler_data) {
12477 upb_json_printer *p = closure;
12478 UPB_UNUSED(handler_data);
12479 print_data(p, "\"", 1);
12480 return true;
12481 }
12482
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)12483 static void *repeated_startstr(void *closure, const void *handler_data,
12484 size_t size_hint) {
12485 upb_json_printer *p = closure;
12486 UPB_UNUSED(handler_data);
12487 UPB_UNUSED(size_hint);
12488 print_comma(p);
12489 print_data(p, "\"", 1);
12490 return p;
12491 }
12492
/* String-data handler for an element of a repeated string field: forwards
 * the chunk to putstr(), applies CHK to its result, and returns LEN. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  const size_t consumed = putstr(closure, handler_data, str, len, handle);

  CHK(consumed);
  return len;
}
12499
repeated_endstr(void * closure,const void * handler_data)12500 static bool repeated_endstr(void *closure, const void *handler_data) {
12501 upb_json_printer *p = closure;
12502 UPB_UNUSED(handler_data);
12503 print_data(p, "\"", 1);
12504 return true;
12505 }
12506
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)12507 static void *mapkeyval_startstr(void *closure, const void *handler_data,
12508 size_t size_hint) {
12509 upb_json_printer *p = closure;
12510 UPB_UNUSED(handler_data);
12511 UPB_UNUSED(size_hint);
12512 print_data(p, "\"", 1);
12513 return p;
12514 }
12515
/* String-data handler for a string-typed mapentry key: forwards the chunk to
 * putstr(), applies CHK to its result, and returns LEN. */
static size_t mapkey_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  const size_t consumed = putstr(closure, handler_data, str, len, handle);

  CHK(consumed);
  return len;
}
12522
mapkey_endstr(void * closure,const void * handler_data)12523 static bool mapkey_endstr(void *closure, const void *handler_data) {
12524 upb_json_printer *p = closure;
12525 UPB_UNUSED(handler_data);
12526 print_data(p, "\":", 2);
12527 return true;
12528 }
12529
mapvalue_endstr(void * closure,const void * handler_data)12530 static bool mapvalue_endstr(void *closure, const void *handler_data) {
12531 upb_json_printer *p = closure;
12532 UPB_UNUSED(handler_data);
12533 print_data(p, "\"", 1);
12534 return true;
12535 }
12536
/* String-data handler for a non-repeated bytes field.
 *
 * Emits the field key via putkey() (CHK bails out if that fails), then the
 * base64-encoded value via putbytes(), whose result is also passed through
 * CHK.  Returns LEN. */
static size_t scalar_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  size_t consumed;

  CHK(putkey(closure, handler_data));
  consumed = putbytes(closure, handler_data, str, len, handle);
  CHK(consumed);
  return len;
}
12544
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12545 static size_t repeated_bytes(void *closure, const void *handler_data,
12546 const char *str, size_t len,
12547 const upb_bufhandle *handle) {
12548 upb_json_printer *p = closure;
12549 print_comma(p);
12550 CHK(putbytes(closure, handler_data, str, len, handle));
12551 return len;
12552 }
12553
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12554 static size_t mapkey_bytes(void *closure, const void *handler_data,
12555 const char *str, size_t len,
12556 const upb_bufhandle *handle) {
12557 upb_json_printer *p = closure;
12558 CHK(putbytes(closure, handler_data, str, len, handle));
12559 print_data(p, ":", 1);
12560 return len;
12561 }
12562
/* Builds the handler data for an enum field and stores it in ATTR.
 *
 * Allocates an EnumHandlerData, fills in the field's enumdef (the field's
 * subdef) and its key name (from newstrpc()), registers upb_gfree as the
 * cleanup for the allocation on H, and installs the pointer as ATTR's
 * handler data.
 *
 * NOTE(review): the upb_gmalloc() result is used without a NULL check. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        bool preserve_fieldnames,
                        upb_handlerattr *attr) {
  EnumHandlerData *data = upb_gmalloc(sizeof(EnumHandlerData));

  data->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  data->keyname = newstrpc(h, f, preserve_fieldnames);

  /* H takes over responsibility for freeing the allocation. */
  upb_handlers_addcleanup(h, data, upb_gfree);
  upb_handlerattr_sethandlerdata(attr, data);
}
12573
12574 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
12575 * in a map).
12576 *
12577 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
12578 * key or value cases properly. The right way to do this is to allocate a
12579 * temporary structure at the start of a mapentry submessage, store key and
12580 * value data in it as key and value handlers are called, and then print the
12581 * key/value pair once at the end of the submessage. If we don't do this, we
12582 * should at least detect the case and throw an error. However, so far all of
12583 * our sources that emit mapentry messages do so canonically (with one key
12584 * field, and then one value field), so this is not a pressing concern at the
12585 * moment. */
printer_sethandlers_mapentry(const void * closure,bool preserve_fieldnames,upb_handlers * h)12586 void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
12587 upb_handlers *h) {
12588 const upb_msgdef *md = upb_handlers_msgdef(h);
12589
12590 /* A mapentry message is printed simply as '"key": value'. Rather than
12591 * special-case key and value for every type below, we just handle both
12592 * fields explicitly here. */
12593 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
12594 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
12595
12596 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
12597
12598 UPB_UNUSED(closure);
12599
12600 switch (upb_fielddef_type(key_field)) {
12601 case UPB_TYPE_INT32:
12602 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
12603 break;
12604 case UPB_TYPE_INT64:
12605 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
12606 break;
12607 case UPB_TYPE_UINT32:
12608 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
12609 break;
12610 case UPB_TYPE_UINT64:
12611 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
12612 break;
12613 case UPB_TYPE_BOOL:
12614 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
12615 break;
12616 case UPB_TYPE_STRING:
12617 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
12618 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
12619 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
12620 break;
12621 case UPB_TYPE_BYTES:
12622 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
12623 break;
12624 default:
12625 assert(false);
12626 break;
12627 }
12628
12629 switch (upb_fielddef_type(value_field)) {
12630 case UPB_TYPE_INT32:
12631 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
12632 break;
12633 case UPB_TYPE_INT64:
12634 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
12635 break;
12636 case UPB_TYPE_UINT32:
12637 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
12638 break;
12639 case UPB_TYPE_UINT64:
12640 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
12641 break;
12642 case UPB_TYPE_BOOL:
12643 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
12644 break;
12645 case UPB_TYPE_FLOAT:
12646 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
12647 break;
12648 case UPB_TYPE_DOUBLE:
12649 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
12650 break;
12651 case UPB_TYPE_STRING:
12652 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
12653 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
12654 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
12655 break;
12656 case UPB_TYPE_BYTES:
12657 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
12658 break;
12659 case UPB_TYPE_ENUM: {
12660 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
12661 set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
12662 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
12663 upb_handlerattr_uninit(&enum_attr);
12664 break;
12665 }
12666 case UPB_TYPE_MESSAGE:
12667 /* No handler necessary -- the submsg handlers will print the message
12668 * as appropriate. */
12669 break;
12670 }
12671
12672 upb_handlerattr_uninit(&empty_attr);
12673 }
12674
/* Registers the JSON printer's handlers on H for the message type that H
 * describes.
 *
 * CLOSURE must point to a bool: the "preserve_fieldnames" flag, which is
 * forwarded to newstrpc() and set_enum_hd() when the per-field key data is
 * built.
 *
 * Mapentry messages are delegated entirely to printer_sethandlers_mapentry().
 * For every other message, start/end-message handlers are installed and each
 * field gets a "scalar_*" or "repeated_*" handler depending on whether it is
 * a sequence; map and repeated fields additionally get start/end-sequence
 * handlers. */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  bool is_mapentry = upb_msgdef_mapentry(md);
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_msg_field_iter i;
  const bool *preserve_fieldnames_ptr = closure;
  const bool preserve_fieldnames = *preserve_fieldnames_ptr;

  if (is_mapentry) {
    /* mapentry messages are sufficiently different that we handle them
     * separately. */
    printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
    /* Keep the initializer/uninit pairing on this early-exit path too. */
    upb_handlerattr_uninit(&empty_attr);
    return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

/* Expands to the switch case for one primitive type: sequence elements use
 * the repeated_* handler without handler data, scalars use the scalar_*
 * handler with the field's key data (name_attr). */
#define TYPE(type, name, ctype)                                               \
  case type:                                                                  \
    if (upb_fielddef_isseq(f)) {                                              \
      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
    } else {                                                                  \
      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
    }                                                                         \
    break;

  upb_msg_field_begin(&i, md);
  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);

    /* Handler data carrying this field's key, built by newstrpc(). */
    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&name_attr,
                                   newstrpc(h, f, preserve_fieldnames));

    /* The key is printed once by the start-of-sequence handler, which is
     * why the per-element handlers below take no key data. */
    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (upb_fielddef_isseq(f)) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      TYPE(UPB_TYPE_FLOAT,  float,  float);
      TYPE(UPB_TYPE_DOUBLE, double, double);
      TYPE(UPB_TYPE_BOOL,   bool,   bool);
      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
      case UPB_TYPE_ENUM: {
        /* For now, we always emit symbolic names for enums. We may want an
         * option later to control this behavior, but we will wait for a real
         * need first. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
        set_enum_hd(h, f, preserve_fieldnames, &enum_attr);

        if (upb_fielddef_isseq(f)) {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        }

        upb_handlerattr_uninit(&enum_attr);
        break;
      }
      case UPB_TYPE_STRING:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        } else {
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        }
        break;
      case UPB_TYPE_BYTES:
        /* XXX: this doesn't support strings that span buffers yet. The base64
         * encoder will need to be made resumable for this to work properly. */
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        } else {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        }
        break;
      case UPB_TYPE_MESSAGE:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        }
        break;
    }

    upb_handlerattr_uninit(&name_attr);
  }

  upb_handlerattr_uninit(&empty_attr);
#undef TYPE
}
12777
/* Puts the printer back at nesting depth 0.  No other field is touched. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
12781
12782
12783 /* Public API *****************************************************************/
12784
upb_json_printer_create(upb_env * e,const upb_handlers * h,upb_bytessink * output)12785 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
12786 upb_bytessink *output) {
12787 #ifndef NDEBUG
12788 size_t size_before = upb_env_bytesallocated(e);
12789 #endif
12790
12791 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
12792 if (!p) return NULL;
12793
12794 p->output_ = output;
12795 json_printer_reset(p);
12796 upb_sink_reset(&p->input_, h, p);
12797
12798 /* If this fails, increase the value in printer.h. */
12799 assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
12800 return p;
12801 }
12802
upb_json_printer_input(upb_json_printer * p)12803 upb_sink *upb_json_printer_input(upb_json_printer *p) {
12804 return &p->input_;
12805 }
12806
upb_json_printer_newhandlers(const upb_msgdef * md,bool preserve_fieldnames,const void * owner)12807 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
12808 bool preserve_fieldnames,
12809 const void *owner) {
12810 return upb_handlers_newfrozen(
12811 md, owner, printer_sethandlers, &preserve_fieldnames);
12812 }
12813