1 // Amalgamated source file
2 #include "upb.h"
3
4
5 #include <stdlib.h>
6 #include <string.h>
7
/* Length-prefixed heap string.  newstr() over-allocates the struct so that
 * `str` holds `len` bytes of data followed by a terminating NUL; the single
 * declared byte of `str` is what makes room for that NUL. */
typedef struct {
  size_t len;   /* Number of data bytes in str, not counting the NUL. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
12
newstr(const char * data,size_t len)13 static str_t *newstr(const char *data, size_t len) {
14 str_t *ret = malloc(sizeof(*ret) + len);
15 if (!ret) return NULL;
16 ret->len = len;
17 memcpy(ret->str, data, len);
18 ret->str[len] = '\0';
19 return ret;
20 }
21
/* Releases a string allocated by newstr(). */
static void freestr(str_t *s) {
  free(s);
}
23
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
 *
 * Returns true iff c lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return c <= high;
}
28
/* Returns true iff c is in 'A'..'Z', in 'a'..'z', or is an underscore. */
static bool upb_isletter(char c) {
  if (c == '_') {
    return true;
  }
  if (upb_isbetween(c, 'a', 'z')) {
    return true;
  }
  return upb_isbetween(c, 'A', 'Z');
}
32
/* Returns true iff c is accepted by upb_isletter() or is a digit '0'..'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) {
    return true;
  }
  return upb_isletter(c);
}
36
/* Checks whether the first `len` bytes of `str` form a valid identifier.
 *
 * Each path component must start with a letter or underscore and continue
 * with letters, digits or underscores.  When `full` is true, components may
 * be separated by single '.' characters; when it is false any '.' is an
 * error.
 *
 * Returns true if the name is valid.  Otherwise returns false and records the
 * reason in `s`.  `str` is formatted with "%s" in the error messages, so it
 * must be NUL-terminated. */
static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
  bool start = true;  /* True while we are at the start of a path component. */
  size_t i;
  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s, "invalid name: path components must start with a letter (%s)",
            str);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
                           str);
        return false;
      }
    }
  }

  if (start) {
    /* Still waiting for a component: the name was empty or ended in '.'.
     * Report it so that a false return always comes with an error status. */
    upb_status_seterrf(s, "invalid name: name is empty or ends with '.' (%s)",
                       str);
    return false;
  }
  return true;
}
66
67
68 /* upb_def ********************************************************************/
69
upb_def_type(const upb_def * d)70 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
71
upb_def_fullname(const upb_def * d)72 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
73
/* Replaces def's full name with a private copy of `fullname`.
 *
 * `fullname` must be a valid (possibly dotted) identifier.  Returns false and
 * sets `s` if the name is invalid or if memory cannot be allocated; in both
 * cases the existing name is left untouched.  The def must not be frozen. */
bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
  char *copy;
  assert(!upb_def_isfrozen(def));
  if (!upb_isident(fullname, strlen(fullname), true, s)) return false;

  /* Duplicate before freeing the old name: this keeps the def intact on
   * allocation failure and stays correct if `fullname` aliases the name the
   * def currently stores. */
  copy = upb_strdup(fullname);
  if (!copy) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }
  free((void*)def->fullname);
  def->fullname = copy;
  return true;
}
81
upb_def_dup(const upb_def * def,const void * o)82 upb_def *upb_def_dup(const upb_def *def, const void *o) {
83 switch (def->type) {
84 case UPB_DEF_MSG:
85 return upb_msgdef_upcast_mutable(
86 upb_msgdef_dup(upb_downcast_msgdef(def), o));
87 case UPB_DEF_FIELD:
88 return upb_fielddef_upcast_mutable(
89 upb_fielddef_dup(upb_downcast_fielddef(def), o));
90 case UPB_DEF_ENUM:
91 return upb_enumdef_upcast_mutable(
92 upb_enumdef_dup(upb_downcast_enumdef(def), o));
93 default: assert(false); return NULL;
94 }
95 }
96
/* Initializes the common def state: the refcounted base (with `vtbl` and
 * `owner`), the def type, a NULL name and a cleared came_from_user flag.
 * Returns false, leaving the def fields unset, if the refcounted base could
 * not be initialized. */
static bool upb_def_init(upb_def *def, upb_deftype_t type,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
  if (upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) {
    def->came_from_user = false;
    def->fullname = NULL;
    def->type = type;
    return true;
  }
  return false;
}
106
/* Frees the name string owned by the def (a no-op if it is NULL). */
static void upb_def_uninit(upb_def *def) {
  void *name = (void*)def->fullname;
  free(name);
}
110
/* Returns the message's full name for use in diagnostics, substituting
 * "(anonymous)" when no name has been set. */
static const char *msgdef_name(const upb_msgdef *m) {
  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));
  if (fullname == NULL) {
    return "(anonymous)";
  }
  return fullname;
}
115
/* Validates a single field as part of freezing its containing message.
 *
 * Checks, in order: name and number are set; the type has been set; lazy is
 * only used on MESSAGE-descriptor-type fields; any required subdef is
 * resolved, present, and either frozen or flagged came_from_user; enum
 * defaults are members of the enum; and mapentry submessages are only
 * referenced from repeated fields.
 *
 * Returns true if the field is valid.  Otherwise returns false with the
 * reason stored in `s`.  For enum fields that pass validation, the effective
 * numeric default is written back into the field. */
static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "fielddef must have name and number set");
    return false;
  }

  if (!f->type_is_set_) {
    upb_status_seterrmsg(s, "fielddef type was not initialized");
    return false;
  }

  if (upb_fielddef_lazy(f) &&
      upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
    upb_status_seterrmsg(s,
                         "only length-delimited submessage fields may be lazy");
    return false;
  }

  if (upb_fielddef_hassubdef(f)) {
    const upb_def *subdef;

    /* A symbolic subdef is still just a name; it has to be resolved to a real
     * def before the field can be frozen. */
    if (f->subdef_is_symbolic) {
      upb_status_seterrf(s, "field '%s.%s' has not been resolved",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    subdef = upb_fielddef_subdef(f);
    if (subdef == NULL) {
      upb_status_seterrf(s, "field %s.%s is missing required subdef",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    /* came_from_user is set by upb_def_freeze() on defs in the batch being
     * frozen, so this accepts subdefs that are frozen already or are being
     * frozen together with this field. */
    if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
      upb_status_seterrf(s,
                         "subdef of field %s.%s is not frozen or being frozen",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }
  }

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
    bool has_default_number = upb_fielddef_enumhasdefaultint32(f);

    /* Previously verified by upb_validate_enumdef(). */
    assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);

    /* We've already validated that we have an associated enumdef and that it
     * has at least one member, so at least one of these should be true.
     * Because if the user didn't set anything, we'll pick up the enum's
     * default, but if the user *did* set something we should at least pick up
     * the one they set (int32 or string). */
    assert(has_default_name || has_default_number);

    /* No name for the default: the numeric default is not in the enum. */
    if (!has_default_name) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%d) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultint32(f));
      return false;
    }

    /* No number for the default: the string default is not in the enum. */
    if (!has_default_number) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%s) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultstr(f, NULL));
      return false;
    }

    /* Lift the effective numeric default into the field's default slot, in case
     * we were only getting it "by reference" from the enumdef. */
    upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
  }

  /* Ensure that MapEntry submessages only appear as repeated fields, not
   * optional/required (singular) fields. */
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
      upb_fielddef_msgsubdef(f) != NULL) {
    const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
    if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
      upb_status_seterrf(s,
                         "Field %s refers to mapentry message but is not "
                         "a repeated field",
                         upb_fielddef_name(f) ? upb_fielddef_name(f) :
                         "(unnamed)");
      return false;
    }
  }

  return true;
}
210
/* An enum may only be frozen if it has at least one value.  Returns true in
 * that case; otherwise records an error in `s` and returns false. */
static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
  if (upb_enumdef_numvals(e) != 0) {
    return true;
  }
  upb_status_seterrf(s, "enum %s has no members (must have at least one)",
                     upb_enumdef_fullname(e));
  return false;
}
220
221 /* All submessage fields are lower than all other fields.
222 * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)223 uint32_t field_rank(const upb_fielddef *f) {
224 uint32_t ret = upb_fielddef_number(f);
225 const uint32_t high_bit = 1 << 30;
226 assert(ret < high_bit);
227 if (!upb_fielddef_issubmsg(f))
228 ret |= high_bit;
229 return ret;
230 }
231
cmp_fields(const void * p1,const void * p2)232 int cmp_fields(const void *p1, const void *p2) {
233 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
234 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
235 return field_rank(f1) - field_rank(f2);
236 }
237
/* Validates every field of `m` and assigns each one its index and selector
 * base; also records the message's submessage-field count and total selector
 * count.
 *
 * Returns false with an error in `s` if a field fails validation or if the
 * temporary field array cannot be allocated. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
   * lowest indexes, but we do not publicly guarantee this. */
  upb_msg_field_iter j;
  int i;
  uint32_t selector;
  int n = upb_msgdef_numfields(m);
  /* Always request at least one element: malloc(0) is allowed to return
   * NULL, which must not be mistaken for an allocation failure when the
   * message has no fields (and qsort() needs a valid pointer even for zero
   * elements). */
  upb_fielddef **fields = malloc((n > 0 ? n : 1) * sizeof(*fields));
  if (!fields) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  m->submsg_field_count = 0;
  for(i = 0, upb_msg_field_begin(&j, m);
      !upb_msg_field_done(&j);
      upb_msg_field_next(&j), i++) {
    upb_fielddef *f = upb_msg_iter_field(&j);
    assert(f->msg.def == m);
    if (!upb_validate_field(f, s)) {
      free(fields);
      return false;
    }
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    fields[i] = f;
  }

  qsort(fields, n, sizeof(*fields), cmp_fields);

  /* Field selectors are numbered after the static selectors and after one
   * slot per submessage field. */
  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (i = 0; i < n; i++) {
    upb_fielddef *f = fields[i];
    f->index_ = i;
    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
    selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = selector;

#ifndef NDEBUG
  {
    /* Verify that all selectors for the message are distinct. */
#define TRY(type) \
    if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);

    upb_inttable t;
    upb_value v;
    upb_selector_t sel;

    upb_inttable_init(&t, UPB_CTYPE_BOOL);
    v = upb_value_bool(true);
    upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
    upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
    for(upb_msg_field_begin(&j, m);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {
      upb_fielddef *f = upb_msg_iter_field(&j);
      /* These calls will assert-fail in upb_table if the value already
       * exists. */
      TRY(UPB_HANDLER_INT32);
      TRY(UPB_HANDLER_INT64)
      TRY(UPB_HANDLER_UINT32)
      TRY(UPB_HANDLER_UINT64)
      TRY(UPB_HANDLER_FLOAT)
      TRY(UPB_HANDLER_DOUBLE)
      TRY(UPB_HANDLER_BOOL)
      TRY(UPB_HANDLER_STARTSTR)
      TRY(UPB_HANDLER_STRING)
      TRY(UPB_HANDLER_ENDSTR)
      TRY(UPB_HANDLER_STARTSUBMSG)
      TRY(UPB_HANDLER_ENDSUBMSG)
      TRY(UPB_HANDLER_STARTSEQ)
      TRY(UPB_HANDLER_ENDSEQ)
    }
    upb_inttable_uninit(&t);
  }
#undef TRY
#endif

  free(fields);
  return true;
}
318
/* Validates and freezes the `n` defs in `defs` as a single batch.
 *
 * Every def must be unfrozen and must not be a standalone fielddef.  Enums
 * must have at least one value, and every message field must pass
 * upb_validate_field().  On success the defs are frozen through
 * upb_refcounted_freeze() and its result is returned.  On a validation
 * failure the reason is stored in `s`, the came_from_user flags are cleared
 * again, and false is returned. */
bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
  int i;
  int maxdepth;
  bool ret;
  upb_status_clear(s);

  /* First perform validation, in two passes so we can check that we have a
   * transitive closure without needing to search. */
  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];
    if (upb_def_isfrozen(def)) {
      /* Could relax this requirement if it's annoying. */
      upb_status_seterrmsg(s, "def is already frozen");
      goto err;
    } else if (def->type == UPB_DEF_FIELD) {
      upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
      goto err;
    } else {
      /* Set now to detect transitive closure in the second pass.  This is
       * done for enums as well as messages: upb_validate_field() accepts an
       * unfrozen subdef only when this flag is set, so an enum frozen in the
       * same batch as a message that refers to it must be marked too. */
      def->came_from_user = true;

      if (def->type == UPB_DEF_ENUM &&
          !upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
        goto err;
      }
    }
  }

  /* Second pass of validation.  Also assign selector bases and indexes, and
   * compact tables. */
  for (i = 0; i < n; i++) {
    upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
    upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
    if (m) {
      upb_inttable_compact(&m->itof);
      if (!assign_msg_indices(m, s)) {
        goto err;
      }
    } else if (e) {
      upb_inttable_compact(&e->iton);
    }
  }

  /* Def graph contains FieldDefs between each MessageDef, so double the
   * limit. */
  maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;

  /* Validation all passed; freeze the defs. */
  ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
  assert(!(s && ret != upb_ok(s)));
  return ret;

err:
  /* Undo the marking from the first pass. */
  for (i = 0; i < n; i++) {
    defs[i]->came_from_user = false;
  }
  assert(!(s && upb_ok(s)));
  return false;
}
377
378
379 /* upb_enumdef ****************************************************************/
380
/* Destroys an enumdef: frees the name strings stored in the number-to-name
 * table, tears down both tables and the def base, then frees the enumdef. */
static void upb_enumdef_free(upb_refcounted *r) {
  upb_enumdef *e = (upb_enumdef*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &e->iton);
  while (!upb_inttable_done(&it)) {
    /* To clean up the upb_strdup() from upb_enumdef_addval(). */
    free(upb_value_getcstr(upb_inttable_iter_value(&it)));
    upb_inttable_next(&it);
  }

  upb_strtable_uninit(&e->ntoi);
  upb_inttable_uninit(&e->iton);
  upb_def_uninit(upb_enumdef_upcast_mutable(e));
  free(e);
}
394
upb_enumdef_new(const void * owner)395 upb_enumdef *upb_enumdef_new(const void *owner) {
396 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
397 upb_enumdef *e = malloc(sizeof(*e));
398 if (!e) return NULL;
399 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
400 goto err2;
401 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
402 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
403 return e;
404
405 err1:
406 upb_strtable_uninit(&e->ntoi);
407 err2:
408 free(e);
409 return NULL;
410 }
411
upb_enumdef_dup(const upb_enumdef * e,const void * owner)412 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
413 upb_enum_iter i;
414 upb_enumdef *new_e = upb_enumdef_new(owner);
415 if (!new_e) return NULL;
416 for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
417 bool success = upb_enumdef_addval(
418 new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
419 if (!success) {
420 upb_enumdef_unref(new_e, owner);
421 return NULL;
422 }
423 }
424 return new_e;
425 }
426
/* Freezes a single enumdef by passing it to upb_def_freeze() as a batch of
 * one. */
bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
  upb_def *defs[1];
  defs[0] = upb_enumdef_upcast_mutable(e);
  return upb_def_freeze(defs, 1, status);
}
431
upb_enumdef_fullname(const upb_enumdef * e)432 const char *upb_enumdef_fullname(const upb_enumdef *e) {
433 return upb_def_fullname(upb_enumdef_upcast(e));
434 }
435
/* Sets the enum's full name; see upb_def_setfullname() for the rules and the
 * meaning of the return value. */
bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
                             upb_status *s) {
  upb_def *def = upb_enumdef_upcast_mutable(e);
  return upb_def_setfullname(def, fullname, s);
}
440
/* Adds the value `name` = `num` to the enum.
 *
 * `name` must be a valid identifier without dots and must not be defined in
 * the enum already.  Several names may map to the same number; the
 * number-to-name table keeps the first name added for each number.  The
 * first value added also becomes the enum's default.
 *
 * Returns false and sets `status` if the name is invalid, is a duplicate, or
 * if memory runs out; the enum is left unchanged in those cases. */
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
                        upb_status *status) {
  if (!upb_isident(name, strlen(name), false, status)) {
    return false;
  }
  if (upb_enumdef_ntoiz(e, name, NULL)) {
    upb_status_seterrf(status, "name '%s' is already defined", name);
    return false;
  }
  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
    upb_status_seterrmsg(status, "out of memory");
    return false;
  }
  if (!upb_inttable_lookup(&e->iton, num, NULL)) {
    /* The iton table owns a private copy of the name (freed in
     * upb_enumdef_free()).  Check the copy before inserting it, and release
     * it again if the insert fails, so nothing is leaked and no NULL name
     * ends up in the table. */
    char *name_copy = upb_strdup(name);
    if (!name_copy ||
        !upb_inttable_insert(&e->iton, num, upb_value_cstr(name_copy))) {
      free(name_copy);
      upb_status_seterrmsg(status, "out of memory");
      upb_strtable_remove(&e->ntoi, name, NULL);
      return false;
    }
  }
  if (upb_enumdef_numvals(e) == 1) {
    bool ok = upb_enumdef_setdefault(e, num, NULL);
    UPB_ASSERT_VAR(ok, ok);
  }
  return true;
}
466
upb_enumdef_default(const upb_enumdef * e)467 int32_t upb_enumdef_default(const upb_enumdef *e) {
468 assert(upb_enumdef_iton(e, e->defaultval));
469 return e->defaultval;
470 }
471
/* Makes `val` the enum's default.  `val` must be a number that already has a
 * name in the enum; otherwise an error is stored in `s` and false is
 * returned.  The enum must not be frozen. */
bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
  assert(!upb_enumdef_isfrozen(e));
  if (upb_enumdef_iton(e, val)) {
    e->defaultval = val;
    return true;
  }
  upb_status_seterrf(s, "number '%d' is not in the enum.", val);
  return false;
}
481
upb_enumdef_numvals(const upb_enumdef * e)482 int upb_enumdef_numvals(const upb_enumdef *e) {
483 return upb_strtable_count(&e->ntoi);
484 }
485
/* Positions iterator `i` at the first value of enum `e`. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  /* We iterate over the ntoi table, to account for duplicate numbers. */
  const upb_strtable *names = &e->ntoi;
  upb_strtable_begin(i, names);
}
490
/* Advances the enum iterator to the next value. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once the enum iterator has passed the last value. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
493
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)494 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
495 size_t len, int32_t *num) {
496 upb_value v;
497 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
498 return false;
499 }
500 if (num) *num = upb_value_getint32(v);
501 return true;
502 }
503
upb_enumdef_iton(const upb_enumdef * def,int32_t num)504 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
505 upb_value v;
506 return upb_inttable_lookup32(&def->iton, num, &v) ?
507 upb_value_getcstr(v) : NULL;
508 }
509
/* Returns the name of the value the iterator currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *name = upb_strtable_iter_key(iter);
  return name;
}
513
/* Returns the number of the value the iterator currently points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  upb_value v = upb_strtable_iter_value(iter);
  return upb_value_getint32(v);
}
517
518
519 /* upb_fielddef ***************************************************************/
520
521 static void upb_fielddef_init_default(upb_fielddef *f);
522
/* Frees the field's default value if it is a heap-allocated string.  Does
 * nothing when the type is unset, the default is not a string, or the string
 * pointer is NULL. */
static void upb_fielddef_uninit_default(upb_fielddef *f) {
  if (!f->type_is_set_) return;
  if (!f->default_is_string) return;
  if (f->defaultval.bytes) {
    freestr(f->defaultval.bytes);
  }
}
527
/* Refcounted "visit" callback for fielddefs: reports the field's containing
 * message, containing oneof and subdef (each only if present) to `visit`. */
static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_fielddef *f = (const upb_fielddef*)r;
  const upb_msgdef *msg;
  const upb_oneofdef *oneof;
  const upb_def *sub;

  msg = upb_fielddef_containingtype(f);
  if (msg) {
    visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
  }

  oneof = upb_fielddef_containingoneof(f);
  if (oneof) {
    visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure);
  }

  sub = upb_fielddef_subdef(f);
  if (sub) {
    visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
  }
}
541
/* Refcounted "free" callback for fielddefs: releases everything the field
 * owns (string default, symbolic subdef name, symbolic containing-type name,
 * def base) and then the field itself. */
static void freefield(upb_refcounted *r) {
  upb_fielddef *f = (upb_fielddef*)r;
  upb_fielddef_uninit_default(f);
  if (f->subdef_is_symbolic)
    free(f->sub.name);
  /* A symbolic containing type is a heap string created by
   * upb_fielddef_setcontainingtypename().  Free it under the same condition
   * release_containingtype() uses, so it is not leaked when the field is
   * destroyed before being added to a message. */
  if (f->msg_is_symbolic)
    free(f->msg.name);
  upb_def_uninit(upb_fielddef_upcast_mutable(f));
  free(f);
}
550
enumdefaultstr(const upb_fielddef * f)551 static const char *enumdefaultstr(const upb_fielddef *f) {
552 const upb_enumdef *e;
553 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
554 e = upb_fielddef_enumsubdef(f);
555 if (f->default_is_string && f->defaultval.bytes) {
556 /* Default was explicitly set as a string. */
557 str_t *s = f->defaultval.bytes;
558 return s->str;
559 } else if (e) {
560 if (!f->default_is_string) {
561 /* Default was explicitly set as an integer; look it up in enumdef. */
562 const char *name = upb_enumdef_iton(e, f->defaultval.sint);
563 if (name) {
564 return name;
565 }
566 } else {
567 /* Default is completely unset; pull enumdef default. */
568 if (upb_enumdef_numvals(e) > 0) {
569 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
570 assert(name);
571 return name;
572 }
573 }
574 }
575 return NULL;
576 }
577
enumdefaultint32(const upb_fielddef * f,int32_t * val)578 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
579 const upb_enumdef *e;
580 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
581 e = upb_fielddef_enumsubdef(f);
582 if (!f->default_is_string) {
583 /* Default was explicitly set as an integer. */
584 *val = f->defaultval.sint;
585 return true;
586 } else if (e) {
587 if (f->defaultval.bytes) {
588 /* Default was explicitly set as a str; try to lookup corresponding int. */
589 str_t *s = f->defaultval.bytes;
590 if (upb_enumdef_ntoiz(e, s->str, val)) {
591 return true;
592 }
593 } else {
594 /* Default is unset; try to pull in enumdef default. */
595 if (upb_enumdef_numvals(e) > 0) {
596 *val = upb_enumdef_default(e);
597 return true;
598 }
599 }
600 }
601 return false;
602 }
603
upb_fielddef_new(const void * o)604 upb_fielddef *upb_fielddef_new(const void *o) {
605 static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
606 upb_fielddef *f = malloc(sizeof(*f));
607 if (!f) return NULL;
608 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
609 free(f);
610 return NULL;
611 }
612 f->msg.def = NULL;
613 f->sub.def = NULL;
614 f->oneof = NULL;
615 f->subdef_is_symbolic = false;
616 f->msg_is_symbolic = false;
617 f->label_ = UPB_LABEL_OPTIONAL;
618 f->type_ = UPB_TYPE_INT32;
619 f->number_ = 0;
620 f->type_is_set_ = false;
621 f->tagdelim = false;
622 f->is_extension_ = false;
623 f->lazy_ = false;
624 f->packed_ = true;
625
626 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
627 * with all integer types and is in some since more "default" since the most
628 * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
629 *
630 * Other options to consider:
631 * - there is no default; users must set this manually (like type).
632 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
633 * be an optimal default for signed integers. */
634 f->intfmt = UPB_INTFMT_VARIABLE;
635 return f;
636 }
637
upb_fielddef_dup(const upb_fielddef * f,const void * owner)638 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
639 const char *srcname;
640 upb_fielddef *newf = upb_fielddef_new(owner);
641 if (!newf) return NULL;
642 upb_fielddef_settype(newf, upb_fielddef_type(f));
643 upb_fielddef_setlabel(newf, upb_fielddef_label(f));
644 upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
645 upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
646 if (f->default_is_string && f->defaultval.bytes) {
647 str_t *s = f->defaultval.bytes;
648 upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
649 } else {
650 newf->default_is_string = f->default_is_string;
651 newf->defaultval = f->defaultval;
652 }
653
654 if (f->subdef_is_symbolic) {
655 srcname = f->sub.name; /* Might be NULL. */
656 } else {
657 srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
658 }
659 if (srcname) {
660 char *newname = malloc(strlen(f->sub.def->fullname) + 2);
661 if (!newname) {
662 upb_fielddef_unref(newf, owner);
663 return NULL;
664 }
665 strcpy(newname, ".");
666 strcat(newname, f->sub.def->fullname);
667 upb_fielddef_setsubdefname(newf, newname, NULL);
668 free(newname);
669 }
670
671 return newf;
672 }
673
upb_fielddef_typeisset(const upb_fielddef * f)674 bool upb_fielddef_typeisset(const upb_fielddef *f) {
675 return f->type_is_set_;
676 }
677
upb_fielddef_type(const upb_fielddef * f)678 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
679 assert(f->type_is_set_);
680 return f->type_;
681 }
682
upb_fielddef_index(const upb_fielddef * f)683 uint32_t upb_fielddef_index(const upb_fielddef *f) {
684 return f->index_;
685 }
686
upb_fielddef_label(const upb_fielddef * f)687 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
688 return f->label_;
689 }
690
upb_fielddef_intfmt(const upb_fielddef * f)691 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
692 return f->intfmt;
693 }
694
upb_fielddef_istagdelim(const upb_fielddef * f)695 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
696 return f->tagdelim;
697 }
698
upb_fielddef_number(const upb_fielddef * f)699 uint32_t upb_fielddef_number(const upb_fielddef *f) {
700 return f->number_;
701 }
702
upb_fielddef_isextension(const upb_fielddef * f)703 bool upb_fielddef_isextension(const upb_fielddef *f) {
704 return f->is_extension_;
705 }
706
upb_fielddef_lazy(const upb_fielddef * f)707 bool upb_fielddef_lazy(const upb_fielddef *f) {
708 return f->lazy_;
709 }
710
upb_fielddef_packed(const upb_fielddef * f)711 bool upb_fielddef_packed(const upb_fielddef *f) {
712 return f->packed_;
713 }
714
upb_fielddef_name(const upb_fielddef * f)715 const char *upb_fielddef_name(const upb_fielddef *f) {
716 return upb_def_fullname(upb_fielddef_upcast(f));
717 }
718
upb_fielddef_containingtype(const upb_fielddef * f)719 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
720 return f->msg_is_symbolic ? NULL : f->msg.def;
721 }
722
upb_fielddef_containingoneof(const upb_fielddef * f)723 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
724 return f->oneof;
725 }
726
upb_fielddef_containingtype_mutable(upb_fielddef * f)727 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
728 return (upb_msgdef*)upb_fielddef_containingtype(f);
729 }
730
/* Returns the symbolic name of the containing type, or NULL if the
 * containing type is not symbolic. */
const char *upb_fielddef_containingtypename(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return NULL;
  }
  return f->msg.name;
}
734
/* Frees the containing-type name if the field holds one symbolically.  The
 * caller is expected to overwrite f->msg afterwards. */
static void release_containingtype(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return;
  }
  free(f->msg.name);
}
738
/* Records the containing type of the field symbolically, as a private copy
 * of `name`.
 *
 * Fails with an error in `s` if the field has already been added to a
 * message or if memory cannot be allocated; the field is left unchanged in
 * both cases.  The field must not be frozen. */
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
                                        upb_status *s) {
  char *name_copy;
  assert(!upb_fielddef_isfrozen(f));
  if (upb_fielddef_containingtype(f)) {
    upb_status_seterrmsg(s, "field has already been added to a message.");
    return false;
  }
  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */

  /* Copy before releasing the old name: this keeps the field intact on
   * allocation failure and stays correct if `name` is the string returned by
   * upb_fielddef_containingtypename() for this same field. */
  name_copy = upb_strdup(name);
  if (!name_copy) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }
  release_containingtype(f);
  f->msg.name = name_copy;
  f->msg_is_symbolic = true;
  return true;
}
753
/* Sets the field's name.  Fails with an error in `s` once the field belongs
 * to a message or a oneof; otherwise defers to upb_def_setfullname(). */
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
  if (!upb_fielddef_containingtype(f) && !upb_fielddef_containingoneof(f)) {
    return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
  }
  upb_status_seterrmsg(s, "Already added to message or oneof");
  return false;
}
761
/* Debug check used by the typed default getters: asserts that the field's
 * type is set and equals `type`. */
static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
  assert(f->type_is_set_ && upb_fielddef_type(f) == type);
  /* Both parameters are otherwise unused when assertions are compiled out. */
  UPB_UNUSED(type);
  UPB_UNUSED(f);
}
767
upb_fielddef_defaultint64(const upb_fielddef * f)768 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
769 chkdefaulttype(f, UPB_TYPE_INT64);
770 return f->defaultval.sint;
771 }
772
upb_fielddef_defaultint32(const upb_fielddef * f)773 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
774 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
775 int32_t val;
776 bool ok = enumdefaultint32(f, &val);
777 UPB_ASSERT_VAR(ok, ok);
778 return val;
779 } else {
780 chkdefaulttype(f, UPB_TYPE_INT32);
781 return f->defaultval.sint;
782 }
783 }
784
upb_fielddef_defaultuint64(const upb_fielddef * f)785 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
786 chkdefaulttype(f, UPB_TYPE_UINT64);
787 return f->defaultval.uint;
788 }
789
upb_fielddef_defaultuint32(const upb_fielddef * f)790 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
791 chkdefaulttype(f, UPB_TYPE_UINT32);
792 return f->defaultval.uint;
793 }
794
upb_fielddef_defaultbool(const upb_fielddef * f)795 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
796 chkdefaulttype(f, UPB_TYPE_BOOL);
797 return f->defaultval.uint;
798 }
799
upb_fielddef_defaultfloat(const upb_fielddef * f)800 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
801 chkdefaulttype(f, UPB_TYPE_FLOAT);
802 return f->defaultval.flt;
803 }
804
upb_fielddef_defaultdouble(const upb_fielddef * f)805 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
806 chkdefaulttype(f, UPB_TYPE_DOUBLE);
807 return f->defaultval.dbl;
808 }
809
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)810 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
811 assert(f->type_is_set_);
812 assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
813 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
814 upb_fielddef_type(f) == UPB_TYPE_ENUM);
815
816 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
817 const char *ret = enumdefaultstr(f);
818 assert(ret);
819 /* Enum defaults can't have embedded NULLs. */
820 if (len) *len = strlen(ret);
821 return ret;
822 }
823
824 if (f->default_is_string) {
825 str_t *str = f->defaultval.bytes;
826 if (len) *len = str->len;
827 return str->str;
828 }
829
830 return NULL;
831 }
832
/* Resets the field's default to the zero value for its current type:
 * numeric zero, an empty string for STRING/BYTES, or the "not set" sentinel
 * for enums.  MESSAGE fields get no default value. */
static void upb_fielddef_init_default(upb_fielddef *f) {
  f->default_is_string = false;
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;
    case UPB_TYPE_FLOAT:
      f->defaultval.flt = 0;
      break;
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;
    case UPB_TYPE_UINT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_BOOL:
      f->defaultval.uint = 0;
      break;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      f->defaultval.bytes = newstr("", 0);
      f->default_is_string = true;
      break;
    case UPB_TYPE_MESSAGE:
      break;
    case UPB_TYPE_ENUM:
      /* This is our special sentinel that indicates "not set" for an enum. */
      f->defaultval.bytes = NULL;
      f->default_is_string = true;
      break;
  }
}
856
upb_fielddef_subdef(const upb_fielddef * f)857 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
858 return f->subdef_is_symbolic ? NULL : f->sub.def;
859 }
860
upb_fielddef_msgsubdef(const upb_fielddef * f)861 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
862 const upb_def *def = upb_fielddef_subdef(f);
863 return def ? upb_dyncast_msgdef(def) : NULL;
864 }
865
upb_fielddef_enumsubdef(const upb_fielddef * f)866 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
867 const upb_def *def = upb_fielddef_subdef(f);
868 return def ? upb_dyncast_enumdef(def) : NULL;
869 }
870
upb_fielddef_subdef_mutable(upb_fielddef * f)871 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
872 return (upb_def*)upb_fielddef_subdef(f);
873 }
874
upb_fielddef_subdefname(const upb_fielddef * f)875 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
876 if (f->subdef_is_symbolic) {
877 return f->sub.name;
878 } else if (f->sub.def) {
879 return upb_def_fullname(f->sub.def);
880 } else {
881 return NULL;
882 }
883 }
884
/* Sets the field number.  Fails with an error in `s` if the field already
 * belongs to a message, or if `number` is 0 or greater than
 * UPB_MAX_FIELDNUMBER. */
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
  if (upb_fielddef_containingtype(f)) {
    upb_status_seterrmsg(
        s, "cannot change field number after adding to a message");
    return false;
  }
  if (number != 0 && number <= UPB_MAX_FIELDNUMBER) {
    f->number_ = number;
    return true;
  }
  upb_status_seterrf(s, "invalid field number (%u)", number);
  return false;
}
898
/* Sets the field's type and resets its default to the zero value of the new
 * type.  The field must not be frozen and `type` must be a valid field type
 * (both asserted). */
void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checktype(type));

  /* The old default has to be released while the old type is still in
   * place. */
  upb_fielddef_uninit_default(f);

  f->type_is_set_ = true;
  f->type_ = type;
  upb_fielddef_init_default(f);
}
907
/* Configures the field from a descriptor type: sets the corresponding upb
 * field type, the integer format (fixed for the FIXED and SFIXED types,
 * zigzag for the SINT types, variable otherwise) and the tag-delimited flag
 * (true only for GROUP).  An unknown `type` trips an assertion.  The field
 * must not be frozen. */
void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
  upb_intfmt_t fmt;

  assert(!upb_fielddef_isfrozen(f));

  /* Step 1: map the descriptor type onto a upb field type. */
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      upb_fielddef_settype(f, UPB_TYPE_FLOAT);
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SINT64:
      upb_fielddef_settype(f, UPB_TYPE_INT64);
      break;
    case UPB_DESCRIPTOR_TYPE_UINT64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      upb_fielddef_settype(f, UPB_TYPE_UINT64);
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
    case UPB_DESCRIPTOR_TYPE_SINT32:
      upb_fielddef_settype(f, UPB_TYPE_INT32);
      break;
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
      upb_fielddef_settype(f, UPB_TYPE_UINT32);
      break;
    case UPB_DESCRIPTOR_TYPE_BOOL:
      upb_fielddef_settype(f, UPB_TYPE_BOOL);
      break;
    case UPB_DESCRIPTOR_TYPE_STRING:
      upb_fielddef_settype(f, UPB_TYPE_STRING);
      break;
    case UPB_DESCRIPTOR_TYPE_BYTES:
      upb_fielddef_settype(f, UPB_TYPE_BYTES);
      break;
    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE:
      upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
      break;
    case UPB_DESCRIPTOR_TYPE_ENUM:
      upb_fielddef_settype(f, UPB_TYPE_ENUM);
      break;
    default: assert(false);
  }

  /* Step 2: pick the integer format implied by the descriptor type. */
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT64:
    case UPB_DESCRIPTOR_TYPE_SINT32:
      fmt = UPB_INTFMT_ZIGZAG;
      break;
    default:
      fmt = UPB_INTFMT_VARIABLE;
      break;
  }
  upb_fielddef_setintfmt(f, fmt);

  /* Step 3: only GROUP is tag-delimited. */
  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
}
968
upb_fielddef_descriptortype(const upb_fielddef * f)969 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
970 switch (upb_fielddef_type(f)) {
971 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT;
972 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
973 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL;
974 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
975 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES;
976 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM;
977 case UPB_TYPE_INT32:
978 switch (upb_fielddef_intfmt(f)) {
979 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
980 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32;
981 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32;
982 }
983 case UPB_TYPE_INT64:
984 switch (upb_fielddef_intfmt(f)) {
985 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
986 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64;
987 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64;
988 }
989 case UPB_TYPE_UINT32:
990 switch (upb_fielddef_intfmt(f)) {
991 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
992 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32;
993 case UPB_INTFMT_ZIGZAG: return -1;
994 }
995 case UPB_TYPE_UINT64:
996 switch (upb_fielddef_intfmt(f)) {
997 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
998 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64;
999 case UPB_INTFMT_ZIGZAG: return -1;
1000 }
1001 case UPB_TYPE_MESSAGE:
1002 return upb_fielddef_istagdelim(f) ?
1003 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
1004 }
1005 return 0;
1006 }
1007
/* Records whether the field is an extension.  The field must not be frozen
 * (checked by assert only). */
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
  assert(!upb_fielddef_isfrozen(f));

  f->is_extension_ = is_extension;
}
1012
/* Stores the field's "lazy" flag.  The field must not be frozen (checked by
 * assert only). */
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
  assert(!upb_fielddef_isfrozen(f));

  f->lazy_ = lazy;
}
1017
/* Stores the field's "packed" flag.  The field must not be frozen (checked
 * by assert only). */
void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
  assert(!upb_fielddef_isfrozen(f));

  f->packed_ = packed;
}
1022
/* Sets the field's label.  The field must not be frozen and |label| must
 * pass upb_fielddef_checklabel() (both checked by assert only). */
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checklabel(label));

  f->label_ = label;
}
1028
/* Sets the field's integer format.  The field must not be frozen and |fmt|
 * must pass upb_fielddef_checkintfmt() (both checked by assert only). */
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checkintfmt(fmt));

  f->intfmt = fmt;
}
1034
/* Stores the field's tag-delimited flag.  The field must not be frozen
 * (checked by assert only). */
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
  assert(!upb_fielddef_isfrozen(f));
  f->tagdelim = tag_delim;
}
1040
/* Common precondition check for the typed default setters.
 *
 * Returns false (after tripping an assert in debug builds) unless the field
 * has its type set, is not frozen, and has exactly the type |type|.  On
 * success any string default currently held is freed and the field is marked
 * as no longer holding a string default, so the caller may store a scalar. */
static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
  bool settable = f->type_is_set_ && !upb_fielddef_isfrozen(f) &&
                  upb_fielddef_type(f) == type;

  if (!settable) {
    assert(false);
    return false;
  }

  if (f->default_is_string) {
    str_t *old = f->defaultval.bytes;
    /* Only enum fields may have a string default flag with no string. */
    assert(old || type == UPB_TYPE_ENUM);
    if (old) freestr(old);
  }

  f->default_is_string = false;
  return true;
}
1055
/* Sets the default of an INT64 field.  Does nothing when checksetdefault()
 * rejects the field for that type. */
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
  if (!checksetdefault(f, UPB_TYPE_INT64)) return;
  f->defaultval.sint = value;
}
1060
/* Sets the default of an INT32 or ENUM field.
 *
 * For an enum field the ENUM check is tried first; if that check does not
 * pass (or the field is not an enum) the INT32 check is tried.  The value is
 * stored only when one of the checks succeeds. */
void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
  bool ok = false;

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    ok = checksetdefault(f, UPB_TYPE_ENUM);
  }
  if (!ok) {
    ok = checksetdefault(f, UPB_TYPE_INT32);
  }

  if (ok) {
    f->defaultval.sint = value;
  }
}
1068
/* Sets the default of a UINT64 field.  Does nothing when checksetdefault()
 * rejects the field for that type. */
void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT64)) return;
  f->defaultval.uint = value;
}
1073
/* Sets the default of a UINT32 field.  Does nothing when checksetdefault()
 * rejects the field for that type. */
void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT32)) return;
  f->defaultval.uint = value;
}
1078
/* Sets the default of a BOOL field; the value is kept in the unsigned
 * integer member of the default union.  Does nothing when checksetdefault()
 * rejects the field for that type. */
void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
  if (!checksetdefault(f, UPB_TYPE_BOOL)) return;
  f->defaultval.uint = value;
}
1083
/* Sets the default of a FLOAT field.  Does nothing when checksetdefault()
 * rejects the field for that type. */
void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
  if (!checksetdefault(f, UPB_TYPE_FLOAT)) return;
  f->defaultval.flt = value;
}
1088
/* Sets the default of a DOUBLE field.  Does nothing when checksetdefault()
 * rejects the field for that type. */
void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
  if (!checksetdefault(f, UPB_TYPE_DOUBLE)) return;
  f->defaultval.dbl = value;
}
1093
/* Sets a string default on a string/bytes field, or a symbolic (by-name)
 * default on an enum field.
 *
 * |str|/|len| are copied; the caller keeps ownership of its buffer.  For enum
 * fields the name must be a valid non-dotted identifier.
 *
 * Returns false, with an error recorded in |s|, when the enum name is invalid
 * or when the copy cannot be allocated.  In both failure cases the field's
 * current default is left untouched. */
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
                                upb_status *s) {
  str_t *copy;
  assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
    return false;

  /* Allocate the replacement before releasing the old value, so an
   * allocation failure cannot leave the field pointing at freed memory. */
  copy = newstr(str, len);
  if (!copy) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  if (f->default_is_string) {
    str_t *old = f->defaultval.bytes;
    assert(old || f->type_ == UPB_TYPE_ENUM);
    if (old) freestr(old);
  } else {
    /* Only enum fields can currently hold a non-string default here. */
    assert(f->type_ == UPB_TYPE_ENUM);
  }

  f->defaultval.bytes = copy;
  f->default_is_string = true;
  return true;
}
1114
/* Convenience wrapper around upb_fielddef_setdefaultstr() for NUL-terminated
 * strings.  A NULL |str| is forwarded with length 0.  The boolean result of
 * the underlying call is not reported; failures are visible only via |s|. */
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
                                 upb_status *s) {
  size_t len = 0;

  assert(f->type_is_set_);

  if (str) len = strlen(str);
  upb_fielddef_setdefaultstr(f, str, len, s);
}
1120
upb_fielddef_enumhasdefaultint32(const upb_fielddef * f)1121 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
1122 int32_t val;
1123 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1124 return enumdefaultint32(f, &val);
1125 }
1126
upb_fielddef_enumhasdefaultstr(const upb_fielddef * f)1127 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1128 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1129 return enumdefaultstr(f) != NULL;
1130 }
1131
/* Checks that |subdef| is an acceptable subdef for field |f|: a msgdef for
 * message fields, an enumdef for enum fields.  Any other field type cannot
 * take a subdef at all.  On mismatch an explanatory error is stored in |s|
 * and false is returned. */
static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
                                 upb_status *s) {
  const char *err;

  if (f->type_ == UPB_TYPE_MESSAGE) {
    if (upb_dyncast_msgdef(subdef)) return true;
    err = "invalid subdef type for this submessage field";
  } else if (f->type_ == UPB_TYPE_ENUM) {
    if (upb_dyncast_enumdef(subdef)) return true;
    err = "invalid subdef type for this enum field";
  } else {
    err = "only message and enum fields can have a subdef";
  }

  upb_status_seterrmsg(s, err);
  return false;
}
1147
/* Drops whatever the field currently holds as its subdef: frees the name
 * string when the subdef is symbolic, otherwise releases the field's ref on
 * the attached def (if any).  The field's members are not cleared; callers
 * overwrite them afterwards. */
static void release_subdef(upb_fielddef *f) {
  if (f->subdef_is_symbolic) {
    free(f->sub.name);
    return;
  }

  if (f->sub.def != NULL) {
    upb_unref2(f->sub.def, f);
  }
}
1155
/* Attaches |subdef| to the field (or detaches the current one when |subdef|
 * is NULL).
 *
 * The field must be unfrozen and of a type that takes a subdef (asserted).
 * A non-NULL |subdef| must pass upb_subdef_typecheck(); otherwise false is
 * returned with the error in |s| and the field is unchanged.  On success the
 * previous subdef/name is released and the field takes a ref on the new
 * def. */
bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
                            upb_status *s) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_hassubdef(f));

  if (subdef != NULL && !upb_subdef_typecheck(f, subdef, s)) {
    return false;
  }

  release_subdef(f);
  f->subdef_is_symbolic = false;
  f->sub.def = subdef;
  if (f->sub.def != NULL) {
    upb_ref2(f->sub.def, f);
  }
  return true;
}
1167
/* Typed convenience form of upb_fielddef_setsubdef() for msgdefs. */
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
                               upb_status *s) {
  const upb_def *def = upb_msgdef_upcast(subdef);
  return upb_fielddef_setsubdef(f, def, s);
}
1172
/* Typed convenience form of upb_fielddef_setsubdef() for enumdefs. */
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
                                upb_status *s) {
  const upb_def *def = upb_enumdef_upcast(subdef);
  return upb_fielddef_setsubdef(f, def, s);
}
1177
/* Sets the field's subdef symbolically, by name.  |name| is copied.
 *
 * Returns false with an error in |s| when the field's type does not take a
 * subdef or when the name cannot be copied; in both cases the field's
 * current subdef is left untouched.  The field must not be frozen
 * (asserted). */
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
                                upb_status *s) {
  char *name_copy;
  assert(!upb_fielddef_isfrozen(f));
  if (!upb_fielddef_hassubdef(f)) {
    upb_status_seterrmsg(s, "field type does not accept a subdef");
    return false;
  }
  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */

  /* Copy first: releasing the old subdef before a failed copy would leave the
   * field flagged symbolic with no usable name. */
  name_copy = upb_strdup(name);
  if (!name_copy) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  release_subdef(f);
  f->sub.name = name_copy;
  f->subdef_is_symbolic = true;
  return true;
}
1192
upb_fielddef_issubmsg(const upb_fielddef * f)1193 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1194 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1195 }
1196
upb_fielddef_isstring(const upb_fielddef * f)1197 bool upb_fielddef_isstring(const upb_fielddef *f) {
1198 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1199 upb_fielddef_type(f) == UPB_TYPE_BYTES;
1200 }
1201
upb_fielddef_isseq(const upb_fielddef * f)1202 bool upb_fielddef_isseq(const upb_fielddef *f) {
1203 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1204 }
1205
/* True when the field is neither a string/bytes field nor a submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  if (upb_fielddef_isstring(f)) return false;
  return !upb_fielddef_issubmsg(f);
}
1209
upb_fielddef_ismap(const upb_fielddef * f)1210 bool upb_fielddef_ismap(const upb_fielddef *f) {
1211 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1212 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1213 }
1214
upb_fielddef_hassubdef(const upb_fielddef * f)1215 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1216 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1217 }
1218
/* Returns true when |x| lies in the closed interval [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
1222
/* True when |label| is in the accepted label range, 1 through 3. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True when |type| is in the accepted field type range, 1 through 11. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True when |fmt| is in the accepted integer format range, 1 through 3. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
1226
/* True when |type| is in the accepted descriptor type range, 1 through 18. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  bool valid = between(type, 1, 18);
  return valid;
}
1230
1231 /* upb_msgdef *****************************************************************/
1232
/* Refcounting visit callback for msgdefs: reports every fielddef and then
 * every oneofdef contained in the message to |visit|. */
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
                     void *closure) {
  const upb_msgdef *m = (const upb_msgdef*)r;
  upb_msg_field_iter fi;
  upb_msg_oneof_iter oi;

  upb_msg_field_begin(&fi, m);
  while (!upb_msg_field_done(&fi)) {
    upb_fielddef *field = upb_msg_iter_field(&fi);
    visit(r, upb_fielddef_upcast2(field), closure);
    upb_msg_field_next(&fi);
  }

  upb_msg_oneof_begin(&oi, m);
  while (!upb_msg_oneof_done(&oi)) {
    upb_oneofdef *oneof = upb_msg_iter_oneof(&oi);
    visit(r, upb_oneofdef_upcast2(oneof), closure);
    upb_msg_oneof_next(&oi);
  }
}
1251
/* Refcounting free callback for msgdefs: tears down the three lookup tables
 * and the base def, then releases the msgdef's own storage. */
static void freemsg(upb_refcounted *r) {
  upb_msgdef *msg = (upb_msgdef*)r;

  upb_strtable_uninit(&msg->ntoo);
  upb_strtable_uninit(&msg->ntof);
  upb_inttable_uninit(&msg->itof);
  upb_def_uninit(upb_msgdef_upcast_mutable(msg));

  free(msg);
}
1260
upb_msgdef_new(const void * owner)1261 upb_msgdef *upb_msgdef_new(const void *owner) {
1262 static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
1263 upb_msgdef *m = malloc(sizeof(*m));
1264 if (!m) return NULL;
1265 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
1266 goto err2;
1267 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
1268 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
1269 if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
1270 m->map_entry = false;
1271 return m;
1272
1273 err1:
1274 upb_strtable_uninit(&m->ntof);
1275 err2:
1276 upb_inttable_uninit(&m->itof);
1277 err3:
1278 free(m);
1279 return NULL;
1280 }
1281
upb_msgdef_dup(const upb_msgdef * m,const void * owner)1282 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1283 bool ok;
1284 upb_msg_field_iter i;
1285 upb_msg_oneof_iter o;
1286
1287 upb_msgdef *newm = upb_msgdef_new(owner);
1288 if (!newm) return NULL;
1289 ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1290 upb_def_fullname(upb_msgdef_upcast(m)),
1291 NULL);
1292 newm->map_entry = m->map_entry;
1293 UPB_ASSERT_VAR(ok, ok);
1294 for(upb_msg_field_begin(&i, m);
1295 !upb_msg_field_done(&i);
1296 upb_msg_field_next(&i)) {
1297 upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
1298 /* Fields in oneofs are dup'd below. */
1299 if (upb_fielddef_containingoneof(f)) continue;
1300 if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1301 upb_msgdef_unref(newm, owner);
1302 return NULL;
1303 }
1304 }
1305 for(upb_msg_oneof_begin(&o, m);
1306 !upb_msg_oneof_done(&o);
1307 upb_msg_oneof_next(&o)) {
1308 upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1309 if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1310 upb_msgdef_unref(newm, owner);
1311 return NULL;
1312 }
1313 }
1314 return newm;
1315 }
1316
/* Freezes this single msgdef by handing it to upb_def_freeze() as a
 * one-element list.  Returns that call's result. */
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
  upb_def *defs[1];
  defs[0] = upb_msgdef_upcast_mutable(m);
  return upb_def_freeze(defs, 1, status);
}
1321
upb_msgdef_fullname(const upb_msgdef * m)1322 const char *upb_msgdef_fullname(const upb_msgdef *m) {
1323 return upb_def_fullname(upb_msgdef_upcast(m));
1324 }
1325
/* Sets the message's full name by delegating to upb_def_setfullname() on the
 * base def; returns that call's result. */
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
                            upb_status *s) {
  upb_def *base = upb_msgdef_upcast_mutable(m);
  return upb_def_setfullname(base, fullname, s);
}
1330
1331 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1332 * on status |s| and return false if not. */
check_field_add(const upb_msgdef * m,const upb_fielddef * f,upb_status * s)1333 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1334 upb_status *s) {
1335 if (upb_fielddef_containingtype(f) != NULL) {
1336 upb_status_seterrmsg(s, "fielddef already belongs to a message");
1337 return false;
1338 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1339 upb_status_seterrmsg(s, "field name or number were not set");
1340 return false;
1341 } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
1342 upb_msgdef_itof(m, upb_fielddef_number(f))) {
1343 upb_status_seterrmsg(s, "duplicate field name or number for field");
1344 return false;
1345 }
1346 return true;
1347 }
1348
/* Unconditionally links field |f| into msgdef |m|; callers are expected to
 * have validated the addition already.
 *
 * Points the field at |m|, registers it in the by-number and by-name tables,
 * takes the mutual refs between field and message, and finally releases the
 * caller's donated ref when |ref_donor| is non-NULL. */
static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
  release_containingtype(f);
  f->msg_is_symbolic = false;
  f->msg.def = m;

  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));

  upb_ref2(f, m);
  upb_ref2(m, f);

  if (ref_donor != NULL) {
    upb_fielddef_unref(f, ref_donor);
  }
}
1359
/* Adds field |f| to msgdef |m|, consuming |ref_donor|'s ref on success.
 *
 * Adding a field that already belongs to |m| succeeds without doing
 * anything.  Otherwise the field must pass check_field_add() and must not be
 * part of a oneof; on failure false is returned with the error in |s| and
 * nothing is modified. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
                         upb_status *s) {
  /* TODO: extensions need to have a separate namespace, because proto2 allows a
   * top-level extension (ie. one not in any package) to have the same name as a
   * field from the message.
   *
   * This also implies that there needs to be a separate lookup-by-name method
   * for extensions.  It seems desirable for iteration to return both extensions
   * and non-extensions though.
   *
   * We also need to validate that the field number is in an extension range iff
   * it is an extension. */

  /* Idempotence: nothing to do when |f| is already one of our fields. */
  if (upb_fielddef_containingtype(f) == m) return true;

  /* Validate everything before touching any state. */
  if (!check_field_add(m, f, s)) return false;

  if (upb_fielddef_containingoneof(f) != NULL) {
    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
    upb_status_seterrmsg(s, "fielddef is part of a oneof");
    return false;
  }

  /* All checks passed; commit. */
  add_field(m, f, ref_donor);
  return true;
}
1392
/* Adds oneof |o|, together with all of its fields, to msgdef |m|, consuming
 * |ref_donor|'s ref on success.
 *
 * Fails (false, error in |s|, nothing modified) when the oneof already has a
 * parent message, has no name, has a name already used by another oneof in
 * |m|, or when any of its fields fails check_field_add(). */
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
                         upb_status *s) {
  upb_oneof_iter it;

  /* Validation of the oneof itself. */
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
    return false;
  }
  if (upb_oneofdef_name(o) == NULL) {
    upb_status_seterrmsg(s, "oneofdef name was not set");
    return false;
  }
  if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
    upb_status_seterrmsg(s, "duplicate oneof name");
    return false;
  }

  /* Validation of every member field against the message's existing names
   * and numbers. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *candidate = upb_oneof_iter_field(&it);
    if (!check_field_add(m, candidate, s)) return false;
    upb_oneof_next(&it);
  }

  /* Commit: link the oneof first... */
  o->parent = m;
  upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
  upb_ref2(o, m);
  upb_ref2(m, o);

  /* ...then register each member field directly on the msgdef. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    upb_fielddef *member = upb_oneof_iter_field(&it);
    add_field(m, member, NULL);
    upb_oneof_next(&it);
  }

  if (ref_donor != NULL) {
    upb_oneofdef_unref(o, ref_donor);
  }

  return true;
}
1436
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)1437 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1438 upb_value val;
1439 return upb_inttable_lookup32(&m->itof, i, &val) ?
1440 upb_value_getptr(val) : NULL;
1441 }
1442
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)1443 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1444 size_t len) {
1445 upb_value val;
1446 return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
1447 upb_value_getptr(val) : NULL;
1448 }
1449
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)1450 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1451 size_t len) {
1452 upb_value val;
1453 return upb_strtable_lookup2(&m->ntoo, name, len, &val) ?
1454 upb_value_getptr(val) : NULL;
1455 }
1456
upb_msgdef_numfields(const upb_msgdef * m)1457 int upb_msgdef_numfields(const upb_msgdef *m) {
1458 return upb_strtable_count(&m->ntof);
1459 }
1460
upb_msgdef_numoneofs(const upb_msgdef * m)1461 int upb_msgdef_numoneofs(const upb_msgdef *m) {
1462 return upb_strtable_count(&m->ntoo);
1463 }
1464
/* Stores the message's map-entry flag.  The msgdef must not be frozen
 * (checked by assert only). */
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
  assert(!upb_msgdef_isfrozen(m));

  m->map_entry = map_entry;
}
1469
upb_msgdef_mapentry(const upb_msgdef * m)1470 bool upb_msgdef_mapentry(const upb_msgdef *m) {
1471 return m->map_entry;
1472 }
1473
/* Positions |iter| at the start of |m|'s fields.  Iteration walks the
 * by-number field table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  upb_inttable_begin(iter, &m->itof);
}
1477
/* Advances the field iterator by one entry. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
1479
/* True once the field iterator has moved past the last field. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
1483
upb_msg_iter_field(const upb_msg_field_iter * iter)1484 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
1485 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1486 }
1487
/* Forces the field iterator into its "done" state. */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
1491
/* Positions |iter| at the start of |m|'s oneofs.  Iteration walks the
 * by-name oneof table. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  upb_strtable_begin(iter, &m->ntoo);
}
1495
/* Advances the oneof iterator by one entry. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  upb_strtable_next(iter);
}
1497
/* True once the oneof iterator has moved past the last oneof. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  bool finished = upb_strtable_done(iter);
  return finished;
}
1501
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)1502 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1503 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
1504 }
1505
/* Forces the oneof iterator into its "done" state. */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
}
1509
1510 /* upb_oneofdef ***************************************************************/
1511
/* Refcounting visit callback for oneofdefs: reports every member fielddef
 * and, when the oneof has one, its parent msgdef to |visit|. */
static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_oneofdef *o = (const upb_oneofdef*)r;
  upb_oneof_iter it;

  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *member = upb_oneof_iter_field(&it);
    visit(r, upb_fielddef_upcast2(member), closure);
    upb_oneof_next(&it);
  }

  if (o->parent != NULL) {
    visit(r, upb_msgdef_upcast2(o->parent), closure);
  }
}
1524
/* Refcounting free callback for oneofdefs: tears down both lookup tables and
 * the base def, then releases the oneofdef's own storage. */
static void freeoneof(upb_refcounted *r) {
  upb_oneofdef *oneof = (upb_oneofdef*)r;

  upb_strtable_uninit(&oneof->ntof);
  upb_inttable_uninit(&oneof->itof);
  upb_def_uninit(upb_oneofdef_upcast_mutable(oneof));

  free(oneof);
}
1532
upb_oneofdef_new(const void * owner)1533 upb_oneofdef *upb_oneofdef_new(const void *owner) {
1534 static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
1535 upb_oneofdef *o = malloc(sizeof(*o));
1536 o->parent = NULL;
1537 if (!o) return NULL;
1538 if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
1539 owner))
1540 goto err2;
1541 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1542 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1543 return o;
1544
1545 err1:
1546 upb_inttable_uninit(&o->itof);
1547 err2:
1548 free(o);
1549 return NULL;
1550 }
1551
upb_oneofdef_dup(const upb_oneofdef * o,const void * owner)1552 upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
1553 bool ok;
1554 upb_oneof_iter i;
1555 upb_oneofdef *newo = upb_oneofdef_new(owner);
1556 if (!newo) return NULL;
1557 ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
1558 upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
1559 UPB_ASSERT_VAR(ok, ok);
1560 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1561 upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1562 if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1563 upb_oneofdef_unref(newo, owner);
1564 return NULL;
1565 }
1566 }
1567 return newo;
1568 }
1569
upb_oneofdef_name(const upb_oneofdef * o)1570 const char *upb_oneofdef_name(const upb_oneofdef *o) {
1571 return upb_def_fullname(upb_oneofdef_upcast(o));
1572 }
1573
/* Renames the oneof.  Refused (false, error in |s|) once the oneof has been
 * added to a message; otherwise the result of upb_def_setfullname() on the
 * base def is returned. */
bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
                          upb_status *s) {
  if (upb_oneofdef_containingtype(o) != NULL) {
    upb_status_seterrmsg(s, "oneof already added to a message");
    return false;
  }

  return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
}
1582
upb_oneofdef_containingtype(const upb_oneofdef * o)1583 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1584 return o->parent;
1585 }
1586
upb_oneofdef_numfields(const upb_oneofdef * o)1587 int upb_oneofdef_numfields(const upb_oneofdef *o) {
1588 return upb_strtable_count(&o->ntof);
1589 }
1590
/* Adds field |f| to oneof |o|, consuming |ref_donor|'s ref on success.
 *
 * Neither the oneof nor its parent message (if any) may be frozen (asserted).
 * Adding a field that is already a member of |o| succeeds without doing
 * anything.  Otherwise the checks below run in order and the first failure
 * returns false with an error in |s|:
 *   - the field's label must be OPTIONAL;
 *   - the field must have a name and a non-zero number;
 *   - neither may collide with a field already in the oneof;
 *   - the field must not belong to another oneof;
 *   - the field's message must agree with the oneof's parent: if the oneof
 *     has no parent the field must have none either; if it has one, the
 *     field must have none or the same one.
 *
 * When the oneof has a parent and the field has none, the field is added to
 * that parent message first. */
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                           const void *ref_donor,
                           upb_status *s) {
  const upb_msgdef *field_msg;

  assert(!upb_oneofdef_isfrozen(o));
  assert(!o->parent || !upb_msgdef_isfrozen(o->parent));

  /* Idempotence: nothing to do when |f| is already one of our fields. */
  if (upb_fielddef_containingoneof(f) == o) return true;

  /* The field must have an OPTIONAL label. */
  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
    return false;
  }

  /* Name/number must be set and unused within this oneof, and the field must
   * not already sit in some other oneof. */
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "field name or number were not set");
    return false;
  }
  if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
      upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
    upb_status_seterrmsg(s, "duplicate field name or number");
    return false;
  }
  if (upb_fielddef_containingoneof(f) != NULL) {
    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
    return false;
  }

  /* The field's containing message has to be compatible with ours. */
  field_msg = upb_fielddef_containingtype(f);
  if (o->parent == NULL) {
    /* A parentless oneof cannot take a field that is already in a message:
     * that would require implicitly adding the oneof to that message, which
     * would be surprising behavior. */
    if (field_msg != NULL) {
      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
                              "oneof does not");
      return false;
    }
  } else {
    /* With a parent, the field may be message-less (it is then added to our
     * parent below) or already in that same parent. */
    if (field_msg != NULL && field_msg != o->parent) {
      upb_status_seterrmsg(s, "fielddef belongs to a different message "
                              "than oneof");
      return false;
    }
  }

  /* Commit phase.  Add the field to the parent msgdef first, because that
   * step may fail; only then update our own tables. */
  if (o->parent != NULL && field_msg == NULL) {
    if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
      return false;
    }
  }

  release_containingtype(f);
  f->oneof = o;
  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
  upb_ref2(f, o);
  upb_ref2(o, f);
  if (ref_donor != NULL) {
    upb_fielddef_unref(f, ref_donor);
  }

  return true;
}
1665
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)1666 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1667 const char *name, size_t length) {
1668 upb_value val;
1669 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1670 upb_value_getptr(val) : NULL;
1671 }
1672
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)1673 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1674 upb_value val;
1675 return upb_inttable_lookup32(&o->itof, num, &val) ?
1676 upb_value_getptr(val) : NULL;
1677 }
1678
/* Positions |iter| at the start of |o|'s fields.  Iteration walks the
 * oneof's by-number field table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  upb_inttable_begin(iter, &o->itof);
}
1682
/* Advances the oneof field iterator by one entry. */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
}
1686
/* True once the oneof field iterator has moved past the last field. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
1690
upb_oneof_iter_field(const upb_oneof_iter * iter)1691 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1692 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1693 }
1694
/* Forces the oneof field iterator into its "done" state. */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
1698
1699
1700 #include <stdlib.h>
1701 #include <stdio.h>
1702 #include <string.h>
1703
1704 typedef struct cleanup_ent {
1705 upb_cleanup_func *cleanup;
1706 void *ud;
1707 struct cleanup_ent *next;
1708 } cleanup_ent;
1709
1710 static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1711
1712 /* Default allocator **********************************************************/
1713
1714 /* Just use realloc, keeping all allocated blocks in a linked list to destroy at
1715 * the end. */
1716
/* Bookkeeping header that default_alloc() places in front of every block it
 * hands out; the pointer returned to the caller is the address just past this
 * structure. */
typedef struct mem_block {
  /* List is doubly-linked, because in cases where realloc() moves an existing
   * block, we need to be able to remove the old pointer from the list
   * efficiently. */
  struct mem_block *prev, *next;
#ifndef NDEBUG
  size_t size;   /* Doesn't include mem_block structure. */
#endif
} mem_block;
1726
/* User data for default_alloc(): the head of the list of all live blocks, so
 * that default_alloc_cleanup() can free them in one pass. */
typedef struct {
  mem_block *head;  /* Most recently inserted block, or NULL when empty. */
} default_alloc_ud;
1730
default_alloc(void * _ud,void * ptr,size_t oldsize,size_t size)1731 static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
1732 default_alloc_ud *ud = _ud;
1733 mem_block *from, *block;
1734 void *ret;
1735 UPB_UNUSED(oldsize);
1736
1737 from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
1738
1739 #ifndef NDEBUG
1740 if (from) {
1741 assert(oldsize <= from->size);
1742 }
1743 #endif
1744
1745 /* TODO(haberman): we probably need to provide even better alignment here,
1746 * like 16-byte alignment of the returned data pointer. */
1747 block = realloc(from, size + sizeof(mem_block));
1748 if (!block) return NULL;
1749 ret = (char*)block + sizeof(*block);
1750
1751 #ifndef NDEBUG
1752 block->size = size;
1753 #endif
1754
1755 if (from) {
1756 if (block != from) {
1757 /* The block was moved, so pointers in next and prev blocks must be
1758 * updated to its new location. */
1759 if (block->next) block->next->prev = block;
1760 if (block->prev) block->prev->next = block;
1761 if (ud->head == from) ud->head = block;
1762 }
1763 } else {
1764 /* Insert at head of linked list. */
1765 block->prev = NULL;
1766 block->next = ud->head;
1767 if (block->next) block->next->prev = block;
1768 ud->head = block;
1769 }
1770
1771 return ret;
1772 }
1773
default_alloc_cleanup(void * _ud)1774 static void default_alloc_cleanup(void *_ud) {
1775 default_alloc_ud *ud = _ud;
1776 mem_block *block = ud->head;
1777
1778 while (block) {
1779 void *to_free = block;
1780 block = block->next;
1781 free(to_free);
1782 }
1783 }
1784
1785
1786 /* Standard error functions ***************************************************/
1787
/* Error callback installed by upb_env_init(): ignores both arguments and
 * always returns false. */
static bool default_err(void *ud, const upb_status *status) {
  UPB_UNUSED(status);
  UPB_UNUSED(ud);
  return false;
}
1793
write_err_to(void * ud,const upb_status * status)1794 static bool write_err_to(void *ud, const upb_status *status) {
1795 upb_status *copy_to = ud;
1796 upb_status_copy(copy_to, status);
1797 return false;
1798 }
1799
1800
1801 /* upb_env ********************************************************************/
1802
/* Initializes |e|: marks it ok, zeroes the allocation counter, empties the
 * cleanup list and the embedded default-allocator block list, and installs
 * default_alloc / default_err as the allocation and error functions. */
void upb_env_init(upb_env *e) {
  default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;

  /* Start with no blocks and no registered cleanups. */
  ud->head = NULL;
  e->cleanup_head = NULL;
  e->bytes_allocated = 0;
  e->ok_ = true;

  /* Set default functions. */
  upb_env_seterrorfunc(e, default_err, NULL);
  upb_env_setallocfunc(e, default_alloc, ud);
}
1815
/* Tears down |e|: runs each registered cleanup callback in list order
 * (most recently added first), then frees all default_alloc blocks if
 * default_alloc is still the environment's allocation function. */
void upb_env_uninit(upb_env *e) {
  cleanup_ent *ent;

  for (ent = e->cleanup_head; ent; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* Must do this after running cleanup functions, because this will delete
     the memory we store our cleanup entries in! */
  if (e->alloc != default_alloc) return;
  default_alloc_cleanup(e->alloc_ud);
}
1830
upb_env_setallocfunc(upb_env * e,upb_alloc_func * alloc,void * ud)1831 UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1832 void *ud) {
1833 e->alloc = alloc;
1834 e->alloc_ud = ud;
1835 }
1836
upb_env_seterrorfunc(upb_env * e,upb_error_func * func,void * ud)1837 UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1838 void *ud) {
1839 e->err = func;
1840 e->err_ud = ud;
1841 }
1842
/* Makes |e| report errors by copying them into |status|: installs
 * write_err_to as the error callback with |status| as its user data. */
void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
  e->err_ud = status;
  e->err = write_err_to;
}
1847
upb_env_ok(const upb_env * e)1848 bool upb_env_ok(const upb_env *e) {
1849 return e->ok_;
1850 }
1851
/* Records that an error occurred on |e| (clears the ok flag) and forwards
 * |status| to the installed error callback.  Returns whatever the callback
 * returns. */
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  bool handled;
  e->ok_ = false;
  handled = e->err(e->err_ud, status);
  return handled;
}
1856
/* Registers |func| to be called with |ud| by upb_env_uninit().  The list
 * entry is allocated from |e| itself and pushed at the front of the cleanup
 * list.  Returns false if that allocation fails, true otherwise. */
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
  cleanup_ent *node = upb_env_malloc(e, sizeof(*node));
  if (node == NULL) return false;

  node->next = e->cleanup_head;
  node->ud = ud;
  node->cleanup = func;
  e->cleanup_head = node;

  return true;
}
1868
/* Allocates |size| bytes from |e| and adds |size| to the running
 * bytes_allocated counter (the counter is bumped before the allocation is
 * attempted).  Returns whatever the allocation function returns. */
void *upb_env_malloc(upb_env *e, size_t size) {
  e->bytes_allocated += size;

  if (e->alloc != seeded_alloc) {
    return e->alloc(e->alloc_ud, NULL, 0, size);
  }

  /* Same call as above, but naming seeded_alloc directly allows inlining for
   * a measurable perf benefit. */
  return seeded_alloc(e->alloc_ud, NULL, 0, size);
}
1879
/* Grows |ptr| from |oldsize| to |size| bytes using the environment's
 * allocation function.  |oldsize| must not exceed |size| (asserted).
 *
 * Returns the pointer produced by the allocation function; this is NULL when
 * the allocation function fails.
 *
 * In debug builds the bytes beyond |oldsize| are filled with 0xff so callers
 * that under-report |oldsize| are caught early. */
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
  char *ret;
  assert(oldsize <= size);
  ret = e->alloc(e->alloc_ud, ptr, oldsize, size);

#ifndef NDEBUG
  /* Overwrite non-preserved memory to ensure callers are passing the oldsize
   * that they truly require.  Skipped when the allocation failed: there is
   * no memory to poison and writing through NULL would crash. */
  if (ret) memset(ret + oldsize, 0xff, size - oldsize);
#endif

  return ret;
}
1893
upb_env_bytesallocated(const upb_env * e)1894 size_t upb_env_bytesallocated(const upb_env *e) {
1895 return e->bytes_allocated;
1896 }
1897
1898
1899 /* upb_seededalloc ************************************************************/
1900
/* Be conservative and choose 16 in case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds |size| up to the next multiple of maxalign (values that are already
 * a multiple are returned unchanged). */
static size_t align_up(size_t size) {
  size_t bumped = size + (maxalign - 1);
  return bumped - (bumped % maxalign);
}
1907
seeded_alloc(void * ud,void * ptr,size_t oldsize,size_t size)1908 UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1909 size_t size) {
1910 upb_seededalloc *a = ud;
1911
1912 size = align_up(size);
1913
1914 assert(a->mem_limit >= a->mem_ptr);
1915
1916 if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
1917 /* Fast path: we can satisfy from the initial allocation. */
1918 void *ret = a->mem_ptr;
1919 a->mem_ptr += size;
1920 return ret;
1921 } else {
1922 char *chptr = ptr;
1923 /* Slow path: fallback to other allocator. */
1924 a->need_cleanup = true;
1925 /* Is `ptr` part of the user-provided initial block? Don't pass it to the
1926 * default allocator if so; otherwise, it may try to realloc() the block. */
1927 if (chptr >= a->mem_base && chptr < a->mem_limit) {
1928 void *ret;
1929 assert(chptr + oldsize <= a->mem_limit);
1930 ret = a->alloc(a->alloc_ud, NULL, 0, size);
1931 if (ret) memcpy(ret, ptr, oldsize);
1932 return ret;
1933 } else {
1934 return a->alloc(a->alloc_ud, ptr, oldsize, size);
1935 }
1936 }
1937 }
1938
/* Initializes |a| to allocate from the caller-provided region [mem, mem+len)
 * first, with default_alloc (using the block list embedded in |a|) as the
 * fallback allocator. */
void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
  default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;

  ud->head = NULL;

  /* Must be cleared before installing the fallback, which asserts on it. */
  a->returned_allocfunc = false;
  a->need_cleanup = false;

  a->mem_limit = (char*)mem + len;
  a->mem_ptr = mem;
  a->mem_base = mem;

  upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
}
1951
/* Releases fallback memory owned by |a|.  Only acts when the fallback is
 * still default_alloc and it was actually used (need_cleanup is set). */
void upb_seededalloc_uninit(upb_seededalloc *a) {
  if (a->alloc != default_alloc) return;
  if (!a->need_cleanup) return;
  default_alloc_cleanup(a->alloc_ud);
}
1957
upb_seededalloc_setfallbackalloc(upb_seededalloc * a,upb_alloc_func * alloc,void * ud)1958 UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
1959 upb_alloc_func *alloc,
1960 void *ud) {
1961 assert(!a->returned_allocfunc);
1962 a->alloc = alloc;
1963 a->alloc_ud = ud;
1964 }
1965
upb_seededalloc_getallocfunc(upb_seededalloc * a)1966 upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
1967 a->returned_allocfunc = true;
1968 return seeded_alloc;
1969 }
1970 /*
1971 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
1972 ** assert() or return false.
1973 */
1974
1975
1976 #include <stdlib.h>
1977 #include <string.h>
1978
1979
1980
/* Defined for the sole purpose of having a unique pointer value for
 * UPB_NO_CLOSURE.  Nothing in this part of the file reads or writes the
 * object itself. */
char _upb_noclosure;
1984
/* Refcount "free" callback for upb_handlers.  Runs every cleanup function
 * registered through upb_handlers_addcleanup() on its associated pointer,
 * then releases the cleanup table, the reference on the message definition,
 * the subhandlers array and the object itself. */
static void freehandlers(upb_refcounted *r) {
  upb_handlers *h = (upb_handlers*)r;
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &h->cleanup_);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    /* Key is the pointer to clean up; value is the function that does it. */
    void *target = (void*)upb_inttable_iter_key(&it);
    upb_value stored = upb_inttable_iter_value(&it);
    upb_handlerfree *release = upb_value_getfptr(stored);
    release(target);
  }

  upb_inttable_uninit(&h->cleanup_);
  upb_msgdef_unref(h->msg, h);
  free(h->sub);
  free(h);
}
2002
/* Refcount "visit" callback for upb_handlers.  For every submessage field of
 * the handlers' message that has subhandlers set, calls |visit| with this
 * object, the subhandlers object and |closure|. */
static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
                          void *closure) {
  const upb_handlers *h = (const upb_handlers*)r;
  upb_msg_field_iter it;

  upb_msg_field_begin(&it, h->msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);
    if (upb_fielddef_issubmsg(f)) {
      const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
      if (sub) visit(r, upb_handlers_upcast(sub), closure);
    }
    upb_msg_field_next(&it);
  }
}
2017
/* Refcounting vtable for upb_handlers, initialized with visithandlers and
 * freehandlers (in that order); passed to upb_refcounted_init() by
 * upb_handlers_new(). */
static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2019
/* State shared by all levels of the recursive newformsg() walk started from
 * upb_handlers_newfrozen(). */
typedef struct {
  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
  upb_handlers_callback *callback;  /* Called once per created handlers. */
  const void *closure;              /* First argument passed to |callback|. */
} dfs_state;
2025
2026 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
2027 * handlers set and cannot reach any upb_handlers* object that does. This is
2028 * slightly tricky to do correctly. */
newformsg(const upb_msgdef * m,const void * owner,dfs_state * s)2029 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2030 dfs_state *s) {
2031 upb_msg_field_iter i;
2032 upb_handlers *h = upb_handlers_new(m, owner);
2033 if (!h) return NULL;
2034 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2035
2036 s->callback(s->closure, h);
2037
2038 /* For each submessage field, get or create a handlers object and set it as
2039 * the subhandlers. */
2040 for(upb_msg_field_begin(&i, m);
2041 !upb_msg_field_done(&i);
2042 upb_msg_field_next(&i)) {
2043 upb_fielddef *f = upb_msg_iter_field(&i);
2044 const upb_msgdef *subdef;
2045 upb_value subm_ent;
2046
2047 if (!upb_fielddef_issubmsg(f)) continue;
2048
2049 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
2050 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2051 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2052 } else {
2053 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2054 if (!sub_mh) goto oom;
2055 upb_handlers_setsubhandlers(h, f, sub_mh);
2056 upb_handlers_unref(sub_mh, &sub_mh);
2057 }
2058 }
2059 return h;
2060
2061 oom:
2062 upb_handlers_unref(h, owner);
2063 return NULL;
2064 }
2065
/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
 * subhandlers for this submessage field.  Expands to an lvalue, so it can be
 * assigned to.  Arguments are parenthesized so that arbitrary expressions
 * (e.g. "&obj" or "a + b") can be passed safely. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
2072
/* Computes the selector for handler |type| on field |f| of handlers |h|.
 * Returns the selector (>= 0) on success.  Returns -1 and records an error in
 * h->status_ when |f| does not belong to h's message or when |type| is not
 * applicable to |f|.  |h| must not be frozen (asserted). */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel;
  const upb_msgdef *msg;

  assert(!upb_handlers_isfrozen(h));

  msg = upb_handlers_msgdef(h);
  if (msg != upb_fielddef_containingtype(f)) {
    upb_status_seterrf(
        &h->status_, "type mismatch: field %s does not belong to message %s",
        upb_fielddef_name(f), upb_msgdef_fullname(msg));
    return -1;
  }

  if (upb_handlers_getselector(f, type, &sel)) {
    return sel;
  }

  upb_status_seterrf(
      &h->status_,
      "type mismatch: cannot register handler type %d for field %s",
      type, upb_fielddef_name(f));
  return -1;
}
2092
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2093 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2094 upb_handlertype_t type) {
2095 int32_t sel = trygetsel(h, f, type);
2096 assert(sel >= 0);
2097 return sel;
2098 }
2099
/* Returns the address of the return-closure-type slot stored in h's handler
 * table for handler |type| of field |f|, so callers can read or update it. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  return &h->table[sel].attr.return_closure_type_;
}
2104
/* Common implementation behind all upb_handlers_set*() functions: validates
 * and stores handler |func| in slot |sel| of h's handler table.
 *
 *   h     handlers object being modified; must not be frozen (asserted).
 *   sel   table slot; a negative value (as returned by trygetsel() on
 *         failure) is rejected.
 *   f     field the handler is for, or NULL for message-level handlers.
 *   type  handler type; selects which "context" closure type applies.
 *   func  handler function to store.
 *   attr  optional attributes; NULL means UPB_HANDLERATTR_INITIALIZER.
 *
 * Returns true on success.  On failure returns false and records a message in
 * h->status_.  Note that the context closure type may already have been
 * updated when the later return-closure-type check fails. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  upb_handlerattr *attr) {
  upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
  const void *closure_type;
  const void **context_closure_type;

  assert(!upb_handlers_isfrozen(h));

  /* trygetsel() reports an inapplicable field/type combination as -1. */
  if (sel < 0) {
    upb_status_seterrmsg(&h->status_,
                         "incorrect handler type for this field.");
    return false;
  }

  /* Each slot may be written only once. */
  if (h->table[sel].func) {
    upb_status_seterrmsg(&h->status_,
                         "cannot change handler once it has been set.");
    return false;
  }

  /* Work on a private copy so the caller's attr is never modified. */
  if (attr) {
    set_attr = *attr;
  }

  /* Check that the given closure type matches the closure type that has been
   * established for this context (if any). */
  closure_type = upb_handlerattr_closuretype(&set_attr);

  /* Pick the slot that holds the closure type this handler will receive:
   *   - STRING handlers get what the field's STARTSTR handler returns,
   *   - handlers of a sequence field (other than STARTSEQ/ENDSEQ themselves)
   *     get what the field's STARTSEQ handler returns,
   *   - everything else gets the message's top-level closure. */
  if (type == UPB_HANDLER_STRING) {
    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    context_closure_type = &h->top_closure_type;
  }

  /* A mismatch is only an error when both sides actually specify a type. */
  if (closure_type && *context_closure_type &&
      closure_type != *context_closure_type) {
    /* TODO(haberman): better message for debugging. */
    if (f) {
      upb_status_seterrf(&h->status_,
                         "closure type does not match for field %s",
                         upb_fielddef_name(f));
    } else {
      upb_status_seterrmsg(
          &h->status_, "closure type does not match for message-level handler");
    }
    return false;
  }

  /* Establish (or re-affirm) the context's closure type. */
  if (closure_type)
    *context_closure_type = closure_type;

  /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
   * matches any pre-existing expectations about what type is expected. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
    const void *table_return_type =
        upb_handlerattr_returnclosuretype(&h->table[sel].attr);
    if (return_type && table_return_type && return_type != table_return_type) {
      upb_status_seterrmsg(&h->status_, "closure return type does not match");
      return false;
    }

    /* Keep an expectation recorded earlier (by an inner handler) when the new
     * attr does not state a return type itself; otherwise the assignment of
     * set_attr below would erase it. */
    if (table_return_type && !return_type)
      upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
  }

  h->table[sel].func = (upb_func*)func;
  h->table[sel].attr = set_attr;
  return true;
}
2180
2181 /* Returns the effective closure type for this handler (which will propagate
2182 * from outer frames if this frame has no START* handler). Not implemented for
2183 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
2184 * the effective closure type is unspecified (either no handler was registered
2185 * to specify it or the handler that was registered did not specify the closure
2186 * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2187 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2188 upb_handlertype_t type) {
2189 const void *ret;
2190 upb_selector_t sel;
2191
2192 assert(type != UPB_HANDLER_STRING);
2193 ret = h->top_closure_type;
2194
2195 if (upb_fielddef_isseq(f) &&
2196 type != UPB_HANDLER_STARTSEQ &&
2197 type != UPB_HANDLER_ENDSEQ &&
2198 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2199 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2200 }
2201
2202 if (type == UPB_HANDLER_STRING &&
2203 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2204 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2205 }
2206
2207 /* The effective type of the submessage; not used yet.
2208 * if (type == SUBMESSAGE &&
2209 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2210 * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2211 * } */
2212
2213 return ret;
2214 }
2215
2216 /* Checks whether the START* handler specified by f & type is missing even
2217 * though it is required to convert the established type of an outer frame
2218 * ("closure_type") into the established type of an inner frame (represented in
2219 * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)2220 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2221 upb_status *status) {
2222 const void *closure_type;
2223 const upb_handlerattr *attr;
2224 const void *return_closure_type;
2225
2226 upb_selector_t sel = handlers_getsel(h, f, type);
2227 if (h->table[sel].func) return true;
2228 closure_type = effective_closure_type(h, f, type);
2229 attr = &h->table[sel].attr;
2230 return_closure_type = upb_handlerattr_returnclosuretype(attr);
2231 if (closure_type && return_closure_type &&
2232 closure_type != return_closure_type) {
2233 upb_status_seterrf(status,
2234 "expected start handler to return sub type for field %f",
2235 upb_fielddef_name(f));
2236 return false;
2237 }
2238 return true;
2239 }
2240
2241 /* Public interface ***********************************************************/
2242
upb_handlers_new(const upb_msgdef * md,const void * owner)2243 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
2244 int extra;
2245 upb_handlers *h;
2246
2247 assert(upb_msgdef_isfrozen(md));
2248
2249 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2250 h = calloc(sizeof(*h) + extra, 1);
2251 if (!h) return NULL;
2252
2253 h->msg = md;
2254 upb_msgdef_ref(h->msg, h);
2255 upb_status_clear(&h->status_);
2256 h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
2257 if (!h->sub) goto oom;
2258 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2259 goto oom;
2260 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2261
2262 /* calloc() above initialized all handlers to NULL. */
2263 return h;
2264
2265 oom:
2266 freehandlers(upb_handlers_upcast_mutable(h));
2267 return NULL;
2268 }
2269
upb_handlers_newfrozen(const upb_msgdef * m,const void * owner,upb_handlers_callback * callback,const void * closure)2270 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2271 const void *owner,
2272 upb_handlers_callback *callback,
2273 const void *closure) {
2274 dfs_state state;
2275 upb_handlers *ret;
2276 bool ok;
2277 upb_refcounted *r;
2278
2279 state.callback = callback;
2280 state.closure = closure;
2281 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2282
2283 ret = newformsg(m, owner, &state);
2284
2285 upb_inttable_uninit(&state.tab);
2286 if (!ret) return NULL;
2287
2288 r = upb_handlers_upcast_mutable(ret);
2289 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
2290 UPB_ASSERT_VAR(ok, ok);
2291
2292 return ret;
2293 }
2294
upb_handlers_status(upb_handlers * h)2295 const upb_status *upb_handlers_status(upb_handlers *h) {
2296 assert(!upb_handlers_isfrozen(h));
2297 return &h->status_;
2298 }
2299
/* Clears any error recorded in the status of |h|.  |h| must not be frozen
 * (asserted). */
void upb_handlers_clearerr(upb_handlers *h) {
  assert(!upb_handlers_isfrozen(h));
  upb_status_clear(&(h->status_));
}
2304
/* Generates the public setter upb_handlers_set<name>() for one handler type.
 * Each generated function resolves the selector for field |f| with
 * trygetsel() and passes it to doset(); a failed lookup (-1) is reported by
 * doset(), so the setter returns false in that case.
 *
 *   name          suffix of the generated function name.
 *   handlerctype  C type of the handler function pointer parameter.
 *   handlertype   upb_handlertype_t value the setter registers for. */
#define SETTER(name, handlerctype, handlertype) \
  bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
                                handlerctype func, upb_handlerattr *attr) { \
    int32_t sel = trygetsel(h, f, handlertype); \
    return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
  }

/* One setter per field-level handler type. */
SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)

/* The generator is only needed for the instantiations above. */
#undef SETTER
2328
2329 bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2330 upb_handlerattr *attr) {
2331 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2332 (upb_func *)func, attr);
2333 }
2334
/* Registers the message-level "end message" handler of |h| in the
 * UPB_ENDMSG_SELECTOR slot.  |h| must not be frozen (asserted).  No field is
 * involved, so doset() is called with a NULL field; UPB_HANDLER_INT32 is
 * passed as the handler type, which makes doset() use the top-level closure
 * type as context.  Returns doset()'s result. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  assert(!upb_handlers_isfrozen(h));
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
2341
/* Sets |sub| as the handlers used for the contents of submessage field |f| of
 * |h|, and takes a reference from |h| to |sub|.
 *
 * Asserts that |sub| is non-NULL, |h| is not frozen and |f| is a submessage
 * field.  Returns false (changing nothing) if subhandlers were already set for
 * |f| or if |sub| is for a different message type than the field's. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  assert(sub);
  assert(!upb_handlers_isfrozen(h));
  assert(upb_fielddef_issubmsg(f));

  /* Can't reset, and the message types must agree. */
  if (SUBH_F(h, f) ||
      upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  upb_ref2(sub, h);
  return true;
}
2355
/* Returns the subhandlers stored for submessage field |f| of |h| (NULL if
 * none have been set).  |f| must be a submessage field (asserted). */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;
  assert(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
2361
/* Copies the attributes of the handler registered at selector |sel| into
 * |*attr|.  Returns false, leaving |*attr| untouched, when no handler is
 * registered there. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
2369
/* Returns the subhandlers for the submessage field identified by its
 * STARTSUBMSG selector |sel|.  That selector is the field index shifted by
 * UPB_STATIC_SELECTOR_COUNT (see upb_handlers_getselector()), so subtracting
 * the static count gives the index into the subhandlers array. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  /* STARTSUBMSG selector in sel is the field's selector base. */
  return h->sub[sel - UPB_STATIC_SELECTOR_COUNT];
}
2375
upb_handlers_msgdef(const upb_handlers * h)2376 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2377
/* Registers |func| to be called with |p| when |h| is destroyed (see
 * freehandlers()).  Each pointer can be registered at most once.
 *
 * Returns true if the cleanup was recorded.  Returns false if |p| already has
 * a cleanup registered, or if recording it failed; in both cases the caller
 * still owns |p|.  A failed insert additionally trips UPB_ASSERT_VAR in debug
 * builds. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  bool ok;
  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
    return false;
  }
  ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
  UPB_ASSERT_VAR(ok, ok);
  /* Do not claim success when the table insert failed: otherwise the caller
   * would believe |p| will be cleaned up and leak it. */
  return ok;
}
2387
2388
2389 /* "Static" methods ***********************************************************/
2390
/* Validates and freezes the |n| handlers objects in |handlers|.
 *
 * For each handlers object this:
 *   - fails if the object's own status already records an error,
 *   - runs checkstart() on the STARTSEQ handler of every sequence field and
 *     on the STARTSTR handler of every string field,
 *   - for each submessage field that has a start/end (sub)message or sequence
 *     handler but no subhandlers, installs a newly created empty subhandlers
 *     object.
 * Finally the whole set is passed to upb_refcounted_freeze().
 *
 * Returns true on success.  Returns false on failure; errors detected here
 * are written to |s|. */
bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
  /* TODO: verify we have a transitive closure. */
  int i;
  for (i = 0; i < n; i++) {
    upb_msg_field_iter j;
    upb_handlers *h = handlers[i];

    if (!upb_ok(&h->status_)) {
      upb_status_seterrf(s, "handlers for message %s had error status: %s",
                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
                         upb_status_errmsg(&h->status_));
      return false;
    }

    /* Check that there are no closure mismatches due to missing Start* handlers
     * or subhandlers with different type-level types. */
    for(upb_msg_field_begin(&j, h->msg);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {

      const upb_fielddef *f = upb_msg_iter_field(&j);
      if (upb_fielddef_isseq(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
          return false;
      }

      if (upb_fielddef_isstring(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
          return false;
      }

      if (upb_fielddef_issubmsg(f)) {
        bool hashandler = false;
        if (upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
            upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
          hashandler = true;
        }

        if (upb_fielddef_isseq(f) &&
            (upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
             upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
          hashandler = true;
        }

        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
          /* For now we add an empty subhandlers in this case.  It makes the
           * decoder code generator simpler, because it only has to handle two
           * cases (submessage has handlers or not) as opposed to three
           * (submessage has handlers in enclosing message but no subhandlers).
           *
           * This makes parsing less efficient in the case that we want to
           * notice a submessage but skip its contents (like if we're testing
           * for submessage presence or counting the number of repeated
           * submessages).  In this case we will end up parsing the submessage
           * field by field and throwing away the results for each, instead of
           * skipping the whole delimited thing at once.  If this is an issue we
           * can revisit it, but do remember that this only arises when you have
           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
           * submessage but no subhandlers.  The use cases for this are
           * limited. */
          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
          if (!sub) {
            /* upb_handlers_new() returns NULL when it cannot allocate. */
            upb_status_seterrmsg(s, "out of memory creating empty subhandlers");
            return false;
          }
          upb_handlers_setsubhandlers(h, f, sub);
          upb_handlers_unref(sub, &sub);
        }

        /* TODO(haberman): check type of submessage.
         * This is slightly tricky; also consider whether we should check that
         * they match at setsubhandlers time. */
      }
    }
  }

  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
                             UPB_MAX_HANDLER_DEPTH)) {
    return false;
  }

  return true;
}
2474
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)2475 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2476 switch (upb_fielddef_type(f)) {
2477 case UPB_TYPE_INT32:
2478 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2479 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2480 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2481 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2482 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2483 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2484 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
2485 default: assert(false); return -1; /* Invalid input. */
2486 }
2487 }
2488
/* Computes the handler-table selector for handler |type| on field |f| and
 * stores it in |*s|.
 *
 * Selector layout relative to f->selector_base, as implemented below:
 *   primitive value handlers      selector_base
 *   STRING                        selector_base (string field)
 *                                 selector_base + 3 (lazy field)
 *   STARTSTR / ENDSTR             selector_base + 1 / + 2
 *   STARTSEQ / ENDSEQ             selector_base - 2 / - 1
 *   STARTSUBMSG                   f->index_ + UPB_STATIC_SELECTOR_COUNT
 *   ENDSUBMSG                     selector_base
 *
 * Returns true on success.  Returns false, leaving |*s| unwritten, when |type|
 * does not apply to |f| or is not a known handler type. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      if (!upb_fielddef_isprimitive(f) ||
          upb_handlers_getprimitivehandlertype(f) != type)
        return false;
      *s = f->selector_base;
      break;
    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = f->selector_base;
      } else if (upb_fielddef_lazy(f)) {
        *s = f->selector_base + 3;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_STARTSTR:
      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
        *s = f->selector_base + 1;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_ENDSTR:
      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
        *s = f->selector_base + 2;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 2;
      break;
    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 1;
      break;
    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* Selectors for STARTSUBMSG are at the beginning of the table so that the
       * selector can also be used as an index into the "sub" array of
       * subhandlers.  The indexes for the two into these two tables are the
       * same, except that in the handler table the static selectors come first. */
      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
      break;
    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = f->selector_base;
      break;
    default:
      /* Unknown handler type: never fall through to the check below, which
       * would read |*s| without it having been written. */
      return false;
  }
  assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
  return true;
}
2551
/* Returns how many selectors of field |f| precede its selector base: 2 for
 * sequence fields (STARTSEQ and ENDSEQ sit at selector_base - 2 and - 1 in
 * upb_handlers_getselector()), otherwise 0. */
uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
2555
/* Returns the number of handler-table selectors that field |f| occupies.
 * Every field gets one; sequence fields get two more (STARTSEQ/ENDSEQ),
 * string fields get two more (STARTSTR/ENDSTR, in addition to STRING), and
 * lazy submessage fields get three more (STARTSTR/ENDSTR/STRING).  A plain
 * submessage field adds nothing because its base selector serves ENDSUBMSG
 * and STARTSUBMSG lives at the beginning of the table. */
uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2;  /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2;  /* [STRING]/STARTSTR/ENDSTR */
  }
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3;  /* STARTSTR/ENDSTR/STRING (for lazy) */
  }

  return count;
}
2570
2571
2572 /* upb_handlerattr ************************************************************/
2573
/* Resets |attr| to the defaults given by UPB_HANDLERATTR_INITIALIZER. */
void upb_handlerattr_init(upb_handlerattr *attr) {
  upb_handlerattr defaults = UPB_HANDLERATTR_INITIALIZER;
  memcpy(attr, &defaults, sizeof(defaults));
}
2578
/* Counterpart of upb_handlerattr_init().  Currently does nothing. */
void upb_handlerattr_uninit(upb_handlerattr *attr) {
  UPB_UNUSED(attr);
}
2582
/* Stores |hd| as the handler data of |attr|.  Always returns true. */
bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
  attr->handler_data_ = hd;

  return true;
}
2587
/* Stores |type| as the closure type of |attr|.  Always returns true. */
bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
  attr->closure_type_ = type;

  return true;
}
2592
upb_handlerattr_closuretype(const upb_handlerattr * attr)2593 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2594 return attr->closure_type_;
2595 }
2596
/* Records the return-closure type tag on the attribute.  Always returns
 * true. */
bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
                                          const void *type) {
  const bool ok = true;
  attr->return_closure_type_ = type;
  return ok;
}
2602
upb_handlerattr_returnclosuretype(const upb_handlerattr * attr)2603 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2604 return attr->return_closure_type_;
2605 }
2606
/* Stores the "always ok" flag on the attribute.  Always returns true. */
bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
  const bool ok = true;
  attr->alwaysok_ = alwaysok;
  return ok;
}
2611
upb_handlerattr_alwaysok(const upb_handlerattr * attr)2612 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2613 return attr->alwaysok_;
2614 }
2615
2616 /* upb_bufhandle **************************************************************/
2617
upb_bufhandle_objofs(const upb_bufhandle * h)2618 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2619 return h->objofs_;
2620 }
2621
2622 /* upb_byteshandler ***********************************************************/
2623
/* Zero-fills the whole byteshandler, clearing every table entry. */
void upb_byteshandler_init(upb_byteshandler* h) {
  memset(h, 0, sizeof(upb_byteshandler));
}
2627
2628 /* For when we support handlerfree callbacks. */
upb_byteshandler_uninit(upb_byteshandler * h)2629 void upb_byteshandler_uninit(upb_byteshandler* h) {
2630 UPB_UNUSED(h);
2631 }
2632
/* Installs func (with handler data d) in the UPB_STARTSTR_SELECTOR slot of the
 * byteshandler's table.  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
  return true;
}
2639
/* Installs func (with handler data d) in the UPB_STRING_SELECTOR slot of the
 * byteshandler's table.  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
  return true;
}
2646
/* Installs func (with handler data d) in the UPB_ENDSTR_SELECTOR slot of the
 * byteshandler's table.  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
  return true;
}
2653 /*
2654 ** upb::RefCounted Implementation
2655 **
2656 ** Our key invariants are:
2657 ** 1. reference cycles never span groups
2658 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2659 **
2660 ** The previous two are how we avoid leaking cycles. Other important
2661 ** invariants are:
2662 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2663 ** this implies group(from) == group(to). (In practice, what we implement
2664 ** is even stronger; "from" and "to" will share a group if there has *ever*
2665 ** been a ref2(to, from), but all that is necessary for correctness is the
2666 ** weaker one).
2667 ** 4. mutable and immutable objects are never in the same group.
2668 */
2669
2670
2671 #include <setjmp.h>
2672 #include <stdlib.h>
2673
2674 static void freeobj(upb_refcounted *o);
2675
/* UPB_UNTRACKED_REF is a sentinel owner value: the debug-mode track() and
 * untrack() functions return immediately when handed this owner.  Only the
 * address of untracked_val matters; its value is never read. */
const char untracked_val;
const void *UPB_UNTRACKED_REF = &untracked_val;
2678
2679 /* arch-specific atomic primitives *******************************************/
2680
/* atomic_inc(a) increments *a; atomic_dec(a) decrements *a and returns true
 * iff the new value is zero.  Every variant operates on a plain uint32_t,
 * which is what refgroup()/unrefgroup() pass in. */

#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Single-threaded build: ordinary (non-atomic) arithmetic is sufficient. */
static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(WIN32) || defined(_WIN32) /*------------------------------------*/

#include <Windows.h>

/* The Interlocked* functions operate on a 32-bit LONG, which has the same
 * size as the uint32_t refcount, so the counter is accessed through a cast
 * instead of a separate wrapper type. */
static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((volatile LONG *)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((volatile LONG *)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
2704
/* Shared refcount that all static objects point to.  refgroup() and
 * unrefgroup() recognise its address and never modify it. */
uint32_t static_refcount = UINT32_MAX;
2708
2709 /* We can avoid atomic ops for statically-declared objects.
2710 * This is a minor optimization but nice since we can avoid degrading under
2711 * contention in this case. */
2712
refgroup(uint32_t * group)2713 static void refgroup(uint32_t *group) {
2714 if (group != &static_refcount)
2715 atomic_inc(group);
2716 }
2717
unrefgroup(uint32_t * group)2718 static bool unrefgroup(uint32_t *group) {
2719 if (group == &static_refcount) {
2720 return false;
2721 } else {
2722 return atomic_dec(group);
2723 }
2724 }
2725
2726
2727 /* Reference tracking (debug only) ********************************************/
2728
2729 #ifdef UPB_DEBUG_REFS
2730
#ifdef UPB_THREAD_UNSAFE

/* Single-threaded build: the debug ref tables need no locking, so both
 * functions are no-ops. */
static void upb_lock(void) {}
static void upb_unlock(void) {}

#else

/* User must define functions that lock/unlock a global mutex and link this
 * file against them.  Declared with (void) so that they are real prototypes
 * and calls with stray arguments are diagnosed. */
void upb_lock(void);
void upb_unlock(void);

#endif
2744
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail.
 *
 * The predicate is evaluated exactly once.  The do/while(0) wrapper makes the
 * macro behave as a single statement, so it is safe in unbraced if/else. */
#define CHECK_OOM(predicate) \
  do { \
    if (!(predicate)) { \
      assert(0 && "out of memory: " #predicate); \
      exit(1); \
    } \
  } while (0)
2750
/* Debug-mode bookkeeping for the refs one owner holds on one object. */
typedef struct {
  /* Number of refs held; only ref2 refs may have a count above one. */
  int count;
  /* True when this entry tracks ref2 refs rather than regular refs. */
  bool is_ref2;
} trackedref;
2755
trackedref_new(bool is_ref2)2756 static trackedref *trackedref_new(bool is_ref2) {
2757 trackedref *ret = malloc(sizeof(*ret));
2758 CHECK_OOM(ret);
2759 ret->count = 1;
2760 ret->is_ref2 = is_ref2;
2761 return ret;
2762 }
2763
track(const upb_refcounted * r,const void * owner,bool ref2)2764 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2765 upb_value v;
2766
2767 assert(owner);
2768 if (owner == UPB_UNTRACKED_REF) return;
2769
2770 upb_lock();
2771 if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2772 trackedref *ref = upb_value_getptr(v);
2773 /* Since we allow multiple ref2's for the same to/from pair without
2774 * allocating separate memory for each one, we lose the fine-grained
2775 * tracking behavior we get with regular refs. Since ref2s only happen
2776 * inside upb, we'll accept this limitation until/unless there is a really
2777 * difficult upb-internal bug that can't be figured out without it. */
2778 assert(ref2);
2779 assert(ref->is_ref2);
2780 ref->count++;
2781 } else {
2782 trackedref *ref = trackedref_new(ref2);
2783 bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
2784 CHECK_OOM(ok);
2785 if (ref2) {
2786 /* We know this cast is safe when it is a ref2, because it's coming from
2787 * another refcounted object. */
2788 const upb_refcounted *from = owner;
2789 assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2790 ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
2791 CHECK_OOM(ok);
2792 }
2793 }
2794 upb_unlock();
2795 }
2796
untrack(const upb_refcounted * r,const void * owner,bool ref2)2797 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2798 upb_value v;
2799 bool found;
2800 trackedref *ref;
2801
2802 assert(owner);
2803 if (owner == UPB_UNTRACKED_REF) return;
2804
2805 upb_lock();
2806 found = upb_inttable_lookupptr(r->refs, owner, &v);
2807 /* This assert will fail if an owner attempts to release a ref it didn't have. */
2808 UPB_ASSERT_VAR(found, found);
2809 ref = upb_value_getptr(v);
2810 assert(ref->is_ref2 == ref2);
2811 if (--ref->count == 0) {
2812 free(ref);
2813 upb_inttable_removeptr(r->refs, owner, NULL);
2814 if (ref2) {
2815 /* We know this cast is safe when it is a ref2, because it's coming from
2816 * another refcounted object. */
2817 const upb_refcounted *from = owner;
2818 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2819 assert(removed);
2820 }
2821 }
2822 upb_unlock();
2823 }
2824
checkref(const upb_refcounted * r,const void * owner,bool ref2)2825 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
2826 upb_value v;
2827 bool found;
2828 trackedref *ref;
2829
2830 upb_lock();
2831 found = upb_inttable_lookupptr(r->refs, owner, &v);
2832 UPB_ASSERT_VAR(found, found);
2833 ref = upb_value_getptr(v);
2834 assert(ref->is_ref2 == ref2);
2835 upb_unlock();
2836 }
2837
2838 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
2839 * originate from the given owner. */
getref2s(const upb_refcounted * owner,upb_inttable * tab)2840 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
2841 upb_inttable_iter i;
2842
2843 upb_lock();
2844 upb_inttable_begin(&i, owner->ref2s);
2845 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
2846 upb_value v;
2847 upb_value count;
2848 trackedref *ref;
2849 bool ok;
2850 bool found;
2851
2852 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
2853
2854 /* To get the count we need to look in the target's table. */
2855 found = upb_inttable_lookupptr(to->refs, owner, &v);
2856 assert(found);
2857 ref = upb_value_getptr(v);
2858 count = upb_value_int32(ref->count);
2859
2860 ok = upb_inttable_insertptr(tab, to, count);
2861 CHECK_OOM(ok);
2862 }
2863 upb_unlock();
2864 }
2865
2866 typedef struct {
2867 upb_inttable ref2;
2868 const upb_refcounted *obj;
2869 } check_state;
2870
visit_check(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)2871 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
2872 void *closure) {
2873 check_state *s = closure;
2874 upb_inttable *ref2 = &s->ref2;
2875 upb_value v;
2876 bool removed;
2877 int32_t newcount;
2878
2879 assert(obj == s->obj);
2880 assert(subobj);
2881 removed = upb_inttable_removeptr(ref2, subobj, &v);
2882 /* The following assertion will fail if the visit() function visits a subobj
2883 * that it did not have a ref2 on, or visits the same subobj too many times. */
2884 assert(removed);
2885 newcount = upb_value_getint32(v) - 1;
2886 if (newcount > 0) {
2887 upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
2888 }
2889 }
2890
/* Debug-mode wrapper around r->vtbl->visit.
 *
 * In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
 * exactly the set of nodes that visit() should visit.  The object's visit
 * function is therefore run twice: once with visit_check to verify it, and
 * then with the caller's callback "v" and "closure". */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  check_state expected;
  bool ok;

  expected.obj = r;
  ok = upb_inttable_init(&expected.ref2, UPB_CTYPE_INT32);
  CHECK_OOM(ok);
  getref2s(r, &expected.ref2);

  /* This should visit any children in the ref2 table. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, visit_check, &expected);
  }

  /* This assertion will fail if the visit() function missed any children. */
  assert(upb_inttable_count(&expected.ref2) == 0);
  upb_inttable_uninit(&expected.ref2);

  /* Verification done; now perform the visit the caller asked for. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, v, closure);
  }
}
2912
/* Allocates and initializes the debug tracking tables (r->refs and r->ref2s).
 * Returns false on allocation failure, in which case whatever was allocated
 * has been released again. */
static bool trackinit(upb_refcounted *r) {
  r->refs = malloc(sizeof(*r->refs));
  r->ref2s = malloc(sizeof(*r->ref2s));

  if (r->refs && r->ref2s && upb_inttable_init(r->refs, UPB_CTYPE_PTR)) {
    if (upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) {
      return true;
    }
    /* The second table failed: undo the first before freeing storage. */
    upb_inttable_uninit(r->refs);
  }

  free(r->refs);
  free(r->ref2s);
  return false;
}
2929
trackfree(const upb_refcounted * r)2930 static void trackfree(const upb_refcounted *r) {
2931 upb_inttable_uninit(r->refs);
2932 upb_inttable_uninit(r->ref2s);
2933 free(r->refs);
2934 free(r->ref2s);
2935 }
2936
2937 #else
2938
/* No-op: ref tracking is only compiled in when UPB_DEBUG_REFS is defined. */
static void track(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
2944
/* No-op: ref tracking is only compiled in when UPB_DEBUG_REFS is defined. */
static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
2950
/* No-op: ref checking is only compiled in when UPB_DEBUG_REFS is defined. */
static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
2956
/* Without UPB_DEBUG_REFS there are no tracking tables to set up, so this
 * always succeeds. */
static bool trackinit(upb_refcounted *r) {
  UPB_UNUSED(r);
  return true;
}
2961
/* Without UPB_DEBUG_REFS there are no tracking tables to release. */
static void trackfree(const upb_refcounted *r) {
  UPB_UNUSED(r);
}
2965
/* Invokes r's visit function (if its vtable provides one) with the given
 * callback and closure. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  if (!r->vtbl->visit) {
    return;
  }
  r->vtbl->visit(r, v, closure);
}
2970
2971 #endif /* UPB_DEBUG_REFS */
2972
2973
2974 /* freeze() *******************************************************************/
2975
2976 /* The freeze() operation is by far the most complicated part of this scheme.
2977 * We compute strongly-connected components and then mutate the graph such that
2978 * we preserve the invariants documented at the top of this file. And we must
2979 * handle out-of-memory errors gracefully (without leaving the graph
2980 * inconsistent), which adds to the fun. */
2981
2982 /* The state used by the freeze operation (shared across many functions). */
2983 typedef struct {
2984 int depth;
2985 int maxdepth;
2986 uint64_t index;
2987 /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by
2988 * color. */
2989 upb_inttable objattr;
2990 upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */
2991 upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */
2992 upb_status *status;
2993 jmp_buf err;
2994 } tarjan;
2995
2996 static void release_ref2(const upb_refcounted *obj,
2997 const upb_refcounted *subobj,
2998 void *closure);
2999
3000 /* Node attributes -----------------------------------------------------------*/
3001
3002 /* After our analysis phase all nodes will be either GRAY or WHITE. */
3003
/* Node colors used while freezing.  The numeric order matters: callers compare
 * colors with < and >, and the value is stored in the low 2 bits of a node's
 * attribute word. */
typedef enum {
  /* Object has not been seen. */
  BLACK = 0,
  /* Object has been found via a refgroup but may not be reachable. */
  GRAY = 1,
  /* Object is reachable and is currently on the Tarjan stack. */
  GREEN = 2,
  /* Object is reachable and has been assigned a group (SCC). */
  WHITE = 3
} color_t;
3010
err(tarjan * t)3011 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
oom(tarjan * t)3012 UPB_NORETURN static void oom(tarjan *t) {
3013 upb_status_seterrmsg(t->status, "out of memory");
3014 err(t);
3015 }
3016
trygetattr(const tarjan * t,const upb_refcounted * r)3017 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3018 upb_value v;
3019 return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3020 upb_value_getuint64(v) : 0;
3021 }
3022
getattr(const tarjan * t,const upb_refcounted * r)3023 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3024 upb_value v;
3025 bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3026 UPB_ASSERT_VAR(found, found);
3027 return upb_value_getuint64(v);
3028 }
3029
/* Stores "attr" as the attribute word for r, replacing any previous value.
 *
 * Inserting a key that was not present before may need to grow the table; if
 * that allocation fails the freeze is aborted through oom() rather than
 * silently losing the attribute.  All callers run during the Tarjan analysis,
 * after freeze() has armed t->err with setjmp(). */
static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
  upb_inttable_removeptr(&t->objattr, r, NULL);
  if (!upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr))) {
    oom(t);
  }
}
3034
color(tarjan * t,const upb_refcounted * r)3035 static color_t color(tarjan *t, const upb_refcounted *r) {
3036 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */
3037 }
3038
/* Marks a previously unseen (BLACK) object as GRAY. */
static void set_gray(tarjan *t, const upb_refcounted *r) {
  assert(color(t, r) == BLACK);
  setattr(t, r, (uint64_t)GRAY);
}
3043
3044 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
push(tarjan * t,const upb_refcounted * r)3045 static void push(tarjan *t, const upb_refcounted *r) {
3046 assert(color(t, r) == BLACK || color(t, r) == GRAY);
3047 /* This defines the attr layout for the GREEN state. "index" and "lowlink"
3048 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
3049 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
3050 if (++t->index == 0x80000000) {
3051 upb_status_seterrmsg(t->status, "too many objects to freeze");
3052 err(t);
3053 }
3054 upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
3055 }
3056
3057 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3058 * SCC group. */
pop(tarjan * t)3059 static upb_refcounted *pop(tarjan *t) {
3060 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3061 assert(color(t, r) == GREEN);
3062 /* This defines the attr layout for nodes in the WHITE state.
3063 * Top of group stack is [group, NULL]; we point at group. */
3064 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3065 return r;
3066 }
3067
/* Starts a new SCC group: allocates a zeroed refcount and appends the pair
 * [refcount, NULL leader] to t->groups.  The leader slot is filled in later
 * by groupleader().  Aborts the freeze via oom() if memory runs out. */
static void tarjan_newgroup(tarjan *t) {
  uint32_t *refcount = malloc(sizeof(*refcount));
  if (!refcount) oom(t);
  *refcount = 0;

  if (!upb_inttable_push(&t->groups, upb_value_ptr(refcount))) {
    /* The table never took ownership, so release the count here. */
    free(refcount);
    oom(t);
  }
  /* Push the empty group leader (we'll fill in the leader later). */
  if (!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
    /* The refcount is already stored in t->groups, and freeze()'s error path
     * frees every value in that table.  Freeing it here as well would be a
     * double free. */
    oom(t);
  }
}
3079
/* Returns the Tarjan index of a GREEN object (bits 2..32 of its attr). */
static uint32_t idx(tarjan *t, const upb_refcounted *r) {
  uint64_t attr;
  assert(color(t, r) == GREEN);
  attr = getattr(t, r);
  return (attr >> 2) & 0x7FFFFFFF;
}
3084
/* Returns the lowlink of a GREEN object (the bits above bit 32 of its attr).
 * Objects of any other color report UINT32_MAX. */
static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
  if (color(t, r) != GREEN) {
    return UINT32_MAX;
  }
  return getattr(t, r) >> 33;
}
3092
/* Replaces the lowlink of a GREEN object, keeping its color and index (the
 * low 33 bits of the attr) unchanged. */
static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
  uint64_t low_bits;
  assert(color(t, r) == GREEN);
  low_bits = getattr(t, r) & 0x1FFFFFFFF;
  setattr(t, r, ((uint64_t)lowlink << 33) | low_bits);
}
3097
/* Returns the refcount pointer of the SCC group that a WHITE object was
 * assigned to (looked up in t->groups by the slot stored in its attr). */
static uint32_t *group(tarjan *t, upb_refcounted *r) {
  upb_value slot;
  uint64_t slot_index;
  bool present;

  assert(color(t, r) == WHITE);
  slot_index = getattr(t, r) >> 8;
  present = upb_inttable_lookup(&t->groups, slot_index, &slot);
  UPB_ASSERT_VAR(present, present);
  return upb_value_getptr(slot);
}
3109
3110 /* If the group leader for this object's group has not previously been set,
3111 * the given object is assigned to be its leader. */
groupleader(tarjan * t,upb_refcounted * r)3112 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
3113 uint64_t leader_slot;
3114 upb_value v;
3115 bool found;
3116
3117 assert(color(t, r) == WHITE);
3118 leader_slot = (getattr(t, r) >> 8) + 1;
3119 found = upb_inttable_lookup(&t->groups, leader_slot, &v);
3120 UPB_ASSERT_VAR(found, found);
3121 if (upb_value_getptr(v)) {
3122 return upb_value_getptr(v);
3123 } else {
3124 upb_inttable_remove(&t->groups, leader_slot, NULL);
3125 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
3126 return r;
3127 }
3128 }
3129
3130
3131 /* Tarjan's algorithm --------------------------------------------------------*/
3132
3133 /* See:
3134 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
3135 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3136
tarjan_visit(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3137 static void tarjan_visit(const upb_refcounted *obj,
3138 const upb_refcounted *subobj,
3139 void *closure) {
3140 tarjan *t = closure;
3141 if (++t->depth > t->maxdepth) {
3142 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3143 err(t);
3144 } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
3145 /* Do nothing: we don't want to visit or color already-frozen nodes,
3146 * and WHITE nodes have already been assigned a SCC. */
3147 } else if (color(t, subobj) < GREEN) {
3148 /* Subdef has not yet been visited; recurse on it. */
3149 do_tarjan(subobj, t);
3150 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3151 } else if (color(t, subobj) == GREEN) {
3152 /* Subdef is in the stack and hence in the current SCC. */
3153 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3154 }
3155 --t->depth;
3156 }
3157
/* One step of Tarjan's SCC algorithm rooted at obj: colors obj's group GRAY
 * if it is new, pushes obj, visits its children, and -- if obj turns out to be
 * the root of an SCC -- creates a new group and pops the SCC's members into
 * it. */
static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
  if (color(t, obj) == BLACK) {
    /* We haven't seen this object's group; mark the whole group GRAY. */
    const upb_refcounted *member = obj;
    do {
      set_gray(t, member);
      member = member->next;
    } while (member != obj);
  }

  push(t, obj);
  visit(obj, tarjan_visit, t);

  if (lowlink(t, obj) == idx(t, obj)) {
    tarjan_newgroup(t);
    /* Everything above obj on the stack, and obj itself, forms the SCC. */
    while (pop(t) != obj) {
    }
  }
}
3173
3174
3175 /* freeze() ------------------------------------------------------------------*/
3176
crossref(const upb_refcounted * r,const upb_refcounted * subobj,void * _t)3177 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3178 void *_t) {
3179 tarjan *t = _t;
3180 assert(color(t, r) > BLACK);
3181 if (color(t, subobj) > BLACK && r->group != subobj->group) {
3182 /* Previously this ref was not reflected in subobj->group because they
3183 * were in the same group; now that they are split a ref must be taken. */
3184 refgroup(subobj->group);
3185 }
3186 }
3187
freeze(upb_refcounted * const * roots,int n,upb_status * s,int maxdepth)3188 static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
3189 int maxdepth) {
3190 volatile bool ret = false;
3191 int i;
3192 upb_inttable_iter iter;
3193
3194 /* We run in two passes so that we can allocate all memory before performing
3195 * any mutation of the input -- this allows us to leave the input unchanged
3196 * in the case of memory allocation failure. */
3197 tarjan t;
3198 t.index = 0;
3199 t.depth = 0;
3200 t.maxdepth = maxdepth;
3201 t.status = s;
3202 if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
3203 if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
3204 if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
3205 if (setjmp(t.err) != 0) goto err4;
3206
3207
3208 for (i = 0; i < n; i++) {
3209 if (color(&t, roots[i]) < GREEN) {
3210 do_tarjan(roots[i], &t);
3211 }
3212 }
3213
3214 /* If we've made it this far, no further errors are possible so it's safe to
3215 * mutate the objects without risk of leaving them in an inconsistent state. */
3216 ret = true;
3217
3218 /* The transformation that follows requires care. The preconditions are:
3219 * - all objects in attr map are WHITE or GRAY, and are in mutable groups
3220 * (groups of all mutable objs)
3221 * - no ref2(to, from) refs have incremented count(to) if both "to" and
3222 * "from" are in our attr map (this follows from invariants (2) and (3)) */
3223
3224 /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
3225 * new groups according to the SCC's we computed. These new groups will
3226 * consist of only frozen objects. None will be immediately collectible,
3227 * because WHITE objects are by definition reachable from one of "roots",
3228 * which the caller must own refs on. */
3229 upb_inttable_begin(&iter, &t.objattr);
3230 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3231 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3232 /* Since removal from a singly-linked list requires access to the object's
3233 * predecessor, we consider obj->next instead of obj for moving. With the
3234 * while() loop we guarantee that we will visit every node's predecessor.
3235 * Proof:
3236 * 1. every node's predecessor is in our attr map.
3237 * 2. though the loop body may change a node's predecessor, it will only
3238 * change it to be the node we are currently operating on, so with a
3239 * while() loop we guarantee ourselves the chance to remove each node. */
3240 while (color(&t, obj->next) == WHITE &&
3241 group(&t, obj->next) != obj->next->group) {
3242 upb_refcounted *leader;
3243
3244 /* Remove from old group. */
3245 upb_refcounted *move = obj->next;
3246 if (obj == move) {
3247 /* Removing the last object from a group. */
3248 assert(*obj->group == obj->individual_count);
3249 free(obj->group);
3250 } else {
3251 obj->next = move->next;
3252 /* This may decrease to zero; we'll collect GRAY objects (if any) that
3253 * remain in the group in the third pass. */
3254 assert(*move->group >= move->individual_count);
3255 *move->group -= move->individual_count;
3256 }
3257
3258 /* Add to new group. */
3259 leader = groupleader(&t, move);
3260 if (move == leader) {
3261 /* First object added to new group is its leader. */
3262 move->group = group(&t, move);
3263 move->next = move;
3264 *move->group = move->individual_count;
3265 } else {
3266 /* Group already has at least one object in it. */
3267 assert(leader->group == group(&t, move));
3268 move->group = group(&t, move);
3269 move->next = leader->next;
3270 leader->next = move;
3271 *move->group += move->individual_count;
3272 }
3273
3274 move->is_frozen = true;
3275 }
3276 }
3277
3278 /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
3279 * increment count(to) if group(obj) != group(to) (which could now be the
3280 * case if "to" was just frozen). */
3281 upb_inttable_begin(&iter, &t.objattr);
3282 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3283 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3284 visit(obj, crossref, &t);
3285 }
3286
3287 /* Pass 3: GRAY objects are collected if their group's refcount dropped to
3288 * zero when we removed its white nodes. This can happen if they had only
3289 * been kept alive by virtue of sharing a group with an object that was just
3290 * frozen.
3291 *
3292 * It is important that we do this last, since the GRAY object's free()
3293 * function could call unref2() on just-frozen objects, which will decrement
3294 * refs that were added in pass 2. */
3295 upb_inttable_begin(&iter, &t.objattr);
3296 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3297 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3298 if (obj->group == NULL || *obj->group == 0) {
3299 if (obj->group) {
3300 upb_refcounted *o;
3301
3302 /* We eagerly free() the group's count (since we can't easily determine
3303 * the group's remaining size it's the easiest way to ensure it gets
3304 * done). */
3305 free(obj->group);
3306
3307 /* Visit to release ref2's (done in a separate pass since release_ref2
3308 * depends on o->group being unmodified so it can test merged()). */
3309 o = obj;
3310 do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
3311
3312 /* Mark "group" fields as NULL so we know to free the objects later in
3313 * this loop, but also don't try to delete the group twice. */
3314 o = obj;
3315 do { o->group = NULL; } while ((o = o->next) != obj);
3316 }
3317 freeobj(obj);
3318 }
3319 }
3320
3321 err4:
3322 if (!ret) {
3323 upb_inttable_begin(&iter, &t.groups);
3324 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
3325 free(upb_value_getptr(upb_inttable_iter_value(&iter)));
3326 }
3327 upb_inttable_uninit(&t.groups);
3328 err3:
3329 upb_inttable_uninit(&t.stack);
3330 err2:
3331 upb_inttable_uninit(&t.objattr);
3332 err1:
3333 return ret;
3334 }
3335
3336
3337 /* Misc internal functions ***************************************************/
3338
merged(const upb_refcounted * r,const upb_refcounted * r2)3339 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3340 return r->group == r2->group;
3341 }
3342
/* Merges the group of "from" into the group of "r".  Does nothing if the two
 * already share a group.  Afterwards every object of both groups points at
 * r's refcount, which holds the sum of both counts, and both circular lists
 * are joined into one. */
static void merge(upb_refcounted *r, upb_refcounted *from) {
  upb_refcounted *first;
  upb_refcounted *cur;
  upb_refcounted *r_next;

  if (merged(r, from)) return;

  /* Fold from's count into r's and drop from's now-redundant counter. */
  *r->group += *from->group;
  free(from->group);
  first = from;

  /* Set all refcount pointers in the "from" chain to the merged refcount.
   *
   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   * if the user continuously extends a group by one object.  Prevent this by
   * using one of the techniques in this paper:
   *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
  cur = first;
  do {
    cur->group = r->group;
    cur = cur->next;
  } while (cur != first);

  /* Merge the two circularly linked lists by swapping their next pointers. */
  r_next = r->next;
  r->next = first->next;
  first->next = r_next;
}
3365
3366 static void unref(const upb_refcounted *r);
3367
release_ref2(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3368 static void release_ref2(const upb_refcounted *obj,
3369 const upb_refcounted *subobj,
3370 void *closure) {
3371 UPB_UNUSED(closure);
3372 untrack(subobj, obj, true);
3373 if (!merged(obj, subobj)) {
3374 assert(subobj->is_frozen);
3375 unref(subobj);
3376 }
3377 }
3378
unref(const upb_refcounted * r)3379 static void unref(const upb_refcounted *r) {
3380 if (unrefgroup(r->group)) {
3381 const upb_refcounted *o;
3382
3383 free(r->group);
3384
3385 /* In two passes, since release_ref2 needs a guarantee that any subobjs
3386 * are alive. */
3387 o = r;
3388 do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3389
3390 o = r;
3391 do {
3392 const upb_refcounted *next = o->next;
3393 assert(o->is_frozen || o->individual_count == 0);
3394 freeobj((upb_refcounted*)o);
3395 o = next;
3396 } while(o != r);
3397 }
3398 }
3399
/* Destroys a single object: releases its debug tracking tables, then calls
 * the object's own free function from its vtable. */
static void freeobj(upb_refcounted *o) {
  trackfree(o);
  o->vtbl->free(o);
}
3404
3405
3406 /* Public interface ***********************************************************/
3407
/* Initializes r as a new, mutable object in a group of its own and takes one
 * ref on it on behalf of "owner".
 *
 * r:     the object to initialize.
 * vtbl:  the vtable stored in r.
 * owner: the owner of the initial ref.
 *
 * Returns false if memory allocation fails; no ref is taken in that case. */
bool upb_refcounted_init(upb_refcounted *r,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
#ifndef NDEBUG
  /* Endianness check.  This is unrelated to upb_refcounted, it's just a
   * convenient place to put the check that we can be assured will run for
   * basically every program using upb. */
  const int probe = 1;
#ifdef UPB_BIG_ENDIAN
  assert(*(char*)&probe != 1);
#else
  assert(*(char*)&probe == 1);
#endif
#endif

  r->vtbl = vtbl;
  r->next = r;
  r->is_frozen = false;
  r->individual_count = 0;

  r->group = malloc(sizeof(*r->group));
  if (!r->group) return false;
  *r->group = 0;

  if (trackinit(r)) {
    upb_refcounted_ref(r, owner);
    return true;
  }

  free(r->group);
  return false;
}
3437
upb_refcounted_isfrozen(const upb_refcounted * r)3438 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3439 return r->is_frozen;
3440 }
3441
upb_refcounted_ref(const upb_refcounted * r,const void * owner)3442 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3443 track(r, owner, false);
3444 if (!r->is_frozen)
3445 ((upb_refcounted*)r)->individual_count++;
3446 refgroup(r->group);
3447 }
3448
upb_refcounted_unref(const upb_refcounted * r,const void * owner)3449 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3450 untrack(r, owner, false);
3451 if (!r->is_frozen)
3452 ((upb_refcounted*)r)->individual_count--;
3453 unref(r);
3454 }
3455
/* Records a ref from the object "from" to r.  If r is frozen its group count
 * is incremented; otherwise r and "from" are passed to merge(). */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
  assert(!from->is_frozen);  /* Non-const pointer implies this. */
  track(r, from, true);
  if (!r->is_frozen) {
    merge((upb_refcounted*)r, from);
    return;
  }
  refgroup(r->group);
}
3465
/* Removes a ref from the object "from" to r.  For a frozen r this drops a
 * group reference via unref(); for an unfrozen r nothing is released here,
 * the function only asserts that r and "from" are merged. */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
  assert(!from->is_frozen);  /* Non-const pointer implies this. */
  untrack(r, from, true);
  if (!r->is_frozen) {
    assert(merged(r, from));
    return;
  }
  unref(r);
}
3475
/* Moves a ref on r from owner "from" to owner "to".  The new ref is taken
 * before the old one is released.  A NULL "to" skips the ref and a NULL
 * "from" skips the unref. */
void upb_refcounted_donateref(
    const upb_refcounted *r, const void *from, const void *to) {
  assert(from != to);
  if (to) {
    upb_refcounted_ref(r, to);
  }
  if (from) {
    upb_refcounted_unref(r, from);
  }
}
3484
/* Public wrapper around checkref() for a ref held by "owner" on r; the
 * final argument passed to checkref() is false. */
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
  const bool is_ref2 = false;
  checkref(r, owner, is_ref2);
}
3488
/* Freezes the n objects in "roots" by delegating to freeze().  In debug
 * builds, first asserts that none of the roots is already frozen.  Returns
 * whatever freeze() returns. */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
                           int maxdepth) {
  int idx = 0;
  while (idx < n) {
    assert(!roots[idx]->is_frozen);
    idx++;
  }
  return freeze(roots, n, s, maxdepth);
}
3497
3498
3499 #include <stdlib.h>
3500
3501 /* Fallback implementation if the shim is not specialized by the JIT. */
3502 #define SHIM_WRITER(type, ctype) \
3503 bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \
3504 uint8_t *m = c; \
3505 const upb_shim_data *d = hd; \
3506 if (d->hasbit > 0) \
3507 *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \
3508 *(ctype*)&m[d->offset] = val; \
3509 return true; \
3510 } \
3511
SHIM_WRITER(double,double)3512 SHIM_WRITER(double, double)
3513 SHIM_WRITER(float, float)
3514 SHIM_WRITER(int32, int32_t)
3515 SHIM_WRITER(int64, int64_t)
3516 SHIM_WRITER(uint32, uint32_t)
3517 SHIM_WRITER(uint64, uint64_t)
3518 SHIM_WRITER(bool, bool)
3519 #undef SHIM_WRITER
3520
3521 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3522 int32_t hasbit) {
3523 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3524 bool ok;
3525
3526 upb_shim_data *d = malloc(sizeof(*d));
3527 if (!d) return false;
3528 d->offset = offset;
3529 d->hasbit = hasbit;
3530
3531 upb_handlerattr_sethandlerdata(&attr, d);
3532 upb_handlerattr_setalwaysok(&attr, true);
3533 upb_handlers_addcleanup(h, d, free);
3534
3535 #define TYPE(u, l) \
3536 case UPB_TYPE_##u: \
3537 ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3538
3539 ok = false;
3540
3541 switch (upb_fielddef_type(f)) {
3542 TYPE(INT64, int64);
3543 TYPE(INT32, int32);
3544 TYPE(ENUM, int32);
3545 TYPE(UINT64, uint64);
3546 TYPE(UINT32, uint32);
3547 TYPE(DOUBLE, double);
3548 TYPE(FLOAT, float);
3549 TYPE(BOOL, bool);
3550 default: assert(false); break;
3551 }
3552 #undef TYPE
3553
3554 upb_handlerattr_uninit(&attr);
3555 return ok;
3556 }
3557
upb_shim_getdata(const upb_handlers * h,upb_selector_t s,upb_fieldtype_t * type)3558 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3559 upb_fieldtype_t *type) {
3560 upb_func *f = upb_handlers_gethandler(h, s);
3561
3562 if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3563 *type = UPB_TYPE_INT64;
3564 } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3565 *type = UPB_TYPE_INT32;
3566 } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3567 *type = UPB_TYPE_UINT64;
3568 } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3569 *type = UPB_TYPE_UINT32;
3570 } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3571 *type = UPB_TYPE_DOUBLE;
3572 } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3573 *type = UPB_TYPE_FLOAT;
3574 } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3575 *type = UPB_TYPE_BOOL;
3576 } else {
3577 return NULL;
3578 }
3579
3580 return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3581 }
3582
3583
3584 #include <stdlib.h>
3585 #include <string.h>
3586
/* vtable "free" hook for upb_symtab: releases the ref the table holds on each
 * def it contains, then tears down the string table and frees the symtab. */
static void upb_symtab_free(upb_refcounted *r) {
  upb_symtab *symtab = (upb_symtab*)r;
  upb_strtable_iter it;

  upb_strtable_begin(&it, &symtab->symtab);
  while (!upb_strtable_done(&it)) {
    const upb_def *d = upb_value_getptr(upb_strtable_iter_value(&it));
    upb_def_unref(d, symtab);
    upb_strtable_next(&it);
  }

  upb_strtable_uninit(&symtab->symtab);
  free(symtab);
}
3598
3599
upb_symtab_new(const void * owner)3600 upb_symtab *upb_symtab_new(const void *owner) {
3601 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3602 upb_symtab *s = malloc(sizeof(*s));
3603 upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
3604 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3605 return s;
3606 }
3607
/* Freezes symtab s, which must not already be frozen.  The freeze is asserted
 * to succeed. */
void upb_symtab_freeze(upb_symtab *s) {
  upb_refcounted *root;
  bool frozen_ok;

  assert(!upb_symtab_isfrozen(s));
  root = upb_symtab_upcast_mutable(s);

  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
   * defs cannot refer back to the table and therefore cannot create cycles.
   * So 0 will suffice for maxdepth here. */
  frozen_ok = upb_refcounted_freeze(&root, 1, NULL, 0);
  UPB_ASSERT_VAR(frozen_ok, frozen_ok);
}
3620
upb_symtab_lookup(const upb_symtab * s,const char * sym)3621 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3622 upb_value v;
3623 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3624 upb_value_getptr(v) : NULL;
3625 return ret;
3626 }
3627
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)3628 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3629 upb_value v;
3630 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3631 upb_value_getptr(v) : NULL;
3632 return def ? upb_dyncast_msgdef(def) : NULL;
3633 }
3634
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)3635 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3636 upb_value v;
3637 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3638 upb_value_getptr(v) : NULL;
3639 return def ? upb_dyncast_enumdef(def) : NULL;
3640 }
3641
3642 /* Given a symbol and the base symbol inside which it is defined, find the
3643 * symbol's definition in t. */
upb_resolvename(const upb_strtable * t,const char * base,const char * sym)3644 static upb_def *upb_resolvename(const upb_strtable *t,
3645 const char *base, const char *sym) {
3646 if(strlen(sym) == 0) return NULL;
3647 if(sym[0] == '.') {
3648 /* Symbols starting with '.' are absolute, so we do a single lookup.
3649 * Slice to omit the leading '.' */
3650 upb_value v;
3651 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3652 } else {
3653 /* Remove components from base until we find an entry or run out.
3654 * TODO: This branch is totally broken, but currently not used. */
3655 (void)base;
3656 assert(false);
3657 return NULL;
3658 }
3659 }
3660
upb_symtab_resolve(const upb_symtab * s,const char * base,const char * sym)3661 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3662 const char *sym) {
3663 upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3664 return ret;
3665 }
3666
3667 /* Starts a depth-first traversal at "def", recursing into any subdefs
3668 * (ie. submessage types). Adds duplicates of existing defs to addtab
3669 * wherever necessary, so that the resulting symtab will be consistent once
3670 * addtab is added.
3671 *
3672 * More specifically, if any def D is found in the DFS that:
3673 *
3674 * 1. can reach a def that is being replaced by something in addtab, AND
3675 *
3676 * 2. is not itself being replaced already (ie. this name doesn't already
3677 * exist in addtab)
3678 *
3679 * ...then a duplicate (new copy) of D will be added to addtab.
3680 *
3681 * Returns true if this happened for any def reachable from "def."
3682 *
3683 * It is slightly tricky to do this correctly in the presence of cycles. If we
3684 * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3685 * our stack can reach a def in addtab or not. Once we figure this out, that
3686 * answer needs to apply to *all* defs in these SCCs, even if we visited them
3687 * already. So a straight up one-pass cycle-detecting DFS won't work.
3688 *
3689 * To work around this problem, we traverse each SCC (which we already
3690 * computed, since these defs are frozen) as a single node. We first compute
3691 * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3692 * the entire SCC. This requires breaking the encapsulation of upb_refcounted,
3693 * since that is where we get the data about what SCC we are in. */
upb_resolve_dfs(const upb_def * def,upb_strtable * addtab,const void * new_owner,upb_inttable * seen,upb_status * s)3694 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3695 const void *new_owner, upb_inttable *seen,
3696 upb_status *s) {
3697 upb_value v;
3698 bool need_dup;
3699 const upb_def *base;
3700 const void* memoize_key;
3701
3702 /* Memoize results of this function for efficiency (since we're traversing a
3703 * DAG this is not needed to limit the depth of the search).
3704 *
3705 * We memoize by SCC instead of by individual def. */
3706 memoize_key = def->base.group;
3707
3708 if (upb_inttable_lookupptr(seen, memoize_key, &v))
3709 return upb_value_getbool(v);
3710
3711 /* Visit submessages for all messages in the SCC. */
3712 need_dup = false;
3713 base = def;
3714 do {
3715 upb_value v;
3716 const upb_msgdef *m;
3717
3718 assert(upb_def_isfrozen(def));
3719 if (def->type == UPB_DEF_FIELD) continue;
3720 if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3721 need_dup = true;
3722 }
3723
3724 /* For messages, continue the recursion by visiting all subdefs, but only
3725 * ones in different SCCs. */
3726 m = upb_dyncast_msgdef(def);
3727 if (m) {
3728 upb_msg_field_iter i;
3729 for(upb_msg_field_begin(&i, m);
3730 !upb_msg_field_done(&i);
3731 upb_msg_field_next(&i)) {
3732 upb_fielddef *f = upb_msg_iter_field(&i);
3733 const upb_def *subdef;
3734
3735 if (!upb_fielddef_hassubdef(f)) continue;
3736 subdef = upb_fielddef_subdef(f);
3737
3738 /* Skip subdefs in this SCC. */
3739 if (def->base.group == subdef->base.group) continue;
3740
3741 /* |= to avoid short-circuit; we need its side-effects. */
3742 need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
3743 if (!upb_ok(s)) return false;
3744 }
3745 }
3746 } while ((def = (upb_def*)def->base.next) != base);
3747
3748 if (need_dup) {
3749 /* Dup all defs in this SCC that don't already have entries in addtab. */
3750 def = base;
3751 do {
3752 const char *name;
3753
3754 if (def->type == UPB_DEF_FIELD) continue;
3755 name = upb_def_fullname(def);
3756 if (!upb_strtable_lookup(addtab, name, NULL)) {
3757 upb_def *newdef = upb_def_dup(def, new_owner);
3758 if (!newdef) goto oom;
3759 newdef->came_from_user = false;
3760 if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3761 goto oom;
3762 }
3763 } while ((def = (upb_def*)def->base.next) != base);
3764 }
3765
3766 upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
3767 return need_dup;
3768
3769 oom:
3770 upb_status_seterrmsg(s, "out of memory");
3771 return false;
3772 }
3773
3774 /* TODO(haberman): we need a lot more testing of error conditions.
3775 * The came_from_user stuff in particular is not tested. */
upb_symtab_add(upb_symtab * s,upb_def * const * defs,int n,void * ref_donor,upb_status * status)3776 bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
3777 upb_status *status) {
3778 int i;
3779 upb_strtable_iter iter;
3780 upb_def **add_defs = NULL;
3781 upb_strtable addtab;
3782 upb_inttable seen;
3783
3784 assert(!upb_symtab_isfrozen(s));
3785 if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3786 upb_status_seterrmsg(status, "out of memory");
3787 return false;
3788 }
3789
3790 /* Add new defs to our "add" set. */
3791 for (i = 0; i < n; i++) {
3792 upb_def *def = defs[i];
3793 const char *fullname;
3794 upb_fielddef *f;
3795
3796 if (upb_def_isfrozen(def)) {
3797 upb_status_seterrmsg(status, "added defs must be mutable");
3798 goto err;
3799 }
3800 assert(!upb_def_isfrozen(def));
3801 fullname = upb_def_fullname(def);
3802 if (!fullname) {
3803 upb_status_seterrmsg(
3804 status, "Anonymous defs cannot be added to a symtab");
3805 goto err;
3806 }
3807
3808 f = upb_dyncast_fielddef_mutable(def);
3809
3810 if (f) {
3811 if (!upb_fielddef_containingtypename(f)) {
3812 upb_status_seterrmsg(status,
3813 "Standalone fielddefs must have a containing type "
3814 "(extendee) name set");
3815 goto err;
3816 }
3817 } else {
3818 if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3819 upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3820 goto err;
3821 }
3822 /* We need this to back out properly, because if there is a failure we
3823 * need to donate the ref back to the caller. */
3824 def->came_from_user = true;
3825 upb_def_donateref(def, ref_donor, s);
3826 if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3827 goto oom_err;
3828 }
3829 }
3830
3831 /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
3832 * If the appropriate message only exists in the existing symtab, duplicate
3833 * it so we have a mutable copy we can add the fields to. */
3834 for (i = 0; i < n; i++) {
3835 upb_def *def = defs[i];
3836 upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
3837 const char *msgname;
3838 upb_value v;
3839 upb_msgdef *m;
3840
3841 if (!f) continue;
3842 msgname = upb_fielddef_containingtypename(f);
3843 /* We validated this earlier in this function. */
3844 assert(msgname);
3845
3846 /* If the extendee name is absolutely qualified, move past the initial ".".
3847 * TODO(haberman): it is not obvious what it would mean if this was not
3848 * absolutely qualified. */
3849 if (msgname[0] == '.') {
3850 msgname++;
3851 }
3852
3853 if (upb_strtable_lookup(&addtab, msgname, &v)) {
3854 /* Extendee is in the set of defs the user asked us to add. */
3855 m = upb_value_getptr(v);
3856 } else {
3857 /* Need to find and dup the extendee from the existing symtab. */
3858 const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
3859 if (!frozen_m) {
3860 upb_status_seterrf(status,
3861 "Tried to extend message %s that does not exist "
3862 "in this SymbolTable.",
3863 msgname);
3864 goto err;
3865 }
3866 m = upb_msgdef_dup(frozen_m, s);
3867 if (!m) goto oom_err;
3868 if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
3869 upb_msgdef_unref(m, s);
3870 goto oom_err;
3871 }
3872 }
3873
3874 if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
3875 goto err;
3876 }
3877 }
3878
3879 /* Add dups of any existing def that can reach a def with the same name as
3880 * anything in our "add" set. */
3881 if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
3882 upb_strtable_begin(&iter, &s->symtab);
3883 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3884 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3885 upb_resolve_dfs(def, &addtab, s, &seen, status);
3886 if (!upb_ok(status)) goto err;
3887 }
3888 upb_inttable_uninit(&seen);
3889
3890 /* Now using the table, resolve symbolic references for subdefs. */
3891 upb_strtable_begin(&iter, &addtab);
3892 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3893 const char *base;
3894 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3895 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
3896 upb_msg_field_iter j;
3897
3898 if (!m) continue;
3899 /* Type names are resolved relative to the message in which they appear. */
3900 base = upb_msgdef_fullname(m);
3901
3902 for(upb_msg_field_begin(&j, m);
3903 !upb_msg_field_done(&j);
3904 upb_msg_field_next(&j)) {
3905 upb_fielddef *f = upb_msg_iter_field(&j);
3906 const char *name = upb_fielddef_subdefname(f);
3907 if (name && !upb_fielddef_subdef(f)) {
3908 /* Try the lookup in the current set of to-be-added defs first. If not
3909 * there, try existing defs. */
3910 upb_def *subdef = upb_resolvename(&addtab, base, name);
3911 if (subdef == NULL) {
3912 subdef = upb_resolvename(&s->symtab, base, name);
3913 }
3914 if (subdef == NULL) {
3915 upb_status_seterrf(
3916 status, "couldn't resolve name '%s' in message '%s'", name, base);
3917 goto err;
3918 } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
3919 goto err;
3920 }
3921 }
3922 }
3923 }
3924
3925 /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
3926 add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
3927 if (add_defs == NULL) goto oom_err;
3928 upb_strtable_begin(&iter, &addtab);
3929 for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3930 add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
3931 }
3932
3933 if (!upb_def_freeze(add_defs, n, status)) goto err;
3934
3935 /* This must be delayed until all errors have been detected, since error
3936 * recovery code uses this table to cleanup defs. */
3937 upb_strtable_uninit(&addtab);
3938
3939 /* TODO(haberman) we don't properly handle errors after this point (like
3940 * OOM in upb_strtable_insert() below). */
3941 for (i = 0; i < n; i++) {
3942 upb_def *def = add_defs[i];
3943 const char *name = upb_def_fullname(def);
3944 upb_value v;
3945 bool success;
3946
3947 if (upb_strtable_remove(&s->symtab, name, &v)) {
3948 const upb_def *def = upb_value_getptr(v);
3949 upb_def_unref(def, s);
3950 }
3951 success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
3952 UPB_ASSERT_VAR(success, success == true);
3953 }
3954 free(add_defs);
3955 return true;
3956
3957 oom_err:
3958 upb_status_seterrmsg(status, "out of memory");
3959 err: {
3960 /* For defs the user passed in, we need to donate the refs back. For defs
3961 * we dup'd, we need to just unref them. */
3962 upb_strtable_begin(&iter, &addtab);
3963 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3964 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3965 bool came_from_user = def->came_from_user;
3966 def->came_from_user = false;
3967 if (came_from_user) {
3968 upb_def_donateref(def, s, ref_donor);
3969 } else {
3970 upb_def_unref(def, s);
3971 }
3972 }
3973 }
3974 upb_strtable_uninit(&addtab);
3975 free(add_defs);
3976 assert(!upb_ok(status));
3977 return false;
3978 }
3979
3980 /* Iteration. */
3981
/* Moves iter forward until it either is done or sits on a def whose type
 * equals iter->type.  Does nothing when the iterator accepts UPB_DEF_ANY. */
static void advance_to_matching(upb_symtab_iter *iter) {
  if (iter->type == UPB_DEF_ANY) return;

  for (;;) {
    if (upb_strtable_done(&iter->iter)) break;
    if (upb_symtab_iter_def(iter)->type == iter->type) break;
    upb_strtable_next(&iter->iter);
  }
}
3991
/* Positions iter on the first def in s whose type matches "type" (or on the
 * very first def when type is UPB_DEF_ANY). */
void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
                      upb_deftype_t type) {
  iter->type = type;
  upb_strtable_begin(&iter->iter, &s->symtab);
  advance_to_matching(iter);
}
3998
/* Steps iter past the current def and on to the next one of a matching
 * type, if any. */
void upb_symtab_next(upb_symtab_iter *iter) {
  upb_strtable_iter *inner = &iter->iter;
  upb_strtable_next(inner);
  advance_to_matching(iter);
}
4003
upb_symtab_done(const upb_symtab_iter * iter)4004 bool upb_symtab_done(const upb_symtab_iter *iter) {
4005 return upb_strtable_done(&iter->iter);
4006 }
4007
upb_symtab_iter_def(const upb_symtab_iter * iter)4008 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4009 return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4010 }
4011 /*
4012 ** upb_table Implementation
4013 **
4014 ** Implementation is heavily inspired by Lua's ltable.c.
4015 */
4016
4017
4018 #include <stdlib.h>
4019 #include <string.h>
4020
/* Upper bound that log2ceil() applies to its result, i.e. a base-2 logarithm
 * of a size. */
#define UPB_MAXARRSIZE 16 /* 64k. */

/* From Chromium.
 *
 * Evaluates to the number of elements in x.  The second factor is 1 when
 * sizeof(x) is an exact multiple of the element size and 0 otherwise, so a
 * mismatched argument produces a division by zero instead of a silently wrong
 * count. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* Load-factor threshold: isfull() reports a table as full once one more
 * entry would push count/size above this value. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
4033
/* Returns true when v has at most one bit set, i.e. when v is zero or an
 * exact power of two. */
bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
4035
log2ceil(uint64_t v)4036 int log2ceil(uint64_t v) {
4037 int ret = 0;
4038 bool pow2 = is_pow2(v);
4039 while (v >>= 1) ret++;
4040 ret = pow2 ? ret : ret + 1; /* Ceiling. */
4041 return UPB_MIN(UPB_MAXARRSIZE, ret);
4042 }
4043
/* Returns a heap-allocated copy of the NUL-terminated string s, or NULL if
 * upb_strdup2() fails. */
char *upb_strdup(const char *s) {
  const size_t len = strlen(s);
  return upb_strdup2(s, len);
}
4047
/* Returns a malloc()'d copy of the first len bytes of s with a terminating
 * NUL byte appended.  The input is not required to be NUL-terminated, since
 * it may be a raw binary buffer.  Returns NULL if len is SIZE_MAX or the
 * allocation fails. */
char *upb_strdup2(const char *s, size_t len) {
  char *copy;

  /* len + 1 below would wrap around to zero. */
  if (len == SIZE_MAX) return NULL;

  copy = malloc(len + 1);
  if (copy == NULL) return NULL;

  memcpy(copy, s, len);
  copy[len] = '\0';
  return copy;
}
4064
/* A type to represent the lookup key of either a strtable or an inttable.
 * intkey() fills in "num"; strkey2() fills in "str". */
typedef union {
  uintptr_t num;  /* Integer key. */
  struct {
    const char *str;  /* Key bytes; strkey2() stores the pointer, not a copy. */
    size_t len;       /* Number of key bytes. */
  } str;
} lookupkey_t;
4073
strkey2(const char * str,size_t len)4074 static lookupkey_t strkey2(const char *str, size_t len) {
4075 lookupkey_t k;
4076 k.str.str = str;
4077 k.str.len = len;
4078 return k;
4079 }
4080
intkey(uintptr_t key)4081 static lookupkey_t intkey(uintptr_t key) {
4082 lookupkey_t k;
4083 k.num = key;
4084 return k;
4085 }
4086
/* Computes the hash of a key as it is stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Reports whether stored key k1 matches lookup key k2. */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4089
4090 /* Base table (shared code) ***************************************************/
4091
4092 /* For when we need to cast away const. */
mutable_entries(upb_table * t)4093 static upb_tabent *mutable_entries(upb_table *t) {
4094 return (upb_tabent*)t->entries;
4095 }
4096
/* Reports whether adding one more entry would push t's load factor
 * (entries / slots) above MAX_LOAD. */
static bool isfull(upb_table *t) {
  return MAX_LOAD < (double)(t->count + 1) / upb_table_size(t);
}
4100
/* Initializes t as an empty table of value type "ctype" whose size is
 * derived from size_lg2.  All slots start zeroed; a table of size zero gets
 * no entry array at all.  Returns false if the entry array cannot be
 * allocated. */
static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
  size_t nbytes;

  t->count = 0;
  t->ctype = ctype;
  t->size_lg2 = size_lg2;
  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;

  nbytes = upb_table_size(t) * sizeof(upb_tabent);
  if (nbytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = malloc(nbytes);
  if (!t->entries) return false;
  memset(mutable_entries(t), 0, nbytes);
  return true;
}
4118
/* Frees t's entry array.  Keys and values stored in the entries are not
 * touched. */
static void uninit(upb_table *t) {
  free(mutable_entries(t));
}
4120
emptyent(upb_table * t)4121 static upb_tabent *emptyent(upb_table *t) {
4122 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4123 while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4124 }
4125
getentry_mutable(upb_table * t,uint32_t hash)4126 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4127 return (upb_tabent*)upb_getentry(t, hash);
4128 }
4129
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4130 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4131 uint32_t hash, eqlfunc_t *eql) {
4132 const upb_tabent *e;
4133
4134 if (t->size_lg2 == 0) return NULL;
4135 e = upb_getentry(t, hash);
4136 if (upb_tabent_isempty(e)) return NULL;
4137 while (1) {
4138 if (eql(e->key, key)) return e;
4139 if ((e = e->next) == NULL) return NULL;
4140 }
4141 }
4142
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4143 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4144 uint32_t hash, eqlfunc_t *eql) {
4145 return (upb_tabent*)findentry(t, key, hash, eql);
4146 }
4147
/* Looks up "key" in t.  Returns true if it is present, and in that case also
 * stores its value in *v when v is non-NULL. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *e = findentry(t, key, hash, eql);

  if (e == NULL) return false;
  if (v != NULL) {
    _upb_value_setval(v, e->val.val, t->ctype);
  }
  return true;
}
4160
4161 /* The given key must not already exist in the table. */
insert(upb_table * t,lookupkey_t key,upb_tabkey tabkey,upb_value val,uint32_t hash,hashfunc_t * hashfunc,eqlfunc_t * eql)4162 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
4163 upb_value val, uint32_t hash,
4164 hashfunc_t *hashfunc, eqlfunc_t *eql) {
4165 upb_tabent *mainpos_e;
4166 upb_tabent *our_e;
4167
4168 UPB_UNUSED(eql);
4169 UPB_UNUSED(key);
4170 assert(findentry(t, key, hash, eql) == NULL);
4171 assert(val.ctype == t->ctype);
4172
4173 t->count++;
4174 mainpos_e = getentry_mutable(t, hash);
4175 our_e = mainpos_e;
4176
4177 if (upb_tabent_isempty(mainpos_e)) {
4178 /* Our main position is empty; use it. */
4179 our_e->next = NULL;
4180 } else {
4181 /* Collision. */
4182 upb_tabent *new_e = emptyent(t);
4183 /* Head of collider's chain. */
4184 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
4185 if (chain == mainpos_e) {
4186 /* Existing ent is in its main posisiton (it has the same hash as us, and
4187 * is the head of our chain). Insert to new ent and append to this chain. */
4188 new_e->next = mainpos_e->next;
4189 mainpos_e->next = new_e;
4190 our_e = new_e;
4191 } else {
4192 /* Existing ent is not in its main position (it is a node in some other
4193 * chain). This implies that no existing ent in the table has our hash.
4194 * Evict it (updating its chain) and use its ent for head of our chain. */
4195 *new_e = *mainpos_e; /* copies next. */
4196 while (chain->next != mainpos_e) {
4197 chain = (upb_tabent*)chain->next;
4198 assert(chain);
4199 }
4200 chain->next = new_e;
4201 our_e = mainpos_e;
4202 our_e->next = NULL;
4203 }
4204 }
4205 our_e->key = tabkey;
4206 our_e->val.val = val.val;
4207 assert(findentry(t, key, hash, eql) == our_e);
4208 }
4209
/* Removes the entry matching "key" from t.
 *
 * Returns false if there is no such entry.  Otherwise returns true and, when
 * the corresponding pointer is non-NULL:
 *   - *val receives the removed entry's value;
 *   - *removed receives the removed entry's stored key, so that the caller can
 *     release any storage the key owns. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain = getentry_mutable(t, hash);
  upb_tabent *victim;

  if (upb_tabent_isempty(chain)) return false;

  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) {
      _upb_value_setval(val, chain->val.val, t->ctype);
    }
    /* Report the key *before* the head slot is overwritten below.  Reporting
     * the successor's key instead would make the caller free a key that is
     * still live (it moves into the head slot) and leak the removed one. */
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the successor into the head slot and empty the slot it came
       * from. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  }

  /* Element to remove is either in a non-head position or not in the
   * table. */
  while (chain->next && !eql(chain->next->key, key))
    chain = (upb_tabent*)chain->next;

  if (!chain->next) return false;

  /* Found element to remove; unlink it from its predecessor. */
  victim = (upb_tabent*)chain->next;
  if (val) {
    _upb_value_setval(val, victim->val.val, t->ctype);
  }
  if (removed) *removed = victim->key;
  victim->key = 0;  /* Make the slot empty. */
  chain->next = victim->next;
  t->count--;
  return true;
}
4253
next(const upb_table * t,size_t i)4254 static size_t next(const upb_table *t, size_t i) {
4255 do {
4256 if (++i >= upb_table_size(t))
4257 return SIZE_MAX;
4258 } while(upb_tabent_isempty(&t->entries[i]));
4259
4260 return i;
4261 }
4262
/* Returns the index of the first non-empty slot of t, or SIZE_MAX if t has
 * none.  Starts next() one position before index 0. */
static size_t begin(const upb_table *t) {
  const size_t before_first = (size_t)-1;
  return next(t, before_first);
}
4266
4267
4268 /* upb_strtable ***************************************************************/
4269
4270 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
4271
strcopy(lookupkey_t k2)4272 static upb_tabkey strcopy(lookupkey_t k2) {
4273 char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
4274 if (str == NULL) return 0;
4275 memcpy(str, &k2.str.len, sizeof(uint32_t));
4276 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4277 return (uintptr_t)str;
4278 }
4279
/* Hash function for stored string keys: MurmurHash2 with seed 0 over the
 * key's bytes. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t nbytes;
  const char *bytes = upb_tabstr(key, &nbytes);
  return MurmurHash2(bytes, nbytes, 0);
}
4285
/* Equality function for string keys: stored key k1 matches lookup key k2 when
 * both have the same length and the same bytes. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char *stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) return false;
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
4291
/* Initializes t as an empty string table holding values of type "ctype",
 * with an initial size_lg2 of 2.  Returns false on allocation failure. */
bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
  const uint8_t initial_size_lg2 = 2;
  return init(&t->t, ctype, initial_size_lg2);
}
4295
/* Releases everything t owns: the stored key of every slot (empty slots hold
 * a zero key, which free() accepts) and then the entry array itself. */
void upb_strtable_uninit(upb_strtable *t) {
  const size_t nslots = upb_table_size(&t->t);
  size_t slot;

  for (slot = 0; slot < nslots; slot++) {
    free((void*)t->t.entries[slot].key);
  }
  uninit(&t->t);
}
4302
/* Rebuilds t as a table of size_lg2, re-inserting every existing entry
 * (keys are copied into the new table).
 *
 * Returns false on allocation failure, in which case t is left exactly as it
 * was and the partially-built replacement is discarded. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
  upb_strtable new_table;
  upb_strtable_iter i;

  if (!init(&new_table.t, t->t.ctype, size_lg2))
    return false;
  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    if (!upb_strtable_insert2(
            &new_table,
            upb_strtable_iter_key(&i),
            upb_strtable_iter_keylength(&i),
            upb_strtable_iter_value(&i))) {
      /* Copying an entry failed.  Swapping in the new table now would
       * silently drop entries, so throw it away and keep the old one. */
      upb_strtable_uninit(&new_table);
      return false;
    }
  }
  upb_strtable_uninit(t);
  *t = new_table;
  return true;
}
4321
/* Inserts value v under the len-byte key k.  The table stores its own copy
 * of the key.  The key must not already be present (asserted in insert()).
 * The table is grown to the next size_lg2 first if it is full.
 *
 * Returns false if growing the table or copying the key fails. */
bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
                          upb_value v) {
  lookupkey_t key;
  upb_tabkey stored_key;
  uint32_t hash;

  /* Need to resize?  New table of double the size, add old elements to it. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1)) {
    return false;
  }

  key = strkey2(k, len);
  stored_key = strcopy(key);
  if (stored_key == 0) return false;

  hash = MurmurHash2(key.str.str, key.str.len, 0);
  insert(&t->t, key, stored_key, v, hash, &strhash, &streql);
  return true;
}
4343
/* Looks up the len-byte key in t.  Returns true if present, storing its value
 * in *v when v is non-NULL. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  const lookupkey_t wanted = strkey2(key, len);
  const uint32_t hash = MurmurHash2(key, len, 0);
  return lookup(&t->t, wanted, v, hash, &streql);
}
4349
/* Removes the len-byte key from t and frees the table's copy of it.  Returns
 * true if the key was present, storing its value in *val when val is
 * non-NULL.
 *
 * The hash is computed over exactly len bytes, matching
 * upb_strtable_insert2() and upb_strtable_lookup2().  Using strlen(key) here
 * would pick the wrong chain for keys that contain NUL bytes and read past
 * the end of keys that are not NUL-terminated. */
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
                          upb_value *val) {
  uint32_t hash = MurmurHash2(key, len, 0);
  upb_tabkey tabkey;

  if (!rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
    return false;
  }
  free((void*)tabkey);
  return true;
}
4361
4362 /* Iteration */
4363
str_tabent(const upb_strtable_iter * i)4364 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4365 return &i->t->t.entries[i->index];
4366 }
4367
/* Binds iterator |i| to table |t| and positions it at the index reported by
 * begin(). */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  i->index = begin(&t->t);
  i->t = t;
}
4372
/* Advances the iterator to the index next() reports after the current one. */
void upb_strtable_next(upb_strtable_iter *i) {
  const size_t current = i->index;
  i->index = next(&i->t->t, current);
}
4376
upb_strtable_done(const upb_strtable_iter * i)4377 bool upb_strtable_done(const upb_strtable_iter *i) {
4378 return i->index >= upb_table_size(&i->t->t) ||
4379 upb_tabent_isempty(str_tabent(i));
4380 }
4381
/* Returns the key string of the current entry.  The iterator must not be
 * done (asserted). */
const char *upb_strtable_iter_key(upb_strtable_iter *i) {
  const upb_tabent *ent;

  assert(!upb_strtable_done(i));
  ent = str_tabent(i);
  return upb_tabstr(ent->key, NULL);
}
4386
/* Returns the length of the current entry's key, as written by upb_tabstr()
 * through its second argument.  The iterator must not be done (asserted). */
size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
  uint32_t key_len;
  const upb_tabent *ent;

  assert(!upb_strtable_done(i));
  ent = str_tabent(i);
  upb_tabstr(ent->key, &key_len);
  return key_len;
}
4393
upb_strtable_iter_value(const upb_strtable_iter * i)4394 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4395 assert(!upb_strtable_done(i));
4396 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
4397 }
4398
/* Puts the iterator at index SIZE_MAX, which upb_strtable_done() reports as
 * done for any table size. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  const size_t past_the_end = SIZE_MAX;
  i->index = past_the_end;
}
4402
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)4403 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4404 const upb_strtable_iter *i2) {
4405 if (upb_strtable_done(i1) && upb_strtable_done(i2))
4406 return true;
4407 return i1->t == i2->t && i1->index == i2->index;
4408 }
4409
4410
4411 /* upb_inttable ***************************************************************/
4412
4413 /* For inttables we use a hybrid structure where small keys are kept in an
4414 * array and large keys are put in the hash table. */
4415
/* Hash callback handed to insert() for integer keys; defers to
 * upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
4417
/* Equality callback for integer keys: compares the stored key with the
 * numeric member of the lookup key. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  const bool same_key = (k1 == k2.num);
  return same_key;
}
4421
mutable_array(upb_inttable * t)4422 static upb_tabval *mutable_array(upb_inttable *t) {
4423 return (upb_tabval*)t->array;
4424 }
4425
inttable_val(upb_inttable * t,uintptr_t key)4426 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
4427 if (key < t->array_size) {
4428 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4429 } else {
4430 upb_tabent *e =
4431 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4432 return e ? &e->val : NULL;
4433 }
4434 }
4435
inttable_val_const(const upb_inttable * t,uintptr_t key)4436 static const upb_tabval *inttable_val_const(const upb_inttable *t,
4437 uintptr_t key) {
4438 return inttable_val((upb_inttable*)t, key);
4439 }
4440
upb_inttable_count(const upb_inttable * t)4441 size_t upb_inttable_count(const upb_inttable *t) {
4442 return t->t.count + t->array_count;
4443 }
4444
/* Debug-only consistency check, compiled in when UPB_DEBUG_TABLE is defined
 * and NDEBUG is not: every key reached by iteration must be found by lookup,
 * and the number of iterated entries must equal upb_inttable_count(). */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter it;
    upb_inttable_begin(&it, t);
    while (!upb_inttable_done(&it)) {
      assert(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      upb_inttable_next(&it);
      seen++;
    }
    assert(seen == upb_inttable_count(t));
  }
#endif
}
4460
/* Initializes |t| with a hash part initialized from |hsize_lg2| and an array
 * part of |asize| slots (at least one).  Every array byte is filled with 0xff.
 * Returns false if either allocation fails; nothing stays allocated then. */
bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
                            size_t asize, int hsize_lg2) {
  size_t nbytes;

  if (!init(&t->t, ctype, hsize_lg2)) {
    return false;
  }

  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_count = 0;
  t->array_size = UPB_MAX(1, asize);
  nbytes = t->array_size * sizeof(upb_value);

  t->array = malloc(nbytes);
  if (t->array == NULL) {
    uninit(&t->t);
    return false;
  }

  memset(mutable_array(t), 0xff, nbytes);
  check(t);
  return true;
}
4480
/* Initializes |t| with the default sizing: array size argument 0 and hash
 * size argument 4, as passed to upb_inttable_sizedinit(). */
bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
  const size_t default_asize = 0;
  const int default_hsize_lg2 = 4;
  return upb_inttable_sizedinit(t, ctype, default_asize, default_hsize_lg2);
}
4484
/* Releases both parts of the table: the hash part through uninit(), then the
 * array part through free(). */
void upb_inttable_uninit(upb_inttable *t) {
  upb_tabval *array_part;

  uninit(&t->t);
  array_part = mutable_array(t);
  free(array_part);
}
4489
/* Inserts |val| under |key|.  Keys below array_size go into the array part
 * (the slot must be empty -- asserted); all other keys go into the hash part,
 * which is first rebuilt with size_lg2 + 1 if it is full.  Returns false only
 * when that larger hash part cannot be initialized. */
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
  /* XXX: Table can't store value (uint64_t)-1.  Need to somehow statically
   * guarantee that this is not necessary, or fix the limitation. */
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_UNUSED(tabval);
  assert(upb_arrhas(tabval));

  if (key < t->array_size) {
    assert(!upb_arrhas(t->array[key]));
    mutable_array(t)[key].val = val.val;
    t->array_count++;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t idx;

    if (!init(&grown, t->t.ctype, t->t.size_lg2 + 1)) {
      return false;
    }

    idx = begin(&t->t);
    while (idx < upb_table_size(&t->t)) {
      const upb_tabent *ent = &t->t.entries[idx];
      upb_value moved;
      uint32_t ent_hash;

      _upb_value_setval(&moved, ent->val.val, t->t.ctype);
      ent_hash = upb_inthash(ent->key);
      insert(&grown, intkey(ent->key), ent->key, moved, ent_hash, &inthash,
             &inteql);
      idx = next(&t->t, idx);
    }

    assert(t->t.count == grown.count);

    uninit(&t->t);
    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
4529
/* Returns true if |key| is present.  When |v| is non-NULL it receives the
 * stored value, tagged with the table's ctype. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *slot = inttable_val_const(t, key);

  if (slot == NULL) {
    return false;
  }
  if (v != NULL) {
    _upb_value_setval(v, slot->val, t->t.ctype);
  }
  return true;
}
4536
/* Overwrites the value stored under an existing |key|.  Returns false, and
 * changes nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *slot = inttable_val(t, key);

  if (slot != NULL) {
    slot->val = val.val;
    return true;
  }
  return false;
}
4543
/* Removes |key|.  For array-part keys the slot is reset to
 * UPB_TABVALUE_EMPTY_INIT and, when |val| is non-NULL, the old value is
 * copied out first; hash-part keys are delegated to rm().  Returns whether
 * an entry was removed. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool removed_ok;

  if (key >= t->array_size) {
    upb_tabkey removed;
    uint32_t hash = upb_inthash(key);
    removed_ok = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
  } else if (upb_arrhas(t->array[key])) {
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    if (val) {
      _upb_value_setval(val, t->array[key].val, t->t.ctype);
    }
    mutable_array(t)[key] = empty;
    t->array_count--;
    removed_ok = true;
  } else {
    removed_ok = false;
  }

  check(t);
  return removed_ok;
}
4566
/* Inserts |val| under the key equal to the current entry count. */
bool upb_inttable_push(upb_inttable *t, upb_value val) {
  const size_t next_key = upb_inttable_count(t);
  return upb_inttable_insert(t, next_key, val);
}
4570
upb_inttable_pop(upb_inttable * t)4571 upb_value upb_inttable_pop(upb_inttable *t) {
4572 upb_value val;
4573 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4574 UPB_ASSERT_VAR(ok, ok);
4575 return val;
4576 }
4577
/* Pointer-keyed insert: the pointer is converted to uintptr_t and used as the
 * integer key. */
bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_insert(t, int_key, val);
}
4581
/* Pointer-keyed lookup: the pointer is converted to uintptr_t and used as the
 * integer key. */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_lookup(t, int_key, v);
}
4586
/* Pointer-keyed remove: the pointer is converted to uintptr_t and used as the
 * integer key. */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_remove(t, int_key, val);
}
4590
/* Rebuilds |t| into a freshly sized table: an array part chosen from a
 * power-of-two histogram of the keys and the MIN_DENSITY rule, and a hash part
 * sized from MAX_LOAD for the entries that do not fit in the array.
 *
 * The function cannot report failure (it returns void), so if the new table
 * cannot be allocated or populated, the partially built table is discarded
 * and |t| is left exactly as it was.  On success the old storage is released
 * and |t| takes over the new table. */
void upb_inttable_compact(upb_inttable *t) {
  /* Create a power-of-two histogram of the table keys. */
  int counts[UPB_MAXARRSIZE + 1] = {0};
  uintptr_t max_key = 0;
  upb_inttable_iter i;
  size_t arr_size;
  int arr_count;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    if (key > max_key) {
      max_key = key;
    }
    /* NOTE(review): relies on log2ceil() never returning more than
     * UPB_MAXARRSIZE -- confirm against its definition. */
    counts[log2ceil(key)]++;
  }

  arr_size = 1;
  arr_count = upb_inttable_count(t);

  if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
    /* We can put 100% of the entries in the array part. */
    arr_size = max_key + 1;
  } else {
    /* Find the largest power of two that satisfies the MIN_DENSITY
     * definition. */
    int size_lg2;
    for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
      arr_size = 1 << size_lg2;
      arr_count -= counts[size_lg2];
      if (arr_count >= arr_size * MIN_DENSITY) {
        break;
      }
    }
  }

  /* Array part must always be at least 1 entry large to catch lookups of key
   * 0.  Key 0 must always be in the array part because "0" in the hash part
   * denotes an empty entry. */
  arr_size = UPB_MAX(arr_size, 1);

  {
    /* Insert all elements into new, perfectly-sized table. */
    int hash_count = upb_inttable_count(t) - arr_count;
    int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    assert(hash_count >= 0);
    if (!upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2)) {
      /* new_t was never set up; touching it would be undefined.  Keep the
       * existing table, which still holds every entry. */
      return;
    }
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      if (!upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i))) {
        /* Never swap in a table that is missing entries. */
        upb_inttable_uninit(&new_t);
        return;
      }
    }
    assert(new_t.array_size == arr_size);
    assert(new_t.t.size_lg2 == hashsize_lg2);
  }
  upb_inttable_uninit(t);
  *t = new_t;
}
4652
4653 /* Iteration. */
4654
int_tabent(const upb_inttable_iter * i)4655 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4656 assert(!i->array_part);
4657 return &i->t->t.entries[i->index];
4658 }
4659
int_arrent(const upb_inttable_iter * i)4660 static upb_tabval int_arrent(const upb_inttable_iter *i) {
4661 assert(i->array_part);
4662 return i->t->array[i->index];
4663 }
4664
/* Binds |i| to |t| and moves it to the first entry: the iterator starts in
 * the array part at index -1 and is advanced once with upb_inttable_next(). */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
4671
/* Advances the iterator by one entry.  In the array part it scans forward to
 * the next occupied slot; when the array is exhausted it switches to the hash
 * part at begin().  In the hash part it simply follows next(). */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *t = iter->t;

  if (!iter->array_part) {
    iter->index = next(&t->t, iter->index);
    return;
  }

  for (;;) {
    ++iter->index;
    if (!(iter->index < t->array_size)) {
      break;
    }
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Array part exhausted: continue with the hash part. */
  iter->array_part = false;
  iter->index = begin(&t->t);
}
4686
upb_inttable_done(const upb_inttable_iter * i)4687 bool upb_inttable_done(const upb_inttable_iter *i) {
4688 if (i->array_part) {
4689 return i->index >= i->t->array_size ||
4690 !upb_arrhas(int_arrent(i));
4691 } else {
4692 return i->index >= upb_table_size(&i->t->t) ||
4693 upb_tabent_isempty(int_tabent(i));
4694 }
4695 }
4696
upb_inttable_iter_key(const upb_inttable_iter * i)4697 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4698 assert(!upb_inttable_done(i));
4699 return i->array_part ? i->index : int_tabent(i)->key;
4700 }
4701
upb_inttable_iter_value(const upb_inttable_iter * i)4702 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4703 assert(!upb_inttable_done(i));
4704 return _upb_value_val(
4705 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
4706 i->t->t.ctype);
4707 }
4708
/* Forces the iterator into the done state: hash part, index SIZE_MAX. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
}
4713
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)4714 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4715 const upb_inttable_iter *i2) {
4716 if (upb_inttable_done(i1) && upb_inttable_done(i2))
4717 return true;
4718 return i1->t == i2->t && i1->index == i2->index &&
4719 i1->array_part == i2->array_part;
4720 }
4721
4722 #ifdef UPB_UNALIGNED_READS_OK
4723 /* -----------------------------------------------------------------------------
4724 * MurmurHash2, by Austin Appleby (released as public domain).
4725 * Reformatted and C99-ified by Joshua Haberman.
4726 * Note - This code makes a few assumptions about how your machine behaves -
4727 * 1. We can read a 4-byte value from any address without crashing
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
4729 * And it has a few limitations -
4730 * 1. It will not work incrementally.
4731 * 2. It will not produce the same results on little-endian and big-endian
4732 * machines. */
/* Hashes the |len| bytes at |key| with MurmurHash2, starting from |seed|.
 *
 * Each 4-byte word is loaded with memcpy() rather than by dereferencing a
 * cast pointer: this keeps the const qualifier and avoids reading the bytes
 * through an incompatible pointer type.  The word is still assembled in
 * native byte order, so results differ between little- and big-endian
 * machines. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value */
  uint32_t h = seed ^ (uint32_t)len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t *data = (const uint8_t *)key;
  while (len >= 4) {
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch (len) {
    case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m; break;
    default: break;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
4773
4774 #else /* !UPB_UNALIGNED_READS_OK */
4775
4776 /* -----------------------------------------------------------------------------
4777 * MurmurHashAligned2, by Austin Appleby
4778 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
4779 * on certain platforms.
4780 * Performance will be lower than MurmurHash2 */
4781
/* One MurmurHash2 mixing round: scrambles word |k| and folds it into |h|.
 * Expects a shift count named 'r' to be in scope at the point of use. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* Hashes the |len| bytes at |key|, starting from |seed|, reading 32-bit words
 * only from 4-byte-aligned addresses.  When |key| is misaligned (and at least
 * 4 bytes long) the leading bytes are gathered into a carry word, and every
 * mixed word is stitched together from the carry and the next aligned
 * load. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  uint8_t align = (uintptr_t)data & 3;

  if (align == 0 || len < 4) {
    /* Already aligned, or too short to contain a whole word. */
    for (; len >= 4; data += 4, len -= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);
    }

    /* ----------
     * Handle tail bytes */

    switch (len) {
      case 3: h ^= data[2] << 16;  /* fallthrough */
      case 2: h ^= data[1] << 8;   /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    }
  } else {
    /* Pre-load the temp registers */
    uint32_t carry = 0;
    uint32_t word = 0;
    int32_t shl;
    int32_t shr;

    switch (align) {
      case 1: carry |= data[2] << 16;  /* fallthrough */
      case 2: carry |= data[1] << 8;   /* fallthrough */
      case 3: carry |= data[0];
    }

    carry <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    shl = 8 * (4-align);
    shr = 8 * align;

    /* Mix */

    for (; len >= 4; data += 4, len -= 4) {
      uint32_t k;

      word = *(uint32_t *)data;
      k = (carry >> shr) | (word << shl);

      MIX(h,k,m);

      carry = word;
    }

    /* Handle leftover data in temp registers */

    word = 0;

    if (len >= align) {
      uint32_t k;

      switch (align) {
        case 3: word |= data[2] << 16;  /* fallthrough */
        case 2: word |= data[1] << 8;   /* fallthrough */
        case 1: word |= data[0];
      }

      k = (carry >> shr) | (word << shl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch (len) {
        case 3: h ^= data[2] << 16;  /* fallthrough */
        case 2: h ^= data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      }
    } else {
      switch (len) {
        case 3: word |= data[2] << 16;  /* fallthrough */
        case 2: word |= data[1] << 8;   /* fallthrough */
        case 1: word |= data[0];        /* fallthrough */
        case 0: h ^= (carry >> shr) | (word << shl); h *= m;
      }
    }
  }

  /* Final mixing, identical for both paths. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
#undef MIX
4897
4898 #endif /* UPB_UNALIGNED_READS_OK */
4899
4900 #include <errno.h>
4901 #include <stdarg.h>
4902 #include <stddef.h>
4903 #include <stdint.h>
4904 #include <stdio.h>
4905 #include <stdlib.h>
4906 #include <string.h>
4907
/* Error callback that prints the status message, followed by a newline, to
 * stderr.  |closure| is unused.  Always returns false. */
bool upb_dumptostderr(void *closure, const upb_status* status) {
  const char *message;

  UPB_UNUSED(closure);
  message = upb_status_errmsg(status);
  fprintf(stderr, "%s\n", message);
  return false;
}
4913
4914 /* Guarantee null-termination and provide ellipsis truncation.
4915 * It may be tempting to "optimize" this by initializing these final
4916 * four bytes up-front and then being careful never to overwrite them,
4917 * this is safer and simpler. */
nullz(upb_status * status)4918 static void nullz(upb_status *status) {
4919 const char *ellipsis = "...";
4920 size_t len = strlen(ellipsis);
4921 assert(sizeof(status->msg) > len);
4922 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
4923 }
4924
/* Resets |status| to the OK state: ok flag set, code 0, empty message.
 * A NULL |status| is ignored. */
void upb_status_clear(upb_status *status) {
  if (status != NULL) {
    status->msg[0] = '\0';
    status->code_ = 0;
    status->ok_ = true;
  }
}
4931
upb_ok(const upb_status * status)4932 bool upb_ok(const upb_status *status) { return status->ok_; }
4933
upb_status_errspace(const upb_status * status)4934 upb_errorspace *upb_status_errspace(const upb_status *status) {
4935 return status->error_space_;
4936 }
4937
upb_status_errcode(const upb_status * status)4938 int upb_status_errcode(const upb_status *status) { return status->code_; }
4939
upb_status_errmsg(const upb_status * status)4940 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
4941
/* Marks |status| as failed and copies |msg| into its message buffer, then
 * applies nullz() to the buffer.  A NULL |status| is ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  if (status != NULL) {
    status->ok_ = false;
    strncpy(status->msg, msg, sizeof(status->msg));
    nullz(status);
  }
}
4948
/* printf-style variant of upb_status_seterrmsg(): packages the variadic
 * arguments into a va_list and forwards to upb_status_vseterrf(). */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
4955
/* Marks |status| as failed and formats the message into its buffer with
 * _upb_vsnprintf(), then applies nullz() to the buffer.  A NULL |status| is
 * ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status != NULL) {
    status->ok_ = false;
    _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
    nullz(status);
  }
}
4962
/* Marks |status| as failed with |code| in error space |space|, then calls the
 * space's set_message callback with the status and the code.  A NULL |status|
 * is ignored; |space| is dereferenced unconditionally. */
void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
                           int code) {
  if (status == NULL) {
    return;
  }

  status->code_ = code;
  status->error_space_ = space;
  status->ok_ = false;
  space->set_message(status, code);
}
4971
/* Copies the whole status structure from |from| into |to|.  A NULL |to| is
 * ignored. */
void upb_status_copy(upb_status *to, const upb_status *from) {
  if (to != NULL) {
    *to = *from;
  }
}
4976 /* This file was generated by upbc (the upb compiler).
4977 * Do not edit -- your changes will be discarded when the file is
4978 * regenerated. */
4979
4980
4981 static const upb_msgdef msgs[20];
4982 static const upb_fielddef fields[81];
4983 static const upb_enumdef enums[4];
4984 static const upb_tabent strentries[236];
4985 static const upb_tabent intentries[14];
4986 static const upb_tabval arrays[232];
4987
4988 #ifdef UPB_DEBUG_REFS
4989 static upb_inttable reftables[212];
4990 #endif
4991
4992 static const upb_msgdef msgs[20] = {
4993 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
4994 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
4995 UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
4996 UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
4997 UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
4998 UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
4999 UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UPB_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]),
5000 UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]),
5001 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]),
5002 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]),
5003 UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]),
5004 UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]),
5005 UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]),
5006 UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]),
5007 UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]),
5008 UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]),
5009 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[128]),&reftables[32], &reftables[33]),
5010 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]),
5011 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]),
5012 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]),
5013 };
5014
5015 static const upb_fielddef fields[81] = {
5016 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
5017 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
5018 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
5019 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
5020 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
5021 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
5022 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
5023 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
5024 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
5025 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
5026 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
5027 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
5028 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
5029 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
5030 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
5031 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
5032 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
5033 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
5034 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
5035 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
5036 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
5037 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
5038 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
5039 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
5040 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
5041 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
5042 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
5043 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
5044 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
5045 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
5046 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
5047 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
5048 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
5049 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
5050 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
5051 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
5052 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
5053 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
5054 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
5055 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
5056 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
5057 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
5058 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
5059 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
5060 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
5061 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
5062 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
5063 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
5064 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
5065 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
5066 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
5067 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
5068 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
5069 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
5070 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
5071 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], (const upb_def*)(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
5072 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
5073 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
5074 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
5075 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
5076 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
5077 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
5078 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
5079 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
5080 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
5081 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
5082 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
5083 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
5084 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
5085 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
5086 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
5087 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
5088 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
5089 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
5090 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
5091 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
5092 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
5093 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
5094 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
5095 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
5096 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
5097 };
5098
/* Static definitions of the four enum types named in the initializers below:
 * FieldDescriptorProto.Label, FieldDescriptorProto.Type, FieldOptions.CType
 * and FileOptions.OptimizeMode.
 *
 * Each UPB_ENUMDEF_INIT is given, in order: the fully-qualified enum name, a
 * string-keyed table initializer (UPB_CTYPE_INT32 values) backed by a slice
 * of strentries[], an int-keyed table initializer (UPB_CTYPE_CSTR values)
 * backed by a slice of arrays[], a literal 0, and two reftables[] slots.
 *
 * NOTE(review): the tables are cross-referenced by hard-coded index, so this
 * looks machine-generated; presumably it should be regenerated rather than
 * edited by hand -- confirm against the generator. */
5099 static const upb_enumdef enums[4] = {
5100 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]),
5101 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]),
5102 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]),
5103 UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]),
5104 };
5105
/* Shared backing storage for the statically initialized string-keyed tables
 * in this file.  Other tables address slices of it by index (for example the
 * enum definitions use &strentries[160], [164], [196] and [200]), so the
 * position of every element matters.
 *
 * Each element is { key, value, next }:
 *   - key:   UPB_TABKEY_NONE for an unused slot, otherwise UPB_TABKEY_STR
 *            whose first four arguments are single-byte octal strings
 *            holding the key length (the first one carries the length for
 *            every key here, e.g. "\011" for the 9-character "extension"),
 *            followed by the key text itself.
 *   - value: empty, a pointer into fields[] / msgs[] / enums[], or an
 *            integer (enum value number).
 *   - next:  NULL or the address of another strentries[] slot.
 *            NOTE(review): presumably the collision-chain link within the
 *            same table slice -- confirm against the table implementation.
 *
 * Indices 0..159 hold field-name keys whose values point into fields[]. */
5106 static const upb_tabent strentries[236] = {
5107 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
5108 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5109 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5110 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
5111 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5112 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5113 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5114 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
5115 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
5116 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5117 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
5118 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5119 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5120 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5121 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
5122 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[9]), &strentries[14]},
5123 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
5124 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
5125 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5126 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5127 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5128 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
5129 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
5130 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[40]), &strentries[22]},
5131 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
5132 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5133 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
5134 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5135 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
5136 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5137 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
5138 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[30]},
5139 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
5140 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5141 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5142 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5143 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5144 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
5145 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5146 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
5147 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5148 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5149 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5150 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5151 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[49]},
5152 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5153 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5154 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
5155 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL},
5156 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[48]},
5157 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
5158 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
5159 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_TABVALUE_PTR_INIT(&fields[11]), &strentries[67]},
5160 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5161 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
5162 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5163 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5164 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5165 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5166 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
5167 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
5168 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5169 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
5170 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5171 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5172 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
5173 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5174 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
5175 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
5176 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), NULL},
5177 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5178 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
5179 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
5180 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5181 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
5182 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5183 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5184 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5185 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
5186 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[32]), NULL},
5187 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
5188 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[82]},
5189 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
5190 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[61]), &strentries[81]},
5191 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5192 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
5193 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5194 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5195 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
5196 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5197 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
5198 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5199 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[24]), NULL},
5200 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5201 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[23]), &strentries[102]},
5202 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
5203 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5204 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5205 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5206 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
5207 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
5208 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
5209 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
5210 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[25]), NULL},
5211 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]},
5212 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5213 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
5214 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
5215 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5216 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5217 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5218 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
5219 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
5220 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5221 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
5222 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
5223 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
5224 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5225 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5226 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5227 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5228 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[54]), &strentries[122]},
5229 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[33]), NULL},
5230 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[121]},
5231 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
5232 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5233 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5234 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5235 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5236 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5237 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
5238 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5239 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5240 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5241 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5242 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[65]), &strentries[139]},
5243 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5244 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
5245 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[29]), &strentries[137]},
5246 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
5247 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
5248 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5249 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5250 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
5251 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5252 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5253 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5254 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[43]), NULL},
5255 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
5256 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5257 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5258 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5259 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5260 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
5261 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
5262 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[67]), &strentries[154]},
5263 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5264 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5265 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
5266 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[42]), NULL},
/* Index 160: slice referenced as &strentries[160] by the
 * FieldDescriptorProto.Label enum definition (enum value name -> number). */
5267 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[162]},
5268 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5269 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
5270 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
/* Index 164: slice referenced as &strentries[164] by the
 * FieldDescriptorProto.Type enum definition. */
5271 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
5272 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5273 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5274 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5275 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5276 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
5277 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[193]},
5278 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
5279 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5280 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
5281 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
5282 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
5283 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5284 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[194]},
5285 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5286 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5287 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[191]},
5288 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5289 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5290 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5291 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5292 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
5293 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
5294 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5295 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[190]},
5296 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5297 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
5298 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
5299 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
5300 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
5301 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
5302 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
/* Index 196: slice referenced as &strentries[196] by the
 * FieldOptions.CType enum definition. */
5303 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5304 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
5305 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[197]},
5306 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
/* Index 200: slice referenced as &strentries[200] by the
 * FileOptions.OptimizeMode enum definition. */
5307 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
5308 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[203]},
5309 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5310 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
/* Index 204 to the end: keys are fully-qualified message/enum names and the
 * values point into msgs[] and enums[]. */
5311 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5312 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5313 {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
5314 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
5315 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
5316 {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
5317 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5318 {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL},
5319 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5320 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5321 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5322 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]},
5323 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5324 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
5325 {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
5326 {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
5327 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5328 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
5329 {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
5330 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL},
5331 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]},
5332 {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
5333 {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL},
5334 {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]},
5335 {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]},
5336 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL},
5337 {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
5338 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]},
5339 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
5340 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
5341 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]},
5342 {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
5343 };
5344
/* Backing storage for statically initialized integer-keyed table entries.
 *
 * The 14 slots alternate between an unused entry (UPB_TABKEY_NONE) and an
 * entry keyed by 999 whose value points into fields[].  The seven targets,
 * fields[71] through fields[77], are the "uninterpreted_option" fields, each
 * declared with field number 999.  Every next pointer is NULL.
 *
 * NOTE(review): presumably each adjacent {empty, 999} pair is the hash part
 * of one message's field-number table, with small field numbers served from
 * arrays[] instead -- confirm against the msgs[] initializers. */
5345 static const upb_tabent intentries[14] = {
5346 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5347 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
5348 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5349 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
5350 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5351 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
5352 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5353 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
5354 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5355 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
5356 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5357 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
5358 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5359 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
5360 };
5361
/* Generated value storage shared by the statically initialized tables.
 * The leading entries are pointers into fields[], with empty values marking
 * unused slots; the trailing entries are string literals holding enum value
 * names (LABEL_*, TYPE_*, STRING/CORD/STRING_PIECE and
 * SPEED/CODE_SIZE/LITE_RUNTIME).
 * NOTE(review): which table owns which slice of this array is decided by
 * initializers outside this view. */
static const upb_tabval arrays[232] = {
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[38]),
  UPB_TABVALUE_PTR_INIT(&fields[16]),
  UPB_TABVALUE_PTR_INIT(&fields[44]),
  UPB_TABVALUE_PTR_INIT(&fields[9]),
  UPB_TABVALUE_PTR_INIT(&fields[15]),
  UPB_TABVALUE_PTR_INIT(&fields[14]),
  UPB_TABVALUE_PTR_INIT(&fields[49]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[66]),
  UPB_TABVALUE_PTR_INIT(&fields[8]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[40]),
  UPB_TABVALUE_PTR_INIT(&fields[78]),
  UPB_TABVALUE_PTR_INIT(&fields[50]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[1]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[37]),
  UPB_TABVALUE_PTR_INIT(&fields[47]),
  UPB_TABVALUE_PTR_INIT(&fields[52]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[41]),
  UPB_TABVALUE_PTR_INIT(&fields[12]),
  UPB_TABVALUE_PTR_INIT(&fields[46]),
  UPB_TABVALUE_PTR_INIT(&fields[27]),
  UPB_TABVALUE_PTR_INIT(&fields[69]),
  UPB_TABVALUE_PTR_INIT(&fields[70]),
  UPB_TABVALUE_PTR_INIT(&fields[4]),
  UPB_TABVALUE_PTR_INIT(&fields[51]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[3]),
  UPB_TABVALUE_PTR_INIT(&fields[58]),
  UPB_TABVALUE_PTR_INIT(&fields[6]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[28]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[11]),
  UPB_TABVALUE_PTR_INIT(&fields[79]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[34]),
  UPB_TABVALUE_PTR_INIT(&fields[57]),
  UPB_TABVALUE_PTR_INIT(&fields[5]),
  UPB_TABVALUE_PTR_INIT(&fields[32]),
  UPB_TABVALUE_PTR_INIT(&fields[10]),
  UPB_TABVALUE_PTR_INIT(&fields[63]),
  UPB_TABVALUE_PTR_INIT(&fields[13]),
  UPB_TABVALUE_PTR_INIT(&fields[53]),
  UPB_TABVALUE_PTR_INIT(&fields[64]),
  UPB_TABVALUE_PTR_INIT(&fields[61]),
  UPB_TABVALUE_PTR_INIT(&fields[80]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[17]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[26]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[25]),
  UPB_TABVALUE_PTR_INIT(&fields[48]),
  UPB_TABVALUE_PTR_INIT(&fields[24]),
  UPB_TABVALUE_PTR_INIT(&fields[18]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[2]),
  UPB_TABVALUE_PTR_INIT(&fields[23]),
  UPB_TABVALUE_PTR_INIT(&fields[62]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[22]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[31]),
  UPB_TABVALUE_PTR_INIT(&fields[45]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[39]),
  UPB_TABVALUE_PTR_INIT(&fields[20]),
  UPB_TABVALUE_PTR_INIT(&fields[56]),
  UPB_TABVALUE_PTR_INIT(&fields[55]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[35]),
  UPB_TABVALUE_PTR_INIT(&fields[33]),
  UPB_TABVALUE_PTR_INIT(&fields[54]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[30]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[59]),
  UPB_TABVALUE_PTR_INIT(&fields[65]),
  UPB_TABVALUE_PTR_INIT(&fields[29]),
  UPB_TABVALUE_PTR_INIT(&fields[68]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[36]),
  UPB_TABVALUE_PTR_INIT(&fields[19]),
  UPB_TABVALUE_PTR_INIT(&fields[60]),
  UPB_TABVALUE_PTR_INIT(&fields[43]),
  UPB_TABVALUE_PTR_INIT(&fields[7]),
  UPB_TABVALUE_PTR_INIT(&fields[67]),
  UPB_TABVALUE_PTR_INIT(&fields[0]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[42]),
  UPB_TABVALUE_PTR_INIT(&fields[21]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
  UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
  UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
  UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
  UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
  UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
  UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
  UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
  UPB_TABVALUE_PTR_INIT("STRING"),
  UPB_TABVALUE_PTR_INIT("CORD"),
  UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("SPEED"),
  UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
  UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
};
5596
/* The statically initialized symbol table returned by
 * upbdefs_google_protobuf_descriptor().  Its string table is initialized from
 * &strentries[204], and &reftables[210] / &reftables[211] are passed as the
 * trailing initializer arguments.
 * NOTE(review): the numeric UPB_STRTABLE_INIT arguments (24, 31, 5) are
 * presumably entry count, hash mask and log2 size -- confirm against the
 * macro definition. */
static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_CTYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]);
5598
upbdefs_google_protobuf_descriptor(const void * owner)5599 const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
5600 upb_symtab_ref(&symtab, owner);
5601 return &symtab;
5602 }
5603
5604 #ifdef UPB_DEBUG_REFS
5605 static upb_inttable reftables[212] = {
5606 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5607 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5608 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5609 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5610 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5611 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5612 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5613 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5614 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5615 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5616 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5617 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5618 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5619 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5620 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5621 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5622 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5623 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5624 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5625 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5626 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5627 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5628 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5629 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5630 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5631 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5632 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5633 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5634 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5635 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5636 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5637 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5638 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5639 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5640 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5641 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5642 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5643 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5644 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5645 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5646 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5647 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5648 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5649 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5650 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5651 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5652 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5653 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5654 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5655 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5656 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5657 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5658 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5659 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5660 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5661 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5662 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5663 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5664 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5665 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5666 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5667 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5668 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5669 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5670 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5671 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5672 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5673 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5674 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5675 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5676 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5677 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5678 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5679 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5680 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5681 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5682 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5683 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5684 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5685 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5686 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5687 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5688 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5689 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5690 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5691 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5692 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5693 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5694 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5695 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5696 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5697 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5698 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5699 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5700 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5701 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5702 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5703 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5704 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5705 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5706 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5707 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5708 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5709 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5710 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5711 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5712 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5713 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5714 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5715 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5716 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5717 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5718 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5719 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5720 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5721 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5722 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5723 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5724 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5725 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5726 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5727 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5728 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5729 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5730 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5731 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5732 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5733 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5734 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5735 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5736 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5737 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5738 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5739 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5740 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5741 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5742 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5743 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5744 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5745 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5746 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5747 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5748 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5749 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5750 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5751 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5752 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5753 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5754 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5755 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5756 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5757 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5758 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5759 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5760 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5761 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5762 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5763 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5764 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5765 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5766 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5767 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5768 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5769 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5770 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5771 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5772 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5773 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5774 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5775 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5776 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5777 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5778 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5779 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5780 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5781 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5782 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5783 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5784 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5785 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5786 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5787 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5788 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5789 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5790 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5791 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5792 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5793 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5794 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5795 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5796 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5797 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5798 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5799 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5800 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5801 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5802 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5803 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5804 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5805 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5806 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5807 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5808 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5809 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5810 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5811 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5812 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5813 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5814 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5815 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5816 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5817 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5818 };
5819 #endif
5820
5821 /*
5822 ** XXX: The routines in this file that consume a string do not currently
5823 ** support having the string span buffers. In the future, as upb_sink and
5824 ** its buffering/sharing functionality evolve there should be an easy and
5825 ** idiomatic way of correctly handling this case. For now, we accept this
5826 ** limitation since we currently only parse descriptors from single strings.
5827 */
5828
5829
5830 #include <errno.h>
5831 #include <stdlib.h>
5832 #include <string.h>
5833
/* upb_deflist is an internal-only dynamic array for storing a growing list of
 * upb_defs. */
typedef struct {
  upb_def **defs;  /* Heap array of def pointers; NULL until the first push. */
  size_t len;      /* Number of defs currently stored in "defs". */
  size_t size;     /* Allocated capacity of "defs", in elements. */
  bool owned;      /* True while the list holds a ref on each def; cleared by
                    * upb_deflist_donaterefs(). */
} upb_deflist;
5842
/* We keep a stack of all the message scopes we are currently in, as well as
 * the top-level file scope.  This is necessary to correctly qualify the
 * definitions that are contained inside.  "name" tracks the name of the
 * message or package (a bare name -- not qualified by any enclosing scopes).
 * The frame owns "name"; it is free()d when the scope ends or is renamed. */
typedef struct {
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
} upb_descreader_frame;
5853
/* The maximum number of nested declarations that are allowed, i.e.
 *   message Foo {
 *     message Bar {
 *       message Baz {
 *       }
 *     }
 *   }
 *
 * This is a resource limit that affects how big our runtime stack can grow;
 * it is the fixed capacity of upb_descreader.stack.
 * TODO: make this a runtime-settable property of the Reader instance. */
#define UPB_MAX_MESSAGE_NESTING 64
5865
/* State for building upb_defs from a serialized descriptor.  The handlers
 * below receive a pointer to this struct as their closure. */
struct upb_descreader {
  upb_sink sink;  /* NOTE(review): presumably the sink that feeds the handlers
                   * below; not touched by the code in this section. */
  upb_deflist defs;  /* Every def created so far, in creation order. */
  /* Scope stack: one frame per file/message container currently open. */
  upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
  int stack_len;  /* Number of frames in use in "stack". */

  /* Scratch state for the enum value currently being read (enumval_*). */
  uint32_t number;
  char *name;        /* Owned; free()d once the value has been added. */
  bool saw_number;
  bool saw_name;

  /* Textual default of the field currently being read, or NULL.  Owned;
   * released when the next field starts. */
  char *default_string;

  /* The field currently being built (set by field_startmsg). */
  upb_fielddef *f;
};
5881
/* Returns a malloc()ed, NUL-terminated copy of the first n bytes of buf, or
 * NULL if the allocation fails.  The caller must free() the result. */
static char *upb_strndup(const char *buf, size_t n) {
  char *copy = malloc(n + 1);
  if (copy != NULL) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
5889
/* Returns a newly allocated string that joins the input strings with a '.',
 * for example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz")        -> "Baz"
 *   join(NULL, "Baz")      -> "Baz"
 * "name" must not be NULL.  Returns NULL if memory cannot be allocated.
 * The caller owns the returned string and must free() it. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len = base ? strlen(base) : 0;
  size_t name_len = strlen(name);
  /* A non-empty base contributes its own bytes plus the '.' separator. */
  size_t prefix_len = base_len ? base_len + 1 : 0;
  char *ret = malloc(prefix_len + name_len + 1);

  if (!ret) return NULL;
  if (base_len) {
    memcpy(ret, base, base_len);
    ret[base_len] = '.';
  }
  /* name_len + 1 so that name's terminating NUL is copied too. */
  memcpy(ret + prefix_len, name, name_len + 1);
  return ret;
}
5907
5908
5909 /* upb_deflist ****************************************************************/
5910
/* Puts "l" into the empty state: no storage, no elements, and marked as
 * owning the refs of whatever is pushed later. */
void upb_deflist_init(upb_deflist *l) {
  l->defs = NULL;
  l->len = 0;
  l->size = 0;
  l->owned = true;
}
5917
/* Releases the list's storage.  If the list still owns its refs (they were
 * not handed off with upb_deflist_donaterefs), each def is unref'd first with
 * the list itself as the owner. */
void upb_deflist_uninit(upb_deflist *l) {
  if (l->owned) {
    size_t idx;
    for (idx = 0; idx != l->len; idx++) {
      upb_def_unref(l->defs[idx], l);
    }
  }
  free(l->defs);
}
5925
/* Appends "d" to the list, growing the backing array when needed.
 *
 * Returns true on success.  Returns false if the array could not be grown;
 * in that case the list is left exactly as it was (same storage, same
 * length) and "d" has not been stored, so the caller still holds it. */
bool upb_deflist_push(upb_deflist *l, upb_def *d) {
  if (l->len + 1 >= l->size) {
    upb_def **new_defs;
    size_t new_size = UPB_MAX(l->size, 4);
    new_size *= 2;
    /* Keep the old pointer until realloc() succeeds: on failure the original
     * block is still valid and must not be lost. */
    new_defs = realloc(l->defs, new_size * sizeof(*new_defs));
    if (!new_defs) return false;
    l->defs = new_defs;
    l->size = new_size;
  }
  /* Only bump the length once the slot is known to exist. */
  l->defs[l->len++] = d;
  return true;
}
5937
/* Hands the list's ref on every def over to "owner" and marks the list as no
 * longer owning them.  Must only be called while the list still owns its
 * refs (asserted). */
void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
  size_t idx = 0;
  assert(l->owned);
  while (idx < l->len) {
    upb_def_donateref(l->defs[idx], l, owner);
    idx++;
  }
  l->owned = false;
}
5945
upb_deflist_last(upb_deflist * l)5946 static upb_def *upb_deflist_last(upb_deflist *l) {
5947 return l->defs[l->len-1];
5948 }
5949
5950 /* Qualify the defname for all defs starting with offset "start" with "str". */
upb_deflist_qualify(upb_deflist * l,char * str,int32_t start)5951 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
5952 uint32_t i;
5953 for (i = start; i < l->len; i++) {
5954 upb_def *def = l->defs[i];
5955 char *name = upb_join(str, upb_def_fullname(def));
5956 upb_def_setfullname(def, name, NULL);
5957 free(name);
5958 }
5959 }
5960
5961
5962 /* upb_descreader ************************************************************/
5963
upb_descreader_top(upb_descreader * r)5964 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
5965 int index;
5966 assert(r->stack_len > 1);
5967 index = r->stack[r->stack_len-1].start - 1;
5968 assert(index >= 0);
5969 return upb_downcast_msgdef_mutable(r->defs.defs[index]);
5970 }
5971
upb_descreader_last(upb_descreader * r)5972 static upb_def *upb_descreader_last(upb_descreader *r) {
5973 return upb_deflist_last(&r->defs);
5974 }
5975
5976 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
5977 * entities that have names and can contain sub-definitions. */
upb_descreader_startcontainer(upb_descreader * r)5978 void upb_descreader_startcontainer(upb_descreader *r) {
5979 upb_descreader_frame *f = &r->stack[r->stack_len++];
5980 f->start = r->defs.len;
5981 f->name = NULL;
5982 }
5983
/* Closes the innermost scope: pops its frame, qualifies every def created
 * since the frame was opened with the frame's name, and releases the name. */
void upb_descreader_endcontainer(upb_descreader *r) {
  upb_descreader_frame *frame;
  r->stack_len--;
  frame = &r->stack[r->stack_len];
  upb_deflist_qualify(&r->defs, frame->name, frame->start);
  free(frame->name);
  frame->name = NULL;
}
5990
/* Sets the name of the innermost scope to "str", freeing any name it had
 * before.  The frame takes ownership of "str". */
void upb_descreader_setscopename(upb_descreader *r, char *str) {
  upb_descreader_frame *top = &r->stack[r->stack_len - 1];
  char *previous = top->name;
  top->name = str;
  free(previous);
}
5996
5997 /* Handlers for google.protobuf.FileDescriptorProto. */
file_startmsg(void * r,const void * hd)5998 static bool file_startmsg(void *r, const void *hd) {
5999 UPB_UNUSED(hd);
6000 upb_descreader_startcontainer(r);
6001 return true;
6002 }
6003
/* End-of-message handler: closes the file-level scope, which qualifies the
 * defs created inside it with the scope's name.  Always succeeds; "status"
 * is not written. */
static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
  UPB_UNUSED(hd);
  UPB_UNUSED(status);
  upb_descreader_endcontainer((upb_descreader *)closure);
  return true;
}
6011
file_onpackage(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6012 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6013 size_t n, const upb_bufhandle *handle) {
6014 upb_descreader *r = closure;
6015 UPB_UNUSED(hd);
6016 UPB_UNUSED(handle);
6017 /* XXX: see comment at the top of the file. */
6018 upb_descreader_setscopename(r, upb_strndup(buf, n));
6019 return n;
6020 }
6021
6022 /* Handlers for google.protobuf.EnumValueDescriptorProto. */
enumval_startmsg(void * closure,const void * hd)6023 static bool enumval_startmsg(void *closure, const void *hd) {
6024 upb_descreader *r = closure;
6025 UPB_UNUSED(hd);
6026 r->saw_number = false;
6027 r->saw_name = false;
6028 return true;
6029 }
6030
enumval_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6031 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6032 size_t n, const upb_bufhandle *handle) {
6033 upb_descreader *r = closure;
6034 UPB_UNUSED(hd);
6035 UPB_UNUSED(handle);
6036 /* XXX: see comment at the top of the file. */
6037 free(r->name);
6038 r->name = upb_strndup(buf, n);
6039 r->saw_name = true;
6040 return n;
6041 }
6042
enumval_onnumber(void * closure,const void * hd,int32_t val)6043 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
6044 upb_descreader *r = closure;
6045 UPB_UNUSED(hd);
6046 r->number = val;
6047 r->saw_number = true;
6048 return true;
6049 }
6050
/* End-of-message handler for an enum value: adds the pending (name, number)
 * pair to the enum that was most recently pushed onto the def list.
 *
 * Returns false, with "status" set, if the value lacked a name or a number,
 * or if the enum rejected the value.  The pending name is released whenever
 * an add was attempted. */
static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_enumdef *e;
  bool ok;
  UPB_UNUSED(hd);

  if (!r->saw_number || !r->saw_name) {
    upb_status_seterrmsg(status, "Enum value missing name or number.");
    return false;
  }
  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  /* Propagate the result: a rejected value has already filled in "status",
   * so reporting success here would hide the error from the caller. */
  ok = upb_enumdef_addval(e, r->name, r->number, status);
  free(r->name);
  r->name = NULL;
  return ok;
}
6066
6067
6068 /* Handlers for google.protobuf.EnumDescriptorProto. */
enum_startmsg(void * closure,const void * hd)6069 static bool enum_startmsg(void *closure, const void *hd) {
6070 upb_descreader *r = closure;
6071 UPB_UNUSED(hd);
6072 upb_deflist_push(&r->defs,
6073 upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
6074 return true;
6075 }
6076
/* End-of-message handler for an enum: checks that the enum just read was
 * given a name and at least one value.  On failure sets "status" and returns
 * false. */
static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_def *last = upb_descreader_last(reader);
  upb_enumdef *enumdef = upb_downcast_enumdef_mutable(last);
  UPB_UNUSED(hd);

  if (upb_def_fullname(last) == NULL) {
    upb_status_seterrmsg(status, "Enum had no name.");
    return false;
  }
  if (upb_enumdef_numvals(enumdef) == 0) {
    upb_status_seterrmsg(status, "Enum had no values.");
    return false;
  }
  return true;
}
6093
enum_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6094 static size_t enum_onname(void *closure, const void *hd, const char *buf,
6095 size_t n, const upb_bufhandle *handle) {
6096 upb_descreader *r = closure;
6097 char *fullname = upb_strndup(buf, n);
6098 UPB_UNUSED(hd);
6099 UPB_UNUSED(handle);
6100 /* XXX: see comment at the top of the file. */
6101 upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6102 free(fullname);
6103 return n;
6104 }
6105
6106 /* Handlers for google.protobuf.FieldDescriptorProto */
field_startmsg(void * closure,const void * hd)6107 static bool field_startmsg(void *closure, const void *hd) {
6108 upb_descreader *r = closure;
6109 UPB_UNUSED(hd);
6110 r->f = upb_fielddef_new(&r->defs);
6111 free(r->default_string);
6112 r->default_string = NULL;
6113
6114 /* fielddefs default to packed, but descriptors default to non-packed. */
6115 upb_fielddef_setpacked(r->f, false);
6116 return true;
6117 }
6118
6119 /* Converts the default value in string "str" into "d". Passes a ref on str.
6120 * Returns true on success. */
parse_default(char * str,upb_fielddef * f)6121 static bool parse_default(char *str, upb_fielddef *f) {
6122 bool success = true;
6123 char *end;
6124 switch (upb_fielddef_type(f)) {
6125 case UPB_TYPE_INT32: {
6126 long val = strtol(str, &end, 0);
6127 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6128 success = false;
6129 else
6130 upb_fielddef_setdefaultint32(f, val);
6131 break;
6132 }
6133 case UPB_TYPE_INT64: {
6134 /* XXX: Need to write our own strtoll, since it's not available in c89. */
6135 long long val = strtol(str, &end, 0);
6136 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6137 success = false;
6138 else
6139 upb_fielddef_setdefaultint64(f, val);
6140 break;
6141 }
6142 case UPB_TYPE_UINT32: {
6143 unsigned long val = strtoul(str, &end, 0);
6144 if (val > UINT32_MAX || errno == ERANGE || *end)
6145 success = false;
6146 else
6147 upb_fielddef_setdefaultuint32(f, val);
6148 break;
6149 }
6150 case UPB_TYPE_UINT64: {
6151 /* XXX: Need to write our own strtoull, since it's not available in c89. */
6152 unsigned long long val = strtoul(str, &end, 0);
6153 if (val > UINT64_MAX || errno == ERANGE || *end)
6154 success = false;
6155 else
6156 upb_fielddef_setdefaultuint64(f, val);
6157 break;
6158 }
6159 case UPB_TYPE_DOUBLE: {
6160 double val = strtod(str, &end);
6161 if (errno == ERANGE || *end)
6162 success = false;
6163 else
6164 upb_fielddef_setdefaultdouble(f, val);
6165 break;
6166 }
6167 case UPB_TYPE_FLOAT: {
6168 /* XXX: Need to write our own strtof, since it's not available in c89. */
6169 float val = strtod(str, &end);
6170 if (errno == ERANGE || *end)
6171 success = false;
6172 else
6173 upb_fielddef_setdefaultfloat(f, val);
6174 break;
6175 }
6176 case UPB_TYPE_BOOL: {
6177 if (strcmp(str, "false") == 0)
6178 upb_fielddef_setdefaultbool(f, false);
6179 else if (strcmp(str, "true") == 0)
6180 upb_fielddef_setdefaultbool(f, true);
6181 else
6182 success = false;
6183 break;
6184 }
6185 default: abort();
6186 }
6187 return success;
6188 }
6189
/* End-of-message handler for a field: applies the default value, if one was
 * supplied, to the fielddef being built.
 *
 * String and enum fields take the default text as-is; other scalar fields
 * have it parsed by parse_default().  Returns false with "status" set if a
 * submessage field carries a default or the text cannot be converted. */
static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_fielddef *field = reader->f;
  UPB_UNUSED(hd);

  /* TODO: verify that all required fields were present. */
  assert(upb_fielddef_number(field) != 0);
  assert(upb_fielddef_name(field) != NULL);
  assert((upb_fielddef_subdefname(field) != NULL) ==
         upb_fielddef_hassubdef(field));

  /* Nothing more to do when the descriptor gave no default. */
  if (reader->default_string == NULL) {
    return true;
  }

  if (upb_fielddef_issubmsg(field)) {
    upb_status_seterrmsg(status, "Submessages cannot have defaults.");
    return false;
  }

  if (upb_fielddef_isstring(field) ||
      upb_fielddef_type(field) == UPB_TYPE_ENUM) {
    upb_fielddef_setdefaultcstr(field, reader->default_string, NULL);
    return true;
  }

  if (!parse_default(reader->default_string, field)) {
    /* We don't worry too much about giving a great error message since the
     * compiler should have ensured this was correct. */
    upb_status_seterrmsg(status, "Error converting default value.");
    return false;
  }
  return true;
}
6218
field_onlazy(void * closure,const void * hd,bool val)6219 static bool field_onlazy(void *closure, const void *hd, bool val) {
6220 upb_descreader *r = closure;
6221 UPB_UNUSED(hd);
6222
6223 upb_fielddef_setlazy(r->f, val);
6224 return true;
6225 }
6226
field_onpacked(void * closure,const void * hd,bool val)6227 static bool field_onpacked(void *closure, const void *hd, bool val) {
6228 upb_descreader *r = closure;
6229 UPB_UNUSED(hd);
6230
6231 upb_fielddef_setpacked(r->f, val);
6232 return true;
6233 }
6234
field_ontype(void * closure,const void * hd,int32_t val)6235 static bool field_ontype(void *closure, const void *hd, int32_t val) {
6236 upb_descreader *r = closure;
6237 UPB_UNUSED(hd);
6238
6239 upb_fielddef_setdescriptortype(r->f, val);
6240 return true;
6241 }
6242
field_onlabel(void * closure,const void * hd,int32_t val)6243 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
6244 upb_descreader *r = closure;
6245 UPB_UNUSED(hd);
6246
6247 upb_fielddef_setlabel(r->f, val);
6248 return true;
6249 }
6250
field_onnumber(void * closure,const void * hd,int32_t val)6251 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
6252 upb_descreader *r = closure;
6253 bool ok = upb_fielddef_setnumber(r->f, val, NULL);
6254 UPB_UNUSED(hd);
6255
6256 UPB_ASSERT_VAR(ok, ok);
6257 return true;
6258 }
6259
field_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6260 static size_t field_onname(void *closure, const void *hd, const char *buf,
6261 size_t n, const upb_bufhandle *handle) {
6262 upb_descreader *r = closure;
6263 char *name = upb_strndup(buf, n);
6264 UPB_UNUSED(hd);
6265 UPB_UNUSED(handle);
6266
6267 /* XXX: see comment at the top of the file. */
6268 upb_fielddef_setname(r->f, name, NULL);
6269 free(name);
6270 return n;
6271 }
6272
field_ontypename(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6273 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6274 size_t n, const upb_bufhandle *handle) {
6275 upb_descreader *r = closure;
6276 char *name = upb_strndup(buf, n);
6277 UPB_UNUSED(hd);
6278 UPB_UNUSED(handle);
6279
6280 /* XXX: see comment at the top of the file. */
6281 upb_fielddef_setsubdefname(r->f, name, NULL);
6282 free(name);
6283 return n;
6284 }
6285
field_onextendee(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6286 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6287 size_t n, const upb_bufhandle *handle) {
6288 upb_descreader *r = closure;
6289 char *name = upb_strndup(buf, n);
6290 UPB_UNUSED(hd);
6291 UPB_UNUSED(handle);
6292
6293 /* XXX: see comment at the top of the file. */
6294 upb_fielddef_setcontainingtypename(r->f, name, NULL);
6295 free(name);
6296 return n;
6297 }
6298
field_ondefaultval(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6299 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6300 size_t n, const upb_bufhandle *handle) {
6301 upb_descreader *r = closure;
6302 UPB_UNUSED(hd);
6303 UPB_UNUSED(handle);
6304
6305 /* Have to convert from string to the correct type, but we might not know the
6306 * type yet, so we save it as a string until the end of the field.
6307 * XXX: see comment at the top of the file. */
6308 free(r->default_string);
6309 r->default_string = upb_strndup(buf, n);
6310 return n;
6311 }
6312
6313 /* Handlers for google.protobuf.DescriptorProto (representing a message). */
msg_startmsg(void * closure,const void * hd)6314 static bool msg_startmsg(void *closure, const void *hd) {
6315 upb_descreader *r = closure;
6316 UPB_UNUSED(hd);
6317
6318 upb_deflist_push(&r->defs,
6319 upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs)));
6320 upb_descreader_startcontainer(r);
6321 return true;
6322 }
6323
/* End-of-message handler for a DescriptorProto.  Fails with an error in
 * "status" if the message at the top of the reader never received a name;
 * otherwise closes the current container and returns true. */
static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_msgdef *m = upb_descreader_top(r);
  UPB_UNUSED(hd);

  if (upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
    upb_descreader_endcontainer(r);
    return true;
  }

  upb_status_seterrmsg(status, "Encountered message with no name.");
  return false;
}
6336
msg_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6337 static size_t msg_onname(void *closure, const void *hd, const char *buf,
6338 size_t n, const upb_bufhandle *handle) {
6339 upb_descreader *r = closure;
6340 upb_msgdef *m = upb_descreader_top(r);
6341 /* XXX: see comment at the top of the file. */
6342 char *name = upb_strndup(buf, n);
6343 UPB_UNUSED(hd);
6344 UPB_UNUSED(handle);
6345
6346 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6347 upb_descreader_setscopename(r, name); /* Passes ownership of name. */
6348 return n;
6349 }
6350
msg_onendfield(void * closure,const void * hd)6351 static bool msg_onendfield(void *closure, const void *hd) {
6352 upb_descreader *r = closure;
6353 upb_msgdef *m = upb_descreader_top(r);
6354 UPB_UNUSED(hd);
6355
6356 upb_msgdef_addfield(m, r->f, &r->defs, NULL);
6357 r->f = NULL;
6358 return true;
6359 }
6360
pushextension(void * closure,const void * hd)6361 static bool pushextension(void *closure, const void *hd) {
6362 upb_descreader *r = closure;
6363 UPB_UNUSED(hd);
6364
6365 assert(upb_fielddef_containingtypename(r->f));
6366 upb_fielddef_setisextension(r->f, true);
6367 upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
6368 r->f = NULL;
6369 return true;
6370 }
6371
6372 #define D(name) upbdefs_google_protobuf_ ## name(s)
6373
reghandlers(const void * closure,upb_handlers * h)6374 static void reghandlers(const void *closure, upb_handlers *h) {
6375 const upb_symtab *s = closure;
6376 const upb_msgdef *m = upb_handlers_msgdef(h);
6377
6378 if (m == D(DescriptorProto)) {
6379 upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
6380 upb_handlers_setendmsg(h, &msg_endmsg, NULL);
6381 upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
6382 upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
6383 NULL);
6384 upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
6385 NULL);
6386 } else if (m == D(FileDescriptorProto)) {
6387 upb_handlers_setstartmsg(h, &file_startmsg, NULL);
6388 upb_handlers_setendmsg(h, &file_endmsg, NULL);
6389 upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
6390 NULL);
6391 upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextension,
6392 NULL);
6393 } else if (m == D(EnumValueDescriptorProto)) {
6394 upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
6395 upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
6396 upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL);
6397 upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumber,
6398 NULL);
6399 } else if (m == D(EnumDescriptorProto)) {
6400 upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
6401 upb_handlers_setendmsg(h, &enum_endmsg, NULL);
6402 upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
6403 } else if (m == D(FieldDescriptorProto)) {
6404 upb_handlers_setstartmsg(h, &field_startmsg, NULL);
6405 upb_handlers_setendmsg(h, &field_endmsg, NULL);
6406 upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
6407 NULL);
6408 upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
6409 NULL);
6410 upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
6411 NULL);
6412 upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
6413 NULL);
6414 upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
6415 &field_ontypename, NULL);
6416 upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
6417 &field_onextendee, NULL);
6418 upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
6419 &field_ondefaultval, NULL);
6420 } else if (m == D(FieldOptions)) {
6421 upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
6422 upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
6423 }
6424 }
6425
6426 #undef D
6427
descreader_cleanup(void * _r)6428 void descreader_cleanup(void *_r) {
6429 upb_descreader *r = _r;
6430 free(r->name);
6431 upb_deflist_uninit(&r->defs);
6432 free(r->default_string);
6433 while (r->stack_len > 0) {
6434 upb_descreader_frame *f = &r->stack[--r->stack_len];
6435 free(f->name);
6436 }
6437 }
6438
6439
6440 /* Public API ****************************************************************/
6441
upb_descreader_create(upb_env * e,const upb_handlers * h)6442 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6443 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6444 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6445 return NULL;
6446 }
6447
6448 upb_deflist_init(&r->defs);
6449 upb_sink_reset(upb_descreader_input(r), h, r);
6450 r->stack_len = 0;
6451 r->name = NULL;
6452 r->default_string = NULL;
6453
6454 return r;
6455 }
6456
upb_descreader_getdefs(upb_descreader * r,void * owner,int * n)6457 upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6458 *n = r->defs.len;
6459 upb_deflist_donaterefs(&r->defs, owner);
6460 return r->defs.defs;
6461 }
6462
upb_descreader_input(upb_descreader * r)6463 upb_sink *upb_descreader_input(upb_descreader *r) {
6464 return &r->sink;
6465 }
6466
upb_descreader_newhandlers(const void * owner)6467 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6468 const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6469 const upb_handlers *h = upb_handlers_newfrozen(
6470 upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s);
6471 upb_symtab_unref(s, &s);
6472 return h;
6473 }
6474 /*
6475 ** protobuf decoder bytecode compiler
6476 **
6477 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6478 ** according to that specific schema and destination handlers.
6479 **
6480 ** Compiling to bytecode is always the first step. If we are using the
6481 ** interpreted decoder we leave it as bytecode and interpret that. If we are
6482 ** using a JIT decoder we use a code generator to turn the bytecode into native
6483 ** code, LLVM IR, etc.
6484 **
6485 ** Bytecode definition is in decoder.int.h.
6486 */
6487
6488 #include <stdarg.h>
6489
6490 #ifdef UPB_DUMP_BYTECODE
6491 #include <stdio.h>
6492 #endif
6493
/* Number of slots in the compiler's fwd_labels/back_labels arrays; label()
 * and labelref() assert that label numbers are below this. */
#define MAXLABEL 5
/* Sentinel stored in a label slot that currently holds no bytecode offset. */
#define EMPTYLABEL -1
6496
6497 /* mgroup *********************************************************************/
6498
/* upb_refcounted "free" callback for an mgroup: releases the method table,
 * any JIT state (only when built with UPB_USE_JIT_X64), the bytecode buffer
 * and finally the group itself. */
static void freegroup(upb_refcounted *r) {
  mgroup *group = (mgroup*)r;

  upb_inttable_uninit(&group->methods);
#ifdef UPB_USE_JIT_X64
  upb_pbdecoder_freejit(group);
#endif
  free(group->bytecode);
  free(group);
}
6508
/* upb_refcounted "visit" callback for an mgroup: reports every method stored
 * in the group's method table as an object referenced by the group. */
static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const mgroup *group = (const mgroup*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &group->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    visit(r, upb_pbdecodermethod_upcast(method), closure);
    upb_inttable_next(&it);
  }
}
6519
newgroup(const void * owner)6520 mgroup *newgroup(const void *owner) {
6521 mgroup *g = malloc(sizeof(*g));
6522 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
6523 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
6524 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6525 g->bytecode = NULL;
6526 g->bytecode_end = NULL;
6527 return g;
6528 }
6529
6530
6531 /* upb_pbdecodermethod ********************************************************/
6532
/* upb_refcounted "free" callback for a decoder method: drops the method's ref
 * on its destination handlers (if it has any), releases the dispatch table
 * and frees the method. */
static void freemethod(upb_refcounted *r) {
  upb_pbdecodermethod *m = (upb_pbdecodermethod*)r;
  const upb_handlers *dest = m->dest_handlers_;

  if (dest) {
    upb_handlers_unref(dest, m);
  }

  upb_inttable_uninit(&m->dispatch);
  free(m);
}
6543
/* upb_refcounted "visit" callback for a decoder method: the only object a
 * method reports as referenced is its group. */
static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
                        void *closure) {
  const upb_pbdecodermethod *method = (const upb_pbdecodermethod*)r;

  visit(r, method->group, closure);
}
6549
/* Creates a decoder method for "dest_handlers" and registers it in "group".
 *
 * The method is inserted into group->methods keyed by the dest_handlers
 * pointer.  If dest_handlers is non-NULL the method takes a ref on it
 * (released again in freemethod()).  The returned pointer carries no ref for
 * the caller: the temporary ref taken at init time is dropped before
 * returning, after the method and the group have been linked to each other.
 * NOTE(review): the malloc() result is used without a NULL check. */
static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
                                      mgroup *group) {
  static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
  upb_pbdecodermethod *ret = malloc(sizeof(*ret));
  /* The address of the local "ret" acts as the temporary owner of the
   * initial ref. */
  upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
  upb_byteshandler_init(&ret->input_handler_);

  /* The method references the group and vice-versa, in a circular reference. */
  upb_ref2(ret, group);
  upb_ref2(group, ret);
  upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
  /* Drop the temporary ref taken at init time. */
  upb_pbdecodermethod_unref(ret, &ret);

  ret->group = mgroup_upcast_mutable(group);
  ret->dest_handlers_ = dest_handlers;
  ret->is_native_ = false;  /* If we JIT, it will update this later. */
  upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);

  if (ret->dest_handlers_) {
    upb_handlers_ref(ret->dest_handlers_, ret);
  }
  return ret;
}
6573
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)6574 const upb_handlers *upb_pbdecodermethod_desthandlers(
6575 const upb_pbdecodermethod *m) {
6576 return m->dest_handlers_;
6577 }
6578
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)6579 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6580 const upb_pbdecodermethod *m) {
6581 return &m->input_handler_;
6582 }
6583
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)6584 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6585 return m->is_native_;
6586 }
6587
upb_pbdecodermethod_new(const upb_pbdecodermethodopts * opts,const void * owner)6588 const upb_pbdecodermethod *upb_pbdecodermethod_new(
6589 const upb_pbdecodermethodopts *opts, const void *owner) {
6590 const upb_pbdecodermethod *ret;
6591 upb_pbcodecache cache;
6592
6593 upb_pbcodecache_init(&cache);
6594 ret = upb_pbcodecache_getdecodermethod(&cache, opts);
6595 upb_pbdecodermethod_ref(ret, owner);
6596 upb_pbcodecache_uninit(&cache);
6597 return ret;
6598 }
6599
6600
6601 /* bytecode compiler **********************************************************/
6602
/* Data used only at compilation time. */
typedef struct {
  /* Method group whose bytecode buffer is being written (see put32()). */
  mgroup *group;

  /* Next write position inside group->bytecode; advanced by put32(). */
  uint32_t *pc;
  /* Per label number: bytecode offset of the most recently emitted
   * instruction that references the label ahead of its definition, or
   * EMPTYLABEL if there is none (see labelref() and label()). */
  int fwd_labels[MAXLABEL];
  /* Per label number: bytecode offset recorded by the latest label() call
   * for that label, or EMPTYLABEL if label() has not been called for it. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
6614
newcompiler(mgroup * group,bool lazy)6615 static compiler *newcompiler(mgroup *group, bool lazy) {
6616 compiler *ret = malloc(sizeof(*ret));
6617 int i;
6618
6619 ret->group = group;
6620 ret->lazy = lazy;
6621 for (i = 0; i < MAXLABEL; i++) {
6622 ret->fwd_labels[i] = EMPTYLABEL;
6623 ret->back_labels[i] = EMPTYLABEL;
6624 }
6625 return ret;
6626 }
6627
/* Releases a compiler obtained from newcompiler().  The group it points at
 * is not touched. */
static void freecompiler(compiler *c) { free(c); }
6631
6632 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
6633
6634 /* How many words an instruction is. */
instruction_len(uint32_t instr)6635 static int instruction_len(uint32_t instr) {
6636 switch (getop(instr)) {
6637 case OP_SETDISPATCH: return 1 + ptr_words;
6638 case OP_TAGN: return 3;
6639 case OP_SETBIGGROUPNUM: return 2;
6640 default: return 1;
6641 }
6642 }
6643
op_has_longofs(int32_t instruction)6644 bool op_has_longofs(int32_t instruction) {
6645 switch (getop(instruction)) {
6646 case OP_CALL:
6647 case OP_BRANCH:
6648 case OP_CHECKDELIM:
6649 return true;
6650 /* The "tag" instructions only have 8 bytes available for the jump target,
6651 * but that is ok because these opcodes only require short jumps. */
6652 case OP_TAG1:
6653 case OP_TAG2:
6654 case OP_TAGN:
6655 return false;
6656 default:
6657 assert(false);
6658 return false;
6659 }
6660 }
6661
/* Extracts the signed jump offset stored in "instruction": a single
 * sign-extended byte (bits 8-15) for short-offset opcodes, or everything
 * above the opcode byte for long-offset opcodes. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
6669
/* Stores the jump offset "ofs" into *instruction, keeping the opcode.
 *
 * Long-offset opcodes get the offset in all bits above the opcode byte (any
 * previous upper bits are replaced).  Short-offset opcodes get the low 8 bits
 * of the offset in bits 8-15 and keep their remaining bits.  The trailing
 * assert catches offsets that do not survive the round trip. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    /* Shift in unsigned arithmetic: offsets may be negative, and
     * left-shifting a negative signed value is undefined behavior. */
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~(uint32_t)0xff00) |
                   (((uint32_t)ofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6678
/* Current write position expressed as a word offset from the start of the
 * group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  return c->pc - c->group->bytecode;
}
6680
6681 /* Defines a local label at the current PC location. All previous forward
6682 * references are updated to point to this location. The location is noted
6683 * for any future backward references. */
label(compiler * c,unsigned int label)6684 static void label(compiler *c, unsigned int label) {
6685 int val;
6686 uint32_t *codep;
6687
6688 assert(label < MAXLABEL);
6689 val = c->fwd_labels[label];
6690 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6691 while (codep) {
6692 int ofs = getofs(*codep);
6693 setofs(codep, c->pc - codep - instruction_len(*codep));
6694 codep = ofs ? codep + ofs : NULL;
6695 }
6696 c->fwd_labels[label] = EMPTYLABEL;
6697 c->back_labels[label] = pcofs(c);
6698 }
6699
6700 /* Creates a reference to a numbered label; either a forward reference
6701 * (positive arg) or backward reference (negative arg). For forward references
6702 * the value returned now is actually a "next" pointer into a linked list of all
6703 * instructions that use this label and will be patched later when the label is
6704 * defined with label().
6705 *
6706 * The returned value is the offset that should be written into the instruction.
6707 */
labelref(compiler * c,int label)6708 static int32_t labelref(compiler *c, int label) {
6709 assert(label < MAXLABEL);
6710 if (label == LABEL_DISPATCH) {
6711 /* No resolving required. */
6712 return 0;
6713 } else if (label < 0) {
6714 /* Backward local label. Relative to the next instruction. */
6715 uint32_t from = (c->pc + 1) - c->group->bytecode;
6716 return c->back_labels[-label] - from;
6717 } else {
6718 /* Forward local label: prepend to (possibly-empty) linked list. */
6719 int *lptr = &c->fwd_labels[label];
6720 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6721 *lptr = pcofs(c);
6722 return ret;
6723 }
6724 }
6725
/* Appends one 32-bit word to the group's bytecode at the current PC.  When
 * the PC has reached the end of the buffer, the buffer is first grown to
 * twice its size (at least 64 words) and the PC is re-based onto the new
 * allocation. */
static void put32(compiler *c, uint32_t v) {
  mgroup *group = c->group;

  if (c->pc == group->bytecode_end) {
    int pc_index = pcofs(c);
    size_t cur_words = group->bytecode_end - group->bytecode;
    size_t new_words = UPB_MAX(cur_words * 2, 64);
    /* TODO(haberman): handle OOM. */
    group->bytecode = realloc(group->bytecode, new_words * sizeof(uint32_t));
    group->bytecode_end = group->bytecode + new_words;
    c->pc = group->bytecode + pc_index;
  }

  *c->pc = v;
  c->pc++;
}
6739
/* Emits one bytecode instruction at the current PC.
 *
 * The variadic arguments depend on the opcode and are read with exactly the
 * types noted on each case below; callers must pass matching types:
 *   - OP_SETDISPATCH: a pointer (void*).
 *   - OP_PARSE_*, sequence/submsg/string ops, OP_PUSHTAGDELIM: a
 *     upb_selector_t, stored above the opcode byte.
 *   - OP_SETBIGGROUPNUM: an int, stored in the following word.
 *   - OP_CALL: a upb_pbdecodermethod*.
 *   - OP_CHECKDELIM, OP_BRANCH: an int label (see labelref()).
 *   - OP_TAG1, OP_TAG2, OP_TAGN: an int label followed by a uint64_t tag.
 *   - all remaining listed opcodes take no arguments.
 * Opcodes not listed in the switch emit nothing. */
static void putop(compiler *c, opcode op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      /* The pointer is written low word first; the high word is only
       * emitted when pointers are wider than 32 bits. */
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, ptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    /* Opcodes without any operand. */
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    /* Opcodes whose single operand is packed above the opcode byte. */
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      /* Offset from the word after this instruction to the callee's code. */
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      /* The (at most 16-bit) tag lives in the upper half of the word. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (tag << 16);
      assert(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      /* The upper half holds upb_value_size(tag); the tag itself follows in
       * two words, low word first. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(ap);
}
6827
6828 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
6829
/* Returns a static string naming bytecode opcode "op", or "<unknown op>" when
 * the value matches none of the opcodes listed below.
 *
 * The switch cases are generated by the local OP()/T() macros: OP(x) expands
 * to a case for OP_x that returns the stringified opcode name, and T(x) is
 * shorthand for OP(PARSE_x).
 * NOTE(review): only OP and T are #undef'd at the end; QUOTE,
 * EXPAND_AND_QUOTE and OPNAME stay defined for the rest of the file. */
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
  }
  return "<unknown op>";
#undef OP
#undef T
}
6850
6851 #endif
6852
6853 #ifdef UPB_DUMP_BYTECODE
6854
dumpbc(uint32_t * p,uint32_t * end,FILE * f)6855 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6856
6857 uint32_t *begin = p;
6858
6859 while (p < end) {
6860 fprintf(f, "%p %8tx", p, p - begin);
6861 uint32_t instr = *p++;
6862 uint8_t op = getop(instr);
6863 fprintf(f, " %s", upb_pbdecoder_getopname(op));
6864 switch ((opcode)op) {
6865 case OP_SETDISPATCH: {
6866 const upb_inttable *dispatch;
6867 memcpy(&dispatch, p, sizeof(void*));
6868 p += ptr_words;
6869 const upb_pbdecodermethod *method =
6870 (void *)((char *)dispatch -
6871 offsetof(upb_pbdecodermethod, dispatch));
6872 fprintf(f, " %s", upb_msgdef_fullname(
6873 upb_handlers_msgdef(method->dest_handlers_)));
6874 break;
6875 }
6876 case OP_DISPATCH:
6877 case OP_STARTMSG:
6878 case OP_ENDMSG:
6879 case OP_PUSHLENDELIM:
6880 case OP_POP:
6881 case OP_SETDELIM:
6882 case OP_HALT:
6883 case OP_RET:
6884 break;
6885 case OP_PARSE_DOUBLE:
6886 case OP_PARSE_FLOAT:
6887 case OP_PARSE_INT64:
6888 case OP_PARSE_UINT64:
6889 case OP_PARSE_INT32:
6890 case OP_PARSE_FIXED64:
6891 case OP_PARSE_FIXED32:
6892 case OP_PARSE_BOOL:
6893 case OP_PARSE_UINT32:
6894 case OP_PARSE_SFIXED32:
6895 case OP_PARSE_SFIXED64:
6896 case OP_PARSE_SINT32:
6897 case OP_PARSE_SINT64:
6898 case OP_STARTSEQ:
6899 case OP_ENDSEQ:
6900 case OP_STARTSUBMSG:
6901 case OP_ENDSUBMSG:
6902 case OP_STARTSTR:
6903 case OP_STRING:
6904 case OP_ENDSTR:
6905 case OP_PUSHTAGDELIM:
6906 fprintf(f, " %d", instr >> 8);
6907 break;
6908 case OP_SETBIGGROUPNUM:
6909 fprintf(f, " %d", *p++);
6910 break;
6911 case OP_CHECKDELIM:
6912 case OP_CALL:
6913 case OP_BRANCH:
6914 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6915 break;
6916 case OP_TAG1:
6917 case OP_TAG2: {
6918 fprintf(f, " tag:0x%x", instr >> 16);
6919 if (getofs(instr)) {
6920 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6921 }
6922 break;
6923 }
6924 case OP_TAGN: {
6925 uint64_t tag = *p++;
6926 tag |= (uint64_t)*p++ << 32;
6927 fprintf(f, " tag:0x%llx", (long long)tag);
6928 fprintf(f, " n:%d", instr >> 16);
6929 if (getofs(instr)) {
6930 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6931 }
6932 break;
6933 }
6934 }
6935 fputs("\n", f);
6936 }
6937 }
6938
6939 #endif
6940
/* Builds the tag for field "f" with the given wire type (field number in the
 * upper bits, wire type in the low three) and returns its encoding as
 * produced by upb_vencode32().  Asserts that the encoding fits in 5 bytes. */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t raw = upb_fielddef_number(f) << 3;
  uint64_t encoded;

  raw |= wire_type;
  encoded = upb_vencode32(raw);
  /* No tag should be greater than 5 bytes. */
  assert(encoded <= 0xffffffffff);
  return encoded;
}
6948
/* Emits a tag-check instruction for field "f" / "wire_type" that refers to
 * label "dest".  The opcode is chosen from the size reported by
 * upb_value_size() for the encoded tag: OP_TAG1 for 1, OP_TAG2 for 2,
 * OP_TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  opcode op;

  switch (upb_value_size(tag)) {
    case 1:
      op = OP_TAG1;
      break;
    case 2:
      op = OP_TAG2;
      break;
    default:
      op = OP_TAGN;
      break;
  }
  putop(c, op, dest, tag);
}
6964
getsel(const upb_fielddef * f,upb_handlertype_t type)6965 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6966 upb_selector_t selector;
6967 bool ok = upb_handlers_getselector(f, type, &selector);
6968 UPB_ASSERT_VAR(ok, ok);
6969 return selector;
6970 }
6971
6972 /* Takes an existing, primary dispatch table entry and repacks it with a
6973 * different alternate wire type. Called when we are inserting a secondary
6974 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)6975 static uint64_t repack(uint64_t dispatch, int new_wt2) {
6976 uint64_t ofs;
6977 uint8_t wt1;
6978 uint8_t old_wt2;
6979 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6980 assert(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
6981 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6982 }
6983
6984 /* Marks the current bytecode position as the dispatch target for this message,
6985 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)6986 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6987 const upb_fielddef *f, int wire_type) {
6988 /* Offset is relative to msg base. */
6989 uint64_t ofs = pcofs(c) - method->code_base.ofs;
6990 uint32_t fn = upb_fielddef_number(f);
6991 upb_inttable *d = &method->dispatch;
6992 upb_value v;
6993 if (upb_inttable_remove(d, fn, &v)) {
6994 /* TODO: prioritize based on packed setting in .proto file. */
6995 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6996 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6997 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6998 } else {
6999 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7000 upb_inttable_insert(d, fn, upb_value_uint64(val));
7001 }
7002 }
7003
/* Emits the "push" instruction that precedes a submessage of field "f".
 *
 * Fields whose descriptor type is MESSAGE get OP_PUSHLENDELIM.  All other
 * fields get OP_PUSHTAGDELIM carrying the field number; numbers of 1 << 24 or
 * more do not fit in that instruction's operand, so 0 is emitted there and
 * the real number follows in an OP_SETBIGGROUPNUM instruction. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
7017
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)7018 static upb_pbdecodermethod *find_submethod(const compiler *c,
7019 const upb_pbdecodermethod *method,
7020 const upb_fielddef *f) {
7021 const upb_handlers *sub =
7022 upb_handlers_getsubhandlers(method->dest_handlers_, f);
7023 upb_value v;
7024 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7025 ? upb_value_getptr(v)
7026 : NULL;
7027 }
7028
/* Emits "op" with selector "sel" as its operand, but only when handlers "h"
 * actually have a handler registered for that selector; otherwise emits
 * nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel)) {
    return;
  }
  putop(c, op, sel);
}
7035
7036 /* Puts an opcode to call a callback, but only if a callback actually exists for
7037 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)7038 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7039 const upb_fielddef *f, upb_handlertype_t type) {
7040 putsel(c, op, getsel(f, type), h);
7041 }
7042
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)7043 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7044 if (!upb_fielddef_lazy(f))
7045 return false;
7046
7047 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7048 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7049 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7050 }
7051
7052
7053 /* bytecode compiler code generation ******************************************/
7054
/* Symbolic names for our local labels.  Each value must stay below MAXLABEL
 * (label() and labelref() assert this).  Passing the negated name to putop()
 * requests a backward reference to the label's latest definition; the plain
 * name requests a forward reference (see labelref()). */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
7060
/* Generates bytecode to parse a single non-lazy message field.
 *
 * If the compiler's group has no decoder method for the field's sub-handlers,
 * nothing is emitted.  Otherwise the field is treated as length-delimited
 * when its descriptor type is MESSAGE and as a group (START_GROUP wire type)
 * in every other case, and the current position is registered as the
 * field's dispatch target.  Repeated fields additionally get a
 * STARTSEQ/ENDSEQ wrapper and a loop around the per-element code. */
static void generate_msgfield(compiler *c, const upb_fielddef *f,
                              upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
  int wire_type;

  if (!sub_m) {
    /* Don't emit any code for this field at all; it will be parsed as an
     * unknown field. */
    return;
  }

  /* Record where this field's code starts (target of -LABEL_FIELD refs). */
  label(c, LABEL_FIELD);

  wire_type =
      (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
          ? UPB_WIRE_TYPE_DELIMITED
          : UPB_WIRE_TYPE_START_GROUP;

  if (upb_fielddef_isseq(f)) {
    /* Repeated field. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
    /* Dispatch enters just after the tag check. */
    dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
    /* Per-element code; the branch below loops back to here. */
    label(c, LABEL_LOOPSTART);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    /* ENDSUBMSG is only emitted if the destination has such a handler. */
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
    /* Both checks refer to LABEL_LOOPBREAK; if neither leaves the loop, the
     * branch repeats the element code. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    /* Singular field: the same per-element code, without sequence or loop. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
    dispatchtarget(c, method, f, wire_type);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
  }
}
7116
/* Generates bytecode to parse a single string or lazy submessage field.
 *
 * The field is always handled with the DELIMITED wire type, and the current
 * position is registered as its dispatch target.  Each value is emitted as
 * PUSHLENDELIM, STARTSTR, STRING, POP, an optional ENDSTR (only if the
 * destination has that handler) and SETDELIM.  Repeated fields additionally
 * get a STARTSEQ/ENDSEQ wrapper and a loop around the per-value code. */
static void generate_delimfield(compiler *c, const upb_fielddef *f,
                                upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);

  /* Record where this field's code starts (target of -LABEL_FIELD refs). */
  label(c, LABEL_FIELD);
  if (upb_fielddef_isseq(f)) {
    /* Repeated field. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
    /* Dispatch enters just after the tag check. */
    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
    /* Per-value code; the branch below loops back to here. */
    label(c, LABEL_LOOPSTART);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
    /* Need to emit even if no handler to skip past the string. */
    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
    putop(c, OP_POP);
    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
    putop(c, OP_SETDELIM);
    /* Both checks refer to LABEL_LOOPBREAK; if neither leaves the loop, the
     * branch repeats the per-value code. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    /* Singular field: the same per-value code, without sequence or loop. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
    putop(c, OP_POP);
    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
    putop(c, OP_SETDELIM);
  }
}
7155
7156 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)7157 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
7158 upb_pbdecodermethod *method) {
7159 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7160 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
7161 opcode parse_type;
7162 upb_selector_t sel;
7163 int wire_type;
7164
7165 label(c, LABEL_FIELD);
7166
7167 /* From a decoding perspective, ENUM is the same as INT32. */
7168 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
7169 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
7170
7171 parse_type = (opcode)descriptor_type;
7172
7173 /* TODO(haberman): generate packed or non-packed first depending on "packed"
7174 * setting in the fielddef. This will favor (in speed) whichever was
7175 * specified. */
7176
7177 assert((int)parse_type >= 0 && parse_type <= OP_MAX);
7178 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
7179 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
7180 if (upb_fielddef_isseq(f)) {
7181 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7182 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7183 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7184 putop(c, OP_PUSHLENDELIM);
7185 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
7186 label(c, LABEL_LOOPSTART);
7187 putop(c, parse_type, sel);
7188 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7189 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7190 dispatchtarget(c, method, f, wire_type);
7191 putop(c, OP_PUSHTAGDELIM, 0);
7192 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
7193 label(c, LABEL_LOOPSTART);
7194 putop(c, parse_type, sel);
7195 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7196 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7197 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7198 label(c, LABEL_LOOPBREAK);
7199 putop(c, OP_POP); /* Packed and non-packed join. */
7200 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7201 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
7202 } else {
7203 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7204 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7205 dispatchtarget(c, method, f, wire_type);
7206 putop(c, parse_type, sel);
7207 }
7208 }
7209
7210 /* Adds bytecode for parsing the given message to the given decoderplan,
7211 * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)7212 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
7213 const upb_handlers *h;
7214 const upb_msgdef *md;
7215 uint32_t* start_pc;
7216 upb_msg_field_iter i;
7217 upb_value val;
7218
7219 assert(method);
7220
7221 /* Clear all entries in the dispatch table. */
7222 upb_inttable_uninit(&method->dispatch);
7223 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
7224
7225 h = upb_pbdecodermethod_desthandlers(method);
7226 md = upb_handlers_msgdef(h);
7227
7228 method->code_base.ofs = pcofs(c);
7229 putop(c, OP_SETDISPATCH, &method->dispatch);
7230 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
7231 label(c, LABEL_FIELD);
7232 start_pc = c->pc;
7233 for(upb_msg_field_begin(&i, md);
7234 !upb_msg_field_done(&i);
7235 upb_msg_field_next(&i)) {
7236 const upb_fielddef *f = upb_msg_iter_field(&i);
7237 upb_fieldtype_t type = upb_fielddef_type(f);
7238
7239 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
7240 generate_msgfield(c, f, method);
7241 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
7242 type == UPB_TYPE_MESSAGE) {
7243 generate_delimfield(c, f, method);
7244 } else {
7245 generate_primitivefield(c, f, method);
7246 }
7247 }
7248
7249 /* If there were no fields, or if no handlers were defined, we need to
7250 * generate a non-empty loop body so that we can at least dispatch for unknown
7251 * fields and check for the end of the message. */
7252 if (c->pc == start_pc) {
7253 /* Check for end-of-message. */
7254 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7255 /* Unconditionally dispatch. */
7256 putop(c, OP_DISPATCH, 0);
7257 }
7258
7259 /* For now we just loop back to the last field of the message (or if none,
7260 * the DISPATCH opcode for the message). */
7261 putop(c, OP_BRANCH, -LABEL_FIELD);
7262
7263 /* Insert both a label and a dispatch table entry for this end-of-msg. */
7264 label(c, LABEL_ENDMSG);
7265 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
7266 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
7267
7268 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
7269 putop(c, OP_RET);
7270
7271 upb_inttable_compact(&method->dispatch);
7272 }
7273
7274 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7275 * Returns the method for these handlers.
7276 *
7277 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)7278 static void find_methods(compiler *c, const upb_handlers *h) {
7279 upb_value v;
7280 upb_msg_field_iter i;
7281 const upb_msgdef *md;
7282
7283 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7284 return;
7285 newmethod(h, c->group);
7286
7287 /* Find submethods. */
7288 md = upb_handlers_msgdef(h);
7289 for(upb_msg_field_begin(&i, md);
7290 !upb_msg_field_done(&i);
7291 upb_msg_field_next(&i)) {
7292 const upb_fielddef *f = upb_msg_iter_field(&i);
7293 const upb_handlers *sub_h;
7294 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7295 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
7296 /* We only generate a decoder method for submessages with handlers.
7297 * Others will be parsed as unknown fields. */
7298 find_methods(c, sub_h);
7299 }
7300 }
7301 }
7302
7303 /* (Re-)compile bytecode for all messages in "msgs."
7304 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)7305 static void compile_methods(compiler *c) {
7306 upb_inttable_iter i;
7307
7308 /* Start over at the beginning of the bytecode. */
7309 c->pc = c->group->bytecode;
7310
7311 upb_inttable_begin(&i, &c->group->methods);
7312 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7313 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7314 compile_method(c, method);
7315 }
7316 }
7317
/* Resolves each method's code_base from an offset into a pointer into
 * g->bytecode and installs the bytecode-interpreter callbacks on the method's
 * input bytes handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);
  }
}
7332
7333
7334 /* JIT setup. *****************************************************************/
7335
7336 #ifdef UPB_USE_JIT_X64
7337
sethandlers(mgroup * g,bool allowjit)7338 static void sethandlers(mgroup *g, bool allowjit) {
7339 g->jit_code = NULL;
7340 if (allowjit) {
7341 /* Compile byte-code into machine code, create handlers. */
7342 upb_pbdecoder_jit(g);
7343 } else {
7344 set_bytecode_handlers(g);
7345 }
7346 }
7347
7348 #else /* UPB_USE_JIT_X64 */
7349
sethandlers(mgroup * g,bool allowjit)7350 static void sethandlers(mgroup *g, bool allowjit) {
7351 /* No JIT compiled in; use bytecode handlers unconditionally. */
7352 UPB_UNUSED(allowjit);
7353 set_bytecode_handlers(g);
7354 }
7355
7356 #endif /* UPB_USE_JIT_X64 */
7357
7358
7359 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
7360 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool allowjit,bool lazy,const void * owner)7361 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
7362 const void *owner) {
7363 mgroup *g;
7364 compiler *c;
7365
7366 UPB_UNUSED(allowjit);
7367 assert(upb_handlers_isfrozen(dest));
7368
7369 g = newgroup(owner);
7370 c = newcompiler(g, lazy);
7371 find_methods(c, dest);
7372
7373 /* We compile in two passes:
7374 * 1. all messages are assigned relative offsets from the beginning of the
7375 * bytecode (saved in method->code_base).
7376 * 2. forwards OP_CALL instructions can be correctly linked since message
7377 * offsets have been previously assigned.
7378 *
7379 * Could avoid the second pass by linking OP_CALL instructions somehow. */
7380 compile_methods(c);
7381 compile_methods(c);
7382 g->bytecode_end = c->pc;
7383 freecompiler(c);
7384
7385 #ifdef UPB_DUMP_BYTECODE
7386 {
7387 FILE *f = fopen("/tmp/upb-bytecode", "wb");
7388 assert(f);
7389 dumpbc(g->bytecode, g->bytecode_end, stderr);
7390 dumpbc(g->bytecode, g->bytecode_end, f);
7391 fclose(f);
7392 }
7393 #endif
7394
7395 sethandlers(g, allowjit);
7396 return g;
7397 }
7398
7399
7400 /* upb_pbcodecache ************************************************************/
7401
/* Initializes "c" with JIT use allowed and an empty table of groups. */
void upb_pbcodecache_init(upb_pbcodecache *c) {
  c->allow_jit_ = true;
  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
}
7406
/* Drops the cache's reference on every group it holds (the cache itself is
 * the ref owner) and then releases the table of groups. */
void upb_pbcodecache_uninit(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    const mgroup *g = upb_value_getconstptr(upb_inttable_iter_value(&it));
    mgroup_unref(g, c);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
}
7416
upb_pbcodecache_allowjit(const upb_pbcodecache * c)7417 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
7418 return c->allow_jit_;
7419 }
7420
/* Changes the allow-JIT setting.  The change is refused (returns false and
 * leaves the setting untouched) once the cache holds any group; otherwise the
 * setting is stored and true is returned. */
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
  const bool has_groups = upb_inttable_count(&c->groups) > 0;

  if (!has_groups) {
    c->allow_jit_ = allow;
  }
  return !has_groups;
}
7427
upb_pbcodecache_getdecodermethod(upb_pbcodecache * c,const upb_pbdecodermethodopts * opts)7428 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
7429 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
7430 upb_value v;
7431 bool ok;
7432
7433 /* Right now we build a new DecoderMethod every time.
7434 * TODO(haberman): properly cache methods by their true key. */
7435 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
7436 upb_inttable_push(&c->groups, upb_value_constptr(g));
7437
7438 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
7439 UPB_ASSERT_VAR(ok, ok);
7440 return upb_value_getptr(v);
7441 }
7442
7443
7444 /* upb_pbdecodermethodopts ****************************************************/
7445
/* Initializes "opts" to target handlers "h" with lazy parsing turned off. */
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
                                  const upb_handlers *h) {
  opts->lazy = false;
  opts->handlers = h;
}
7451
/* Stores the "lazy" flag in "opts". */
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts,
                                     bool lazy) {
  opts->lazy = lazy;
}
7455 /*
7456 ** upb::Decoder (Bytecode Decoder VM)
7457 **
7458 ** Bytecode must previously have been generated using the bytecode compiler in
7459 ** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
7460 ** parse the input.
7461 **
7462 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
7463 ** instruction and resume from there. A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be capable
** of suspending/resuming from any byte in the stream. This
7466 ** sometimes requires keeping a few trailing bytes from the last buffer around
7467 ** in the "residual" buffer.
7468 */
7469
7470 #include <inttypes.h>
7471 #include <stddef.h>
7472
7473 #ifdef UPB_DUMP_BYTECODE
7474 #include <stdio.h>
7475 #endif
7476
/* Returns upb_pbdecoder_suspend(d) from the enclosing function when "x"
 * evaluates to false.  Relies on a variable named "d" being in scope at the
 * expansion site.  The expansion is a bare "if" that already ends in ';', so
 * take care when using it as the body of an if/else. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
7478
7479 /* Error messages that are shared between the bytecode and JIT decoders. */
7480 const char *kPbDecoderStackOverflow = "Nesting too deep.";
7481 const char *kPbDecoderSubmessageTooLong =
7482 "Submessage end extends past enclosing submessage.";
7483
7484 /* Error messages shared within this file. */
7485 static const char *kUnterminatedVarint = "Unterminated varint.";
7486
7487 /* upb_pbdecoder **************************************************************/
7488
7489 static opcode halt = OP_HALT;
7490
7491 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)7492 static bool consumes_input(opcode op) {
7493 switch (op) {
7494 case OP_SETDISPATCH:
7495 case OP_STARTMSG:
7496 case OP_ENDMSG:
7497 case OP_STARTSEQ:
7498 case OP_ENDSEQ:
7499 case OP_STARTSUBMSG:
7500 case OP_ENDSUBMSG:
7501 case OP_STARTSTR:
7502 case OP_ENDSTR:
7503 case OP_PUSHTAGDELIM:
7504 case OP_POP:
7505 case OP_SETDELIM:
7506 case OP_SETBIGGROUPNUM:
7507 case OP_CHECKDELIM:
7508 case OP_CALL:
7509 case OP_RET:
7510 case OP_BRANCH:
7511 return false;
7512 default:
7513 return true;
7514 }
7515 }
7516
/* Size in bytes of a decoder stack holding "entries" frames.  "d" is unused. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);
  return sizeof(upb_pbdecoder_frame) * entries;
}
7521
/* Size in bytes of a call stack with room for "entries" entries.  For a
 * native (JIT) method the stack needs more room per entry, plus a fixed
 * allowance; otherwise each entry is a single bytecode pointer. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);

#ifdef UPB_USE_JIT_X64
  if (d->method_->is_native_) {
    /* Two pointers per native stack frame, plus a few frames' worth for the
     * enter/exit trampolines. */
    return entries * sizeof(void*) * 2 + sizeof(void*) * 10;
  }
#endif

  return sizeof(uint32_t*) * entries;
}
7537
7538
7539 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7540
7541 /* It's unfortunate that we have to micro-manage the compiler with
7542 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7543 * specific to one hardware configuration. But empirically on a Core i7,
7544 * performance increases 30-50% with these annotations. Every instance where
7545 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7546 * benchmarks. */
7547
/* Reports error message "msg" to the decoder's environment by way of a
 * temporary status object. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status err = UPB_STATUS_INIT;

  upb_status_seterrmsg(&err, msg);
  upb_env_reporterror(d->env, &err);
}
7553
/* Externally visible wrapper around seterr(). */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  seterr(d, msg);
}
7557
7558
7559 /* Buffering ******************************************************************/
7560
7561 /* We operate on one buffer at a time, which is either the user's buffer passed
7562 * to our "decode" callback or some residual bytes from the previous buffer. */
7563
7564 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
7565 * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)7566 static size_t curbufleft(const upb_pbdecoder *d) {
7567 assert(d->data_end >= d->ptr);
7568 return d->data_end - d->ptr;
7569 }
7570
7571 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)7572 static size_t bufleft(const upb_pbdecoder *d) {
7573 return d->end - d->ptr;
7574 }
7575
7576 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)7577 uint64_t offset(const upb_pbdecoder *d) {
7578 return d->bufstart_ofs + (d->ptr - d->buf);
7579 }
7580
7581 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)7582 size_t delim_remaining(const upb_pbdecoder *d) {
7583 return d->top->end_ofs - offset(d);
7584 }
7585
7586 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)7587 static void advance(upb_pbdecoder *d, size_t len) {
7588 assert(curbufleft(d) >= len);
7589 d->ptr += len;
7590 }
7591
/* True when "p" lies within [buf, end].  The upper bound is inclusive. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  return !(p < buf || p > end);
}
7595
in_residual_buf(const upb_pbdecoder * d,const char * p)7596 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7597 return in_buf(p, d->residual, d->residual_end);
7598 }
7599
7600 /* Calculates the delim_end value, which is affected by both the current buffer
7601 * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)7602 static void set_delim_end(upb_pbdecoder *d) {
7603 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7604 if (delim_ofs <= (size_t)(d->end - d->buf)) {
7605 d->delim_end = d->buf + delim_ofs;
7606 d->data_end = d->delim_end;
7607 } else {
7608 d->data_end = d->end;
7609 d->delim_end = NULL;
7610 }
7611 }
7612
/* Makes [buf, end) the current buffer, with d->ptr at its start, and then
 * refreshes the delimiter bounds for the new buffer. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
7619
/* Moves on from the fully consumed current buffer to the buffer of "len"
 * bytes at "buf", advancing the stream offset of the buffer start by the
 * length of the buffer being left. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const char *new_end = buf + len;

  assert(curbufleft(d) == 0);
  d->bufstart_ofs += (d->end - d->buf);
  switchtobuf(d, buf, new_end);
}
7625
/* Records the current read position as the decoder's checkpoint. */
static void checkpoint(upb_pbdecoder *d) {
  /* This assertion is about efficiency rather than correctness: it catches
   * callers that checkpoint() more often than they need to. */
  assert(d->ptr != d->checkpoint);
  d->checkpoint = d->ptr;
}
7633
7634 /* Skips "bytes" bytes in the stream, which may be more than available. If we
7635 * skip more bytes than are available, we return a long read count to the caller
7636 * indicating how many bytes can be skipped over before passing actual data
7637 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
7638 * won't actually be read.
7639 */
skip(upb_pbdecoder * d,size_t bytes)7640 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7641 assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
7642 assert(d->skip == 0);
7643 if (bytes > delim_remaining(d)) {
7644 seterr(d, "Skipped value extended beyond enclosing submessage.");
7645 return upb_pbdecoder_suspend(d);
7646 } else if (bufleft(d) > bytes) {
7647 /* Skipped data is all in current buffer, and more is still available. */
7648 advance(d, bytes);
7649 d->skip = 0;
7650 return DECODE_OK;
7651 } else {
7652 /* Skipped data extends beyond currently available buffers. */
7653 d->pc = d->last;
7654 d->skip = bytes - curbufleft(d);
7655 d->bufstart_ofs += (d->end - d->buf);
7656 d->residual_end = d->residual;
7657 switchtobuf(d, d->residual, d->residual_end);
7658 return d->size_param + d->skip;
7659 }
7660 }
7661
7662
7663 /* Resumes the decoder from an initial state or from a previous suspend. */
upb_pbdecoder_resume(upb_pbdecoder * d,void * p,const char * buf,size_t size,const upb_bufhandle * handle)7664 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
7665 size_t size, const upb_bufhandle *handle) {
7666 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
7667
7668 d->buf_param = buf;
7669 d->size_param = size;
7670 d->handle = handle;
7671
7672 if (d->residual_end > d->residual) {
7673 /* We have residual bytes from the last buffer. */
7674 assert(d->ptr == d->residual);
7675 } else {
7676 switchtobuf(d, buf, buf + size);
7677 }
7678
7679 d->checkpoint = d->ptr;
7680
7681 if (d->skip) {
7682 size_t skip_bytes = d->skip;
7683 d->skip = 0;
7684 CHECK_RETURN(skip(d, skip_bytes));
7685 d->checkpoint = d->ptr;
7686 }
7687
7688 if (!buf) {
7689 /* NULL buf is ok if its entire span is covered by the "skip" above, but
7690 * by this point we know that "skip" doesn't cover the buffer. */
7691 seterr(d, "Passed NULL buffer over non-skippable region.");
7692 return upb_pbdecoder_suspend(d);
7693 }
7694
7695 if (d->top->groupnum < 0) {
7696 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
7697 d->checkpoint = d->ptr;
7698 }
7699
7700 return DECODE_OK;
7701 }
7702
7703 /* Suspends the decoder at the last checkpoint, without saving any residual
7704 * bytes. If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)7705 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7706 d->pc = d->last;
7707 if (d->checkpoint == d->residual) {
7708 /* Checkpoint was in residual buf; no user bytes were consumed. */
7709 d->ptr = d->residual;
7710 return 0;
7711 } else {
7712 size_t consumed;
7713 assert(!in_residual_buf(d, d->checkpoint));
7714 assert(d->buf == d->buf_param);
7715
7716 consumed = d->checkpoint - d->buf;
7717 d->bufstart_ofs += consumed;
7718 d->residual_end = d->residual;
7719 switchtobuf(d, d->residual, d->residual_end);
7720 return consumed;
7721 }
7722 }
7723
7724 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
7725 * bytes in our residual buffer. This is necessary if we need more user
7726 * bytes to form a complete value, which might not be contiguous in the
7727 * user's buffers. Always consumes all user bytes. */
suspend_save(upb_pbdecoder * d)7728 static size_t suspend_save(upb_pbdecoder *d) {
7729 /* We hit end-of-buffer before we could parse a full value.
7730 * Save any unconsumed bytes (if any) to the residual buffer. */
7731 d->pc = d->last;
7732
7733 if (d->checkpoint == d->residual) {
7734 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
7735 assert((d->residual_end - d->residual) + d->size_param <=
7736 sizeof(d->residual));
7737 if (!in_residual_buf(d, d->ptr)) {
7738 d->bufstart_ofs -= (d->residual_end - d->residual);
7739 }
7740 memcpy(d->residual_end, d->buf_param, d->size_param);
7741 d->residual_end += d->size_param;
7742 } else {
7743 /* Checkpoint was in user buf; old residual bytes not needed. */
7744 size_t save;
7745 assert(!in_residual_buf(d, d->checkpoint));
7746
7747 d->ptr = d->checkpoint;
7748 save = curbufleft(d);
7749 assert(save <= sizeof(d->residual));
7750 memcpy(d->residual, d->ptr, save);
7751 d->residual_end = d->residual + save;
7752 d->bufstart_ofs = offset(d);
7753 }
7754
7755 switchtobuf(d, d->residual, d->residual_end);
7756 return d->size_param;
7757 }
7758
7759 /* Copies the next "bytes" bytes into "buf" and advances the stream.
7760 * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)7761 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7762 size_t bytes) {
7763 assert(bytes <= curbufleft(d));
7764 memcpy(buf, d->ptr, bytes);
7765 advance(d, bytes);
7766 }
7767
7768 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
7769 * available in the current buffer or not. Returns a status code as described
7770 * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7771 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7772 size_t bytes) {
7773 const size_t avail = curbufleft(d);
7774 consumebytes(d, buf, avail);
7775 bytes -= avail;
7776 assert(bytes > 0);
7777 if (in_residual_buf(d, d->ptr)) {
7778 advancetobuf(d, d->buf_param, d->size_param);
7779 }
7780 if (curbufleft(d) >= bytes) {
7781 consumebytes(d, (char *)buf + avail, bytes);
7782 return DECODE_OK;
7783 } else if (d->data_end == d->delim_end) {
7784 seterr(d, "Submessage ended in the middle of a value or group");
7785 return upb_pbdecoder_suspend(d);
7786 } else {
7787 return suspend_save(d);
7788 }
7789 }
7790
7791 /* Gets the next "bytes" bytes, regardless of whether they are available in the
7792 * current buffer or not. Returns a status code as described in decoder.int.h.
7793 */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)7794 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7795 size_t bytes) {
7796 if (curbufleft(d) >= bytes) {
7797 /* Buffer has enough data to satisfy. */
7798 consumebytes(d, buf, bytes);
7799 return DECODE_OK;
7800 } else {
7801 return getbytes_slow(d, buf, bytes);
7802 }
7803 }
7804
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7805 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7806 size_t bytes) {
7807 size_t ret = curbufleft(d);
7808 memcpy(buf, d->ptr, ret);
7809 if (in_residual_buf(d, d->ptr)) {
7810 size_t copy = UPB_MIN(bytes - ret, d->size_param);
7811 memcpy((char *)buf + ret, d->buf_param, copy);
7812 ret += copy;
7813 }
7814 return ret;
7815 }
7816
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)7817 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7818 size_t bytes) {
7819 if (curbufleft(d) >= bytes) {
7820 memcpy(buf, d->ptr, bytes);
7821 return bytes;
7822 } else {
7823 return peekbytes_slow(d, buf, bytes);
7824 }
7825 }
7826
7827
7828 /* Decoding of wire types *****************************************************/
7829
7830 /* Slow path for decoding a varint from the current buffer position.
7831 * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)7832 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7833 uint64_t *u64) {
7834 uint8_t byte = 0x80;
7835 int bitpos;
7836 *u64 = 0;
7837 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
7838 int32_t ret = getbytes(d, &byte, 1);
7839 if (ret >= 0) return ret;
7840 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7841 }
7842 if(bitpos == 70 && (byte & 0x80)) {
7843 seterr(d, kUnterminatedVarint);
7844 return upb_pbdecoder_suspend(d);
7845 }
7846 return DECODE_OK;
7847 }
7848
7849 /* Decodes a varint from the current buffer position.
7850 * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)7851 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7852 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7853 *u64 = *d->ptr;
7854 advance(d, 1);
7855 return DECODE_OK;
7856 } else if (curbufleft(d) >= 10) {
7857 /* Fast case. */
7858 upb_decoderet r = upb_vdecode_fast(d->ptr);
7859 if (r.p == NULL) {
7860 seterr(d, kUnterminatedVarint);
7861 return upb_pbdecoder_suspend(d);
7862 }
7863 advance(d, r.p - d->ptr);
7864 *u64 = r.val;
7865 return DECODE_OK;
7866 } else {
7867 /* Slow case -- varint spans buffer seam. */
7868 return upb_pbdecoder_decode_varint_slow(d, u64);
7869 }
7870 }
7871
7872 /* Decodes a 32-bit varint from the current buffer position.
7873 * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)7874 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7875 uint64_t u64;
7876 int32_t ret = decode_varint(d, &u64);
7877 if (ret >= 0) return ret;
7878 if (u64 > UINT32_MAX) {
7879 seterr(d, "Unterminated 32-bit varint");
7880 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7881 * so we know this path will always be treated as error by our caller.
7882 * Right now the size_t -> int32_t can overflow and produce negative values.
7883 */
7884 *u32 = 0;
7885 return upb_pbdecoder_suspend(d);
7886 }
7887 *u32 = u64;
7888 return DECODE_OK;
7889 }
7890
7891 /* Decodes a fixed32 from the current buffer position.
7892 * Returns a status code as described in decoder.int.h.
7893 * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)7894 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7895 return getbytes(d, u32, 4);
7896 }
7897
7898 /* Decodes a fixed64 from the current buffer position.
7899 * Returns a status code as described in decoder.int.h.
7900 * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)7901 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7902 return getbytes(d, u64, 8);
7903 }
7904
7905 /* Non-static versions of the above functions.
7906 * These are called by the JIT for fallback paths. */
upb_pbdecoder_decode_f32(upb_pbdecoder * d,uint32_t * u32)7907 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
7908 return decode_fixed32(d, u32);
7909 }
7910
/* Non-static counterpart of decode_fixed64(). */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  const int32_t status = decode_fixed64(d, u64);
  return status;
}
7914
/* Reinterprets the eight bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double d;
  memcpy(&d, &n, sizeof(n));
  return d;
}
/* Reinterprets the four bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float f;
  memcpy(&f, &n, sizeof(n));
  return f;
}
7917
7918 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)7919 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7920 upb_pbdecoder_frame *fr = d->top;
7921
7922 if (end > fr->end_ofs) {
7923 seterr(d, kPbDecoderSubmessageTooLong);
7924 return false;
7925 } else if (fr == d->limit) {
7926 seterr(d, kPbDecoderStackOverflow);
7927 return false;
7928 }
7929
7930 fr++;
7931 fr->end_ofs = end;
7932 fr->dispatch = NULL;
7933 fr->groupnum = 0;
7934 d->top = fr;
7935 return true;
7936 }
7937
/* Pushes a frame for a tag-delimited region and stores "arg" as its group
 * number.  Returns false if the push fails. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* We expect to see an "end" tag (either ENDGROUP or a non-sequence field
   * number) before reaching the end of any enclosing submessage.  Even so,
   * the new frame inherits the existing delim end, which stops us from
   * parsing on through a corrupt proto that never supplies an END tag in
   * time. */
  if (decoder_push(d, d->top->end_ofs)) {
    d->top->groupnum = arg;
    return true;
  }
  return false;
}
7948
7949 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)7950 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7951
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)7952 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7953 uint64_t expected) {
7954 uint64_t data = 0;
7955 size_t bytes = upb_value_size(expected);
7956 size_t read = peekbytes(d, &data, bytes);
7957 if (read == bytes && data == expected) {
7958 /* Advance past matched bytes. */
7959 int32_t ok = getbytes(d, &data, read);
7960 UPB_ASSERT_VAR(ok, ok < 0);
7961 return DECODE_OK;
7962 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7963 return suspend_save(d);
7964 } else {
7965 return DECODE_MISMATCH;
7966 }
7967 }
7968
/* Skips over unknown field data.
 *
 * When "fieldnum" is non-negative the caller has already decoded a tag and
 * passes its field number and wire type; otherwise a tag is read from the
 * input first.  After one field has been skipped the function returns
 * DECODE_OK unless the top frame carries a negative group number (an unknown
 * group pushed by this function), in which case it keeps reading and skipping
 * fields until that group is closed.
 *
 * Returns DECODE_ENDGROUP when an END_GROUP tag matches the top frame's
 * (non-negated) group number.  Field number 0, an unmatched END_GROUP and an
 * invalid wire type record an error and return upb_pbdecoder_suspend(). */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* Only the first pass may reuse a tag handed in by the caller. */
  bool have_tag = (fieldnum >= 0);

  for (;;) {
    if (!have_tag) {
      uint32_t tag;
      CHECK_RETURN(decode_v32(d, &tag));
      wire_type = tag & 0x7;
      fieldnum = tag >> 3;
    }
    have_tag = false;

    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    /* TODO: deliver to unknown field callback. */
    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        uint64_t ignored;
        CHECK_RETURN(decode_varint(d, &ignored));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Unknown groups are tracked with a negated group number. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* Closes an unknown group that we opened ourselves. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      return DECODE_OK;
    }

    /* Still inside an unknown group -- keep looping over unknown fields. */
    checkpoint(d);
  }
}
8031
/* Points the program counter at the end-of-message entry of the current
 * frame: the DISPATCH_ENDMSG slot of the frame's dispatch table holds an
 * offset that is applied to the frame's base.  The slot is asserted to
 * exist. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_value entry;
  bool found =
      upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &entry);
  UPB_ASSERT_VAR(found, found);
  d->pc = d->top->base + upb_value_getuint64(entry);
}
8038
8039 /* Parses a tag and jumps to the corresponding bytecode instruction for this
8040 * field.
8041 *
8042 * If the tag is unknown (or the wire type doesn't match), parses the field as
8043 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
8044 * instruction for the end of message. */
dispatch(upb_pbdecoder * d)8045 static int32_t dispatch(upb_pbdecoder *d) {
8046 upb_inttable *dispatch = d->top->dispatch;
8047 uint32_t tag;
8048 uint8_t wire_type;
8049 uint32_t fieldnum;
8050 upb_value val;
8051 int32_t retval;
8052
8053 /* Decode tag. */
8054 CHECK_RETURN(decode_v32(d, &tag));
8055 wire_type = tag & 0x7;
8056 fieldnum = tag >> 3;
8057
8058 /* Lookup tag. Because of packed/non-packed compatibility, we have to
8059 * check the wire type against two possibilities. */
8060 if (fieldnum != DISPATCH_ENDMSG &&
8061 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
8062 uint64_t v = upb_value_getuint64(val);
8063 if (wire_type == (v & 0xff)) {
8064 d->pc = d->top->base + (v >> 16);
8065 return DECODE_OK;
8066 } else if (wire_type == ((v >> 8) & 0xff)) {
8067 bool found =
8068 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
8069 UPB_ASSERT_VAR(found, found);
8070 d->pc = d->top->base + upb_value_getuint64(val);
8071 return DECODE_OK;
8072 }
8073 }
8074
8075 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
8076 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
8077 * we need to back up to, so that when we're done skipping unknown data we
8078 * can re-check the delimited end. */
8079 d->last--; /* Necessary if we get suspended */
8080 d->pc = d->last;
8081 assert(getop(*d->last) == OP_CHECKDELIM);
8082
8083 /* Unknown field or ENDGROUP. */
8084 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
8085
8086 CHECK_RETURN(retval);
8087
8088 if (retval == DECODE_ENDGROUP) {
8089 goto_endmsg(d);
8090 return DECODE_OK;
8091 }
8092
8093 return DECODE_OK;
8094 }
8095
8096 /* Callers know that the stack is more than one deep because the opcodes that
8097 * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)8098 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
8099 assert(d->top != d->stack);
8100 return d->top - 1;
8101 }
8102
8103
8104 /* The main decoding loop *****************************************************/
8105
8106 /* The main decoder VM function. Uses traditional bytecode dispatch loop with a
8107 * switch() statement. */
run_decoder_vm(upb_pbdecoder * d,const mgroup * group,const upb_bufhandle * handle)8108 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
8109 const upb_bufhandle* handle) {
8110
8111 #define VMCASE(op, code) \
8112 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
8113 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
8114 VMCASE(OP_PARSE_ ## type, { \
8115 ctype val; \
8116 CHECK_RETURN(decode_ ## wt(d, &val)); \
8117 upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
8118 })
8119
8120 while(1) {
8121 int32_t instruction;
8122 opcode op;
8123 uint32_t arg;
8124 int32_t longofs;
8125
8126 d->last = d->pc;
8127 instruction = *d->pc++;
8128 op = getop(instruction);
8129 arg = instruction >> 8;
8130 longofs = arg;
8131 assert(d->ptr != d->residual_end);
8132 UPB_UNUSED(group);
8133 #ifdef UPB_DUMP_BYTECODE
8134 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
8135 "%x %s (%d)\n",
8136 (int)offset(d),
8137 (int)(d->ptr - d->buf),
8138 (int)(d->data_end - d->ptr),
8139 (int)(d->end - d->ptr),
8140 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
8141 (int)(d->pc - 1 - group->bytecode),
8142 upb_pbdecoder_getopname(op),
8143 arg);
8144 #endif
8145 switch (op) {
8146 /* Technically, we are losing data if we see a 32-bit varint that is not
8147 * properly sign-extended. We could detect this and error about the data
8148 * loss, but proto2 does not do this, so we pass. */
8149 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
8150 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
8151 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
8152 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
8153 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
8154 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
8155 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
8156 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
8157 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
8158 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
8159 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
8160 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
8161 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
8162
8163 VMCASE(OP_SETDISPATCH,
8164 d->top->base = d->pc - 1;
8165 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
8166 d->pc += sizeof(void*) / sizeof(uint32_t);
8167 )
8168 VMCASE(OP_STARTMSG,
8169 CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
8170 )
8171 VMCASE(OP_ENDMSG,
8172 CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
8173 )
8174 VMCASE(OP_STARTSEQ,
8175 upb_pbdecoder_frame *outer = outer_frame(d);
8176 CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
8177 )
8178 VMCASE(OP_ENDSEQ,
8179 CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
8180 )
8181 VMCASE(OP_STARTSUBMSG,
8182 upb_pbdecoder_frame *outer = outer_frame(d);
8183 CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
8184 )
8185 VMCASE(OP_ENDSUBMSG,
8186 CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
8187 )
8188 VMCASE(OP_STARTSTR,
8189 uint32_t len = delim_remaining(d);
8190 upb_pbdecoder_frame *outer = outer_frame(d);
8191 CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
8192 if (len == 0) {
8193 d->pc++; /* Skip OP_STRING. */
8194 }
8195 )
8196 VMCASE(OP_STRING,
8197 uint32_t len = curbufleft(d);
8198 size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
8199 if (n > len) {
8200 if (n > delim_remaining(d)) {
8201 seterr(d, "Tried to skip past end of string.");
8202 return upb_pbdecoder_suspend(d);
8203 } else {
8204 int32_t ret = skip(d, n);
8205 /* This shouldn't return DECODE_OK, because n > len. */
8206 assert(ret >= 0);
8207 return ret;
8208 }
8209 }
8210 advance(d, n);
8211 if (n < len || d->delim_end == NULL) {
8212 /* We aren't finished with this string yet. */
8213 d->pc--; /* Repeat OP_STRING. */
8214 if (n > 0) checkpoint(d);
8215 return upb_pbdecoder_suspend(d);
8216 }
8217 )
8218 VMCASE(OP_ENDSTR,
8219 CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
8220 )
8221 VMCASE(OP_PUSHTAGDELIM,
8222 CHECK_SUSPEND(pushtagdelim(d, arg));
8223 )
8224 VMCASE(OP_SETBIGGROUPNUM,
8225 d->top->groupnum = *d->pc++;
8226 )
8227 VMCASE(OP_POP,
8228 assert(d->top > d->stack);
8229 decoder_pop(d);
8230 )
8231 VMCASE(OP_PUSHLENDELIM,
8232 uint32_t len;
8233 CHECK_RETURN(decode_v32(d, &len));
8234 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
8235 set_delim_end(d);
8236 )
8237 VMCASE(OP_SETDELIM,
8238 set_delim_end(d);
8239 )
8240 VMCASE(OP_CHECKDELIM,
8241 /* We are guaranteed of this assert because we never allow ourselves to
8242 * consume bytes beyond data_end, which covers delim_end when non-NULL.
8243 */
8244 assert(!(d->delim_end && d->ptr > d->delim_end));
8245 if (d->ptr == d->delim_end)
8246 d->pc += longofs;
8247 )
8248 VMCASE(OP_CALL,
8249 d->callstack[d->call_len++] = d->pc;
8250 d->pc += longofs;
8251 )
8252 VMCASE(OP_RET,
8253 assert(d->call_len > 0);
8254 d->pc = d->callstack[--d->call_len];
8255 )
8256 VMCASE(OP_BRANCH,
8257 d->pc += longofs;
8258 )
8259 VMCASE(OP_TAG1,
8260 uint8_t expected;
8261 CHECK_SUSPEND(curbufleft(d) > 0);
8262 expected = (arg >> 8) & 0xff;
8263 if (*d->ptr == expected) {
8264 advance(d, 1);
8265 } else {
8266 int8_t shortofs;
8267 badtag:
8268 shortofs = arg;
8269 if (shortofs == LABEL_DISPATCH) {
8270 CHECK_RETURN(dispatch(d));
8271 } else {
8272 d->pc += shortofs;
8273 break; /* Avoid checkpoint(). */
8274 }
8275 }
8276 )
8277 VMCASE(OP_TAG2,
8278 uint16_t expected;
8279 CHECK_SUSPEND(curbufleft(d) > 0);
8280 expected = (arg >> 8) & 0xffff;
8281 if (curbufleft(d) >= 2) {
8282 uint16_t actual;
8283 memcpy(&actual, d->ptr, 2);
8284 if (expected == actual) {
8285 advance(d, 2);
8286 } else {
8287 goto badtag;
8288 }
8289 } else {
8290 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
8291 if (result == DECODE_MISMATCH) goto badtag;
8292 if (result >= 0) return result;
8293 }
8294 )
8295 VMCASE(OP_TAGN, {
8296 uint64_t expected;
8297 int32_t result;
8298 memcpy(&expected, d->pc, 8);
8299 d->pc += 2;
8300 result = upb_pbdecoder_checktag_slow(d, expected);
8301 if (result == DECODE_MISMATCH) goto badtag;
8302 if (result >= 0) return result;
8303 })
8304 VMCASE(OP_DISPATCH, {
8305 CHECK_RETURN(dispatch(d));
8306 })
8307 VMCASE(OP_HALT, {
8308 return d->size_param;
8309 })
8310 }
8311 }
8312 }
8313
8314
8315 /* BytesHandler handlers ******************************************************/
8316
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)8317 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
8318 upb_pbdecoder *d = closure;
8319 UPB_UNUSED(size_hint);
8320 d->top->end_ofs = UINT64_MAX;
8321 d->bufstart_ofs = 0;
8322 d->call_len = 1;
8323 d->callstack[0] = &halt;
8324 d->pc = pc;
8325 d->skip = 0;
8326 return d;
8327 }
8328
upb_pbdecoder_startjit(void * closure,const void * hd,size_t size_hint)8329 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
8330 upb_pbdecoder *d = closure;
8331 UPB_UNUSED(hd);
8332 UPB_UNUSED(size_hint);
8333 d->top->end_ofs = UINT64_MAX;
8334 d->bufstart_ofs = 0;
8335 d->call_len = 0;
8336 d->skip = 0;
8337 return d;
8338 }
8339
upb_pbdecoder_end(void * closure,const void * handler_data)8340 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
8341 upb_pbdecoder *d = closure;
8342 const upb_pbdecodermethod *method = handler_data;
8343 uint64_t end;
8344 char dummy;
8345
8346 if (d->residual_end > d->residual) {
8347 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
8348 return false;
8349 }
8350
8351 if (d->skip) {
8352 seterr(d, "Unexpected EOF inside skipped data");
8353 return false;
8354 }
8355
8356 if (d->top->end_ofs != UINT64_MAX) {
8357 seterr(d, "Unexpected EOF inside delimited string");
8358 return false;
8359 }
8360
8361 /* The user's end() call indicates that the message ends here. */
8362 end = offset(d);
8363 d->top->end_ofs = end;
8364
8365 #ifdef UPB_USE_JIT_X64
8366 if (method->is_native_) {
8367 const mgroup *group = (const mgroup*)method->group;
8368 if (d->top != d->stack)
8369 d->stack->end_ofs = 0;
8370 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
8371 } else
8372 #endif
8373 {
8374 const uint32_t *p = d->pc;
8375 d->stack->end_ofs = end;
8376 /* Check the previous bytecode, but guard against beginning. */
8377 if (p != method->code_base.ptr) p--;
8378 if (getop(*p) == OP_CHECKDELIM) {
8379 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
8380 assert(getop(*d->pc) == OP_TAG1 ||
8381 getop(*d->pc) == OP_TAG2 ||
8382 getop(*d->pc) == OP_TAGN ||
8383 getop(*d->pc) == OP_DISPATCH);
8384 d->pc = p;
8385 }
8386 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
8387 }
8388
8389 if (d->call_len != 0) {
8390 seterr(d, "Unexpected EOF inside submessage or group");
8391 return false;
8392 }
8393
8394 return true;
8395 }
8396
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)8397 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
8398 size_t size, const upb_bufhandle *handle) {
8399 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
8400
8401 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
8402 CHECK_RETURN(result);
8403
8404 return run_decoder_vm(decoder, group, handle);
8405 }
8406
8407
8408 /* Public API *****************************************************************/
8409
/* Returns the decoder to its initial parse state: the frame stack is emptied
 * down to its first frame (group number 0) and every buffer pointer is aimed
 * at the start of the residual buffer, which is marked empty. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* Frame stack. */
  d->top = d->stack;
  d->top->groupnum = 0;

  /* Buffer pointers. */
  d->buf = d->residual;
  d->ptr = d->residual;
  d->end = d->residual;
  d->residual_end = d->residual;
}
8418
upb_pbdecoder_create(upb_env * e,const upb_pbdecodermethod * m,upb_sink * sink)8419 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8420 upb_sink *sink) {
8421 const size_t default_max_nesting = 64;
8422 #ifndef NDEBUG
8423 size_t size_before = upb_env_bytesallocated(e);
8424 #endif
8425
8426 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8427 if (!d) return NULL;
8428
8429 d->method_ = m;
8430 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8431 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8432 if (!d->stack || !d->callstack) {
8433 return NULL;
8434 }
8435
8436 d->env = e;
8437 d->limit = d->stack + default_max_nesting - 1;
8438 d->stack_size = default_max_nesting;
8439
8440 upb_pbdecoder_reset(d);
8441 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8442
8443 assert(sink);
8444 if (d->method_->dest_handlers_) {
8445 if (sink->handlers != d->method_->dest_handlers_)
8446 return NULL;
8447 }
8448 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
8449
8450 /* If this fails, increase the value in decoder.h. */
8451 assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8452 return d;
8453 }
8454
/* Reports the decoder's current stream offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  uint64_t parsed = offset(d);
  return parsed;
}
8458
upb_pbdecoder_method(const upb_pbdecoder * d)8459 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8460 return d->method_;
8461 }
8462
upb_pbdecoder_input(upb_pbdecoder * d)8463 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8464 return &d->input_;
8465 }
8466
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)8467 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8468 return d->stack_size;
8469 }
8470
/* Changes the maximum nesting depth to "max" frames.
 *
 * Returns false, leaving the limit unchanged, when "max" is smaller than the
 * depth the decoder is currently at or when growing the stacks fails.  The
 * frame stack and the call stack are only reallocated when "max" exceeds the
 * size they were allocated for.
 *
 * Reallocation may move the frame stack, so every pointer into it (d->top and
 * d->limit) is re-derived from its index immediately afterwards; otherwise
 * they would keep pointing into the old allocation. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  size_t depth;

  assert(d->top >= d->stack);
  depth = (size_t)(d->top - d->stack);

  if (max < depth) {
    /* Can't set a limit smaller than what we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    size_t old_max = (size_t)((d->limit + 1) - d->stack);
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;

    /* Rebase the pointers into the (possibly moved) frame stack right away,
     * so the decoder stays consistent even if the next allocation fails. */
    d->top = d->stack + depth;
    d->limit = d->stack + old_max - 1;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
8503 /*
8504 ** upb::Encoder
8505 **
8506 ** Since we are implementing pure handlers (ie. without any out-of-band access
8507 ** to pre-computed lengths), we have to buffer all submessages before we can
8508 ** emit even their first byte.
8509 **
8510 ** Not knowing the size of submessages also means we can't write a perfect
8511 ** zero-copy implementation, even with buffering. Lengths are stored as
8512 ** varints, which means that we don't know how many bytes to reserve for the
8513 ** length until we know what the length is.
8514 **
8515 ** This leaves us with three main choices:
8516 **
8517 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
8518 ** once into the output buffer.
8519 **
8520 ** 2. attempt to buffer data directly into the output buffer, estimating how
8521 ** many bytes each length will take. When our guesses are wrong, use
8522 ** memmove() to grow or shrink the allotted space.
8523 **
8524 ** 3. buffer directly into the output buffer, allocating a max length
8525 ** ahead-of-time for each submessage length. If we overallocated, we waste
8526 ** space, but no memcpy() or memmove() is required. This approach requires
8527 ** defining a maximum size for submessages and rejecting submessages that
8528 ** exceed that size.
8529 **
8530 ** (2) and (3) have the potential to have better performance, but they are more
8531 ** complicated and subtle to implement:
8532 **
8533 ** (3) requires making an arbitrary choice of the maximum message size; it
8534 ** wastes space when submessages are shorter than this and fails
8535 ** completely when they are longer. This makes it more finicky and
8536 ** requires configuration based on the input. It also makes it impossible
8537 ** to perfectly match the output of reference encoders that always use the
8538 ** optimal amount of space for each length.
8539 **
8540 ** (2) requires guessing the size upfront, and if multiple lengths are
8541 ** guessed wrong the minimum required number of memmove() operations may
8542 ** be complicated to compute correctly. Implemented properly, it may have
8543 ** a useful amortized or average cost, but more investigation is required
8544 ** to determine this and what the optimal algorithm is to achieve it.
8545 **
8546 ** (1) makes you always pay for exactly one copy, but its implementation is
8547 ** the simplest and its performance is predictable.
8548 **
8549 ** So for now, we implement (1) only. If we wish to optimize later, we should
8550 ** be able to do it without affecting users.
8551 **
8552 ** The strategy is to buffer the segments of data that do *not* depend on
8553 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8554 ** and lengths. When the top-level submessage ends, we can go beginning to end,
8555 ** alternating the writing of lengths with memcpy() of the rest of the data.
8556 ** At the top level though, no buffering is required.
8557 */
8558
8559
8560 #include <stdlib.h>
8561
8562 /* The output buffer is divided into segments; a segment is a string of data
8563 * that is "ready to go" -- it does not need any varint lengths inserted into
8564 * the middle. The seams between segments are where varints will be inserted
8565 * once they are known.
8566 *
8567 * We also use the concept of a "run", which is a range of encoded bytes that
8568 * occur at a single submessage level. Every segment contains one or more runs.
8569 *
8570 * A segment can span messages. Consider:
8571 *
8572 * .--Submessage lengths---------.
8573 * | | |
8574 * | V V
8575 * V | |--------------- | |-----------------
8576 * Submessages: | |-----------------------------------------------
8577 * Top-level msg: ------------------------------------------------------------
8578 *
8579 * Segments: ----- ------------------- -----------------
8580 * Runs: *---- *--------------*--- *----------------
8581 * (* marks the start)
8582 *
8583 * Note that the top-level message is not in any segment because it does not
8584 * have any length preceding it.
8585 *
8586 * A segment is only interrupted when another length needs to be inserted. So
8587 * observe how the second segment spans both the inner submessage and part of
8588 * the next enclosing message. */
/* Bookkeeping for one segment of buffered output. */
typedef struct {
  /* Value that is varint-encoded and emitted in front of this segment. */
  uint32_t msglen;
  /* Number of buffered bytes that make up the segment itself. */
  uint32_t seglen;
} upb_pb_encoder_segment;
8593
8594 struct upb_pb_encoder {
8595 upb_env *env;
8596
8597 /* Our input and output. */
8598 upb_sink input_;
8599 upb_bytessink *output_;
8600
8601 /* The "subclosure" -- used as the inner closure as part of the bytessink
8602 * protocol. */
8603 void *subc;
8604
8605 /* The output buffer and limit, and our current write position. "buf"
8606 * initially points to "initbuf", but is dynamically allocated if we need to
8607 * grow beyond the initial size. */
8608 char *buf, *ptr, *limit;
8609
8610 /* The beginning of the current run, or undefined if we are at the top
8611 * level. */
8612 char *runbegin;
8613
8614 /* The list of segments we are accumulating. */
8615 upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
8616
8617 /* The stack of enclosing submessages. Each entry in the stack points to the
8618 * segment where this submessage's length is being accumulated. */
8619 int *stack, *top, *stacklimit;
8620
8621 /* Depth of startmsg/endmsg calls. */
8622 int depth;
8623 };
8624
8625 /* low-level buffering ********************************************************/
8626
8627 /* Low-level functions for interacting with the output buffer. */
8628
8629 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)8630 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8631 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8632 UPB_ASSERT_VAR(n, n == len);
8633 }
8634
top(upb_pb_encoder * e)8635 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8636 return &e->segbuf[*e->top];
8637 }
8638
8639 /* Call to ensure that at least "bytes" bytes are available for writing at
8640 * e->ptr. Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)8641 static bool reserve(upb_pb_encoder *e, size_t bytes) {
8642 if ((size_t)(e->limit - e->ptr) < bytes) {
8643 /* Grow buffer. */
8644 char *new_buf;
8645 size_t needed = bytes + (e->ptr - e->buf);
8646 size_t old_size = e->limit - e->buf;
8647
8648 size_t new_size = old_size;
8649
8650 while (new_size < needed) {
8651 new_size *= 2;
8652 }
8653
8654 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
8655
8656 if (new_buf == NULL) {
8657 return false;
8658 }
8659
8660 e->ptr = new_buf + (e->ptr - e->buf);
8661 e->runbegin = new_buf + (e->runbegin - e->buf);
8662 e->limit = new_buf + new_size;
8663 e->buf = new_buf;
8664 }
8665
8666 return true;
8667 }
8668
8669 /* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
8670 * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)8671 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8672 assert((size_t)(e->limit - e->ptr) >= bytes);
8673 e->ptr += bytes;
8674 }
8675
8676 /* Call when all of the bytes for a handler have been written. Flushes the
8677 * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)8678 static bool commit(upb_pb_encoder *e) {
8679 if (!e->top) {
8680 /* We aren't inside a delimited region. Flush our accumulated bytes to
8681 * the output.
8682 *
8683 * TODO(haberman): in the future we may want to delay flushing for
8684 * efficiency reasons. */
8685 putbuf(e, e->buf, e->ptr - e->buf);
8686 e->ptr = e->buf;
8687 }
8688
8689 return true;
8690 }
8691
8692 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)8693 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8694 if (!reserve(e, len)) {
8695 return false;
8696 }
8697
8698 memcpy(e->ptr, data, len);
8699 encoder_advance(e, len);
8700 return true;
8701 }
8702
8703 /* Finish the current run by adding the run totals to the segment and message
8704 * length. */
accumulate(upb_pb_encoder * e)8705 static void accumulate(upb_pb_encoder *e) {
8706 size_t run_len;
8707 assert(e->ptr >= e->runbegin);
8708 run_len = e->ptr - e->runbegin;
8709 e->segptr->seglen += run_len;
8710 top(e)->msglen += run_len;
8711 e->runbegin = e->ptr;
8712 }
8713
8714 /* Call to indicate the start of delimited region for which the full length is
8715 * not yet known. All data will be buffered until the length is known.
8716 * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)8717 static bool start_delim(upb_pb_encoder *e) {
8718 if (e->top) {
8719 /* We are already buffering, advance to the next segment and push it on the
8720 * stack. */
8721 accumulate(e);
8722
8723 if (++e->top == e->stacklimit) {
8724 /* TODO(haberman): grow stack? */
8725 return false;
8726 }
8727
8728 if (++e->segptr == e->seglimit) {
8729 /* Grow segment buffer. */
8730 size_t old_size =
8731 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8732 size_t new_size = old_size * 2;
8733 upb_pb_encoder_segment *new_buf =
8734 upb_env_realloc(e->env, e->segbuf, old_size, new_size);
8735
8736 if (new_buf == NULL) {
8737 return false;
8738 }
8739
8740 e->segptr = new_buf + (e->segptr - e->segbuf);
8741 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8742 e->segbuf = new_buf;
8743 }
8744 } else {
8745 /* We were previously at the top level, start buffering. */
8746 e->segptr = e->segbuf;
8747 e->top = e->stack;
8748 e->runbegin = e->ptr;
8749 }
8750
8751 *e->top = e->segptr - e->segbuf;
8752 e->segptr->seglen = 0;
8753 e->segptr->msglen = 0;
8754
8755 return true;
8756 }
8757
8758 /* Call to indicate the end of a delimited region. We now know the length of
8759 * the delimited region. If we are not nested inside any other delimited
8760 * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)8761 static bool end_delim(upb_pb_encoder *e) {
8762 size_t msglen;
8763 accumulate(e);
8764 msglen = top(e)->msglen;
8765
8766 if (e->top == e->stack) {
8767 /* All lengths are now available, emit all buffered data. */
8768 char buf[UPB_PB_VARINT_MAX_LEN];
8769 upb_pb_encoder_segment *s;
8770 const char *ptr = e->buf;
8771 for (s = e->segbuf; s <= e->segptr; s++) {
8772 size_t lenbytes = upb_vencode64(s->msglen, buf);
8773 putbuf(e, buf, lenbytes);
8774 putbuf(e, ptr, s->seglen);
8775 ptr += s->seglen;
8776 }
8777
8778 e->ptr = e->buf;
8779 e->top = NULL;
8780 } else {
8781 /* Need to keep buffering; propagate length info into enclosing
8782 * submessages. */
8783 --e->top;
8784 top(e)->msglen += msglen + upb_varint_size(msglen);
8785 }
8786
8787 return true;
8788 }
8789
8790
8791 /* tag_t **********************************************************************/
8792
/* A tag that was encoded ahead of time, stored together with its length. */

typedef struct {
  uint8_t bytes;  /* Number of bytes of "tag" that are in use. */
  char tag[7];    /* The encoded tag bytes. */
} tag_t;
8799
8800 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)8801 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8802 upb_handlerattr *attr) {
8803 uint32_t n = upb_fielddef_number(f);
8804
8805 tag_t *tag = malloc(sizeof(tag_t));
8806 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8807
8808 upb_handlerattr_init(attr);
8809 upb_handlerattr_sethandlerdata(attr, tag);
8810 upb_handlers_addcleanup(h, tag, free);
8811 }
8812
/* Appends a pre-encoded tag to the output buffer.  Returns false if buffer
 * space could not be reserved. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  size_t len = tag->bytes;
  return encode_bytes(e, tag->tag, len);
}
8816
8817
8818 /* encoding of wire types *****************************************************/
8819
/* Appends "val" to the output as a fixed-width 64-bit value.
 *
 * The protobuf wire format stores fixed-width integers least-significant
 * byte first.  The bytes are therefore produced with shifts instead of being
 * copied from memory, which gives the same output on little-endian and
 * big-endian hosts.  Returns false if buffer space could not be reserved. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  unsigned char le[sizeof(uint64_t)];
  size_t i;

  for (i = 0; i < sizeof(le); i++) {
    le[i] = (unsigned char)((val >> (8 * i)) & 0xff);
  }

  return encode_bytes(e, le, sizeof(le));
}
8824
/* Appends "val" to the output as a fixed-width 32-bit value.
 *
 * The protobuf wire format stores fixed-width integers least-significant
 * byte first.  The bytes are therefore produced with shifts instead of being
 * copied from memory, which gives the same output on little-endian and
 * big-endian hosts.  Returns false if buffer space could not be reserved. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  unsigned char le[sizeof(uint32_t)];
  size_t i;

  for (i = 0; i < sizeof(le); i++) {
    le[i] = (unsigned char)((val >> (8 * i)) & 0xff);
  }

  return encode_bytes(e, le, sizeof(le));
}
8829
/* Appends "val" to the output as a varint.  Space for the longest possible
 * varint is reserved up front; the write position then advances by the number
 * of bytes upb_vencode64() actually produced.  Returns false if the space
 * could not be reserved. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  size_t written;

  if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  written = upb_vencode64(val, e->ptr);
  encoder_advance(e, written);
  return true;
}
8838
/* Returns the bit pattern of a double as a 64-bit integer.  The bytes are
 * copied; no numeric conversion takes place. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8844
/* Returns the bit pattern of a float as a 32-bit integer.  The bytes are
 * copied; no numeric conversion takes place. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8850
8851
8852 /* encoding of proto types ****************************************************/
8853
startmsg(void * c,const void * hd)8854 static bool startmsg(void *c, const void *hd) {
8855 upb_pb_encoder *e = c;
8856 UPB_UNUSED(hd);
8857 if (e->depth++ == 0) {
8858 upb_bytessink_start(e->output_, 0, &e->subc);
8859 }
8860 return true;
8861 }
8862
/* End-of-message handler.  Decrements the nesting depth and, when the
 * outermost message has ended, ends the output bytes sink.  Always returns
 * true; "status" is not used. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *e = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);

  e->depth--;
  if (e->depth == 0) {
    upb_bytessink_end(e->output_);
  }
  return true;
}
8872
encode_startdelimfield(void * c,const void * hd)8873 static void *encode_startdelimfield(void *c, const void *hd) {
8874 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8875 return ok ? c : UPB_BREAK;
8876 }
8877
/* Ends a length-delimited field by closing the innermost delimited region.
 * The handler data is not used. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool ok;
  UPB_UNUSED(hd);
  ok = end_delim(c);
  return ok;
}
8882
encode_startgroup(void * c,const void * hd)8883 static void *encode_startgroup(void *c, const void *hd) {
8884 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8885 }
8886
/* Ends a group: writes the tag passed as handler data and commits it.
 * Returns false as soon as either step fails. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
8890
/* Start-of-string handler.  A string is simply a length-delimited field, so
 * this defers to encode_startdelimfield(); the size hint is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
8895
/* String-data handler: hands |len| bytes at |buf| to encode_bytes().
 * |hd| and |h| are unused.  Returns |len| when encode_bytes() succeeds and 0
 * when it fails. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(hd);
  UPB_UNUSED(h);
  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
8902
8903 #define T(type, ctype, convert, encode) \
8904 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
8905 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
8906 } \
8907 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
8908 UPB_UNUSED(hd); \
8909 return encode(e, (convert)(val)); \
8910 }
8911
T(double,double,dbl2uint64,encode_fixed64)8912 T(double, double, dbl2uint64, encode_fixed64)
8913 T(float, float, flt2uint32, encode_fixed32)
8914 T(int64, int64_t, uint64_t, encode_varint)
8915 T(int32, int32_t, uint32_t, encode_varint)
8916 T(fixed64, uint64_t, uint64_t, encode_fixed64)
8917 T(fixed32, uint32_t, uint32_t, encode_fixed32)
8918 T(bool, bool, bool, encode_varint)
8919 T(uint32, uint32_t, uint32_t, encode_varint)
8920 T(uint64, uint64_t, uint64_t, encode_varint)
8921 T(enum, int32_t, uint32_t, encode_varint)
8922 T(sfixed32, int32_t, uint32_t, encode_fixed32)
8923 T(sfixed64, int64_t, uint64_t, encode_fixed64)
8924 T(sint32, int32_t, upb_zzenc_32, encode_varint)
8925 T(sint64, int64_t, upb_zzenc_64, encode_varint)
8926
8927 #undef T
8928
8929
8930 /* code to build the handlers *************************************************/
8931
8932 static void newhandlers_callback(const void *closure, upb_handlers *h) {
8933 const upb_msgdef *m;
8934 upb_msg_field_iter i;
8935
8936 UPB_UNUSED(closure);
8937
8938 upb_handlers_setstartmsg(h, startmsg, NULL);
8939 upb_handlers_setendmsg(h, endmsg, NULL);
8940
8941 m = upb_handlers_msgdef(h);
8942 for(upb_msg_field_begin(&i, m);
8943 !upb_msg_field_done(&i);
8944 upb_msg_field_next(&i)) {
8945 const upb_fielddef *f = upb_msg_iter_field(&i);
8946 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
8947 upb_fielddef_packed(f);
8948 upb_handlerattr attr;
8949 upb_wiretype_t wt =
8950 packed ? UPB_WIRE_TYPE_DELIMITED
8951 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
8952
8953 /* Pre-encode the tag for this field. */
8954 new_tag(h, f, wt, &attr);
8955
8956 if (packed) {
8957 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
8958 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
8959 }
8960
8961 #define T(upper, lower, upbtype) \
8962 case UPB_DESCRIPTOR_TYPE_##upper: \
8963 if (packed) { \
8964 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
8965 } else { \
8966 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
8967 } \
8968 break;
8969
8970 switch (upb_fielddef_descriptortype(f)) {
8971 T(DOUBLE, double, double);
8972 T(FLOAT, float, float);
8973 T(INT64, int64, int64);
8974 T(INT32, int32, int32);
8975 T(FIXED64, fixed64, uint64);
8976 T(FIXED32, fixed32, uint32);
8977 T(BOOL, bool, bool);
8978 T(UINT32, uint32, uint32);
8979 T(UINT64, uint64, uint64);
8980 T(ENUM, enum, int32);
8981 T(SFIXED32, sfixed32, int32);
8982 T(SFIXED64, sfixed64, int64);
8983 T(SINT32, sint32, int32);
8984 T(SINT64, sint64, int64);
8985 case UPB_DESCRIPTOR_TYPE_STRING:
8986 case UPB_DESCRIPTOR_TYPE_BYTES:
8987 upb_handlers_setstartstr(h, f, encode_startstr, &attr);
8988 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
8989 upb_handlers_setstring(h, f, encode_strbuf, &attr);
8990 break;
8991 case UPB_DESCRIPTOR_TYPE_MESSAGE:
8992 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
8993 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
8994 break;
8995 case UPB_DESCRIPTOR_TYPE_GROUP: {
8996 /* Endgroup takes a different tag (wire_type = END_GROUP). */
8997 upb_handlerattr attr2;
8998 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
8999
9000 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
9001 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
9002
9003 upb_handlerattr_uninit(&attr2);
9004 break;
9005 }
9006 }
9007
9008 #undef T
9009
9010 upb_handlerattr_uninit(&attr);
9011 }
9012 }
9013
/* Returns the encoder to its idle state: nesting depth 0 and no current
 * segment pointer or stack top. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
9019
9020
9021 /* public API *****************************************************************/
9022
upb_pb_encoder_newhandlers(const upb_msgdef * m,const void * owner)9023 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
9024 const void *owner) {
9025 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9026 }
9027
upb_pb_encoder_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)9028 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9029 upb_bytessink *output) {
9030 const size_t initial_bufsize = 256;
9031 const size_t initial_segbufsize = 16;
9032 /* TODO(haberman): make this configurable. */
9033 const size_t stack_size = 64;
9034 #ifndef NDEBUG
9035 const size_t size_before = upb_env_bytesallocated(env);
9036 #endif
9037
9038 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9039 if (!e) return NULL;
9040
9041 e->buf = upb_env_malloc(env, initial_bufsize);
9042 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9043 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9044
9045 if (!e->buf || !e->segbuf || !e->stack) {
9046 return NULL;
9047 }
9048
9049 e->limit = e->buf + initial_bufsize;
9050 e->seglimit = e->segbuf + initial_segbufsize;
9051 e->stacklimit = e->stack + stack_size;
9052
9053 upb_pb_encoder_reset(e);
9054 upb_sink_reset(&e->input_, h, e);
9055
9056 e->env = env;
9057 e->output_ = output;
9058 e->subc = output->closure;
9059 e->ptr = e->buf;
9060
9061 /* If this fails, increase the value in encoder.h. */
9062 assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9063 return e;
9064 }
9065
upb_pb_encoder_input(upb_pb_encoder * e)9066 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
9067
9068
9069 #include <stdio.h>
9070 #include <stdlib.h>
9071 #include <string.h>
9072
upb_load_defs_from_descriptor(const char * str,size_t len,int * n,void * owner,upb_status * status)9073 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
9074 void *owner, upb_status *status) {
9075 /* Create handlers. */
9076 const upb_pbdecodermethod *decoder_m;
9077 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
9078 upb_env env;
9079 upb_pbdecodermethodopts opts;
9080 upb_pbdecoder *decoder;
9081 upb_descreader *reader;
9082 bool ok;
9083 upb_def **ret = NULL;
9084 upb_def **defs;
9085
9086 upb_pbdecodermethodopts_init(&opts, reader_h);
9087 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9088
9089 upb_env_init(&env);
9090 upb_env_reporterrorsto(&env, status);
9091
9092 reader = upb_descreader_create(&env, reader_h);
9093 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
9094
9095 /* Push input data. */
9096 ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
9097
9098 if (!ok) goto cleanup;
9099 defs = upb_descreader_getdefs(reader, owner, n);
9100 ret = malloc(sizeof(upb_def*) * (*n));
9101 memcpy(ret, defs, sizeof(upb_def*) * (*n));
9102
9103 cleanup:
9104 upb_env_uninit(&env);
9105 upb_handlers_unref(reader_h, &reader_h);
9106 upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9107 return ret;
9108 }
9109
/* Loads the defs contained in the serialized descriptor |str| (|len| bytes)
 * and adds them to symbol table |s|.  Errors are reported through |status|.
 * Returns false if the descriptor could not be loaded; otherwise returns the
 * result of upb_symtab_add().  The temporary def array is freed here. */
bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
                                     upb_status *status) {
  int count;
  bool added = false;
  /* The address of the local array pointer doubles as the owner token. */
  upb_def **loaded =
      upb_load_defs_from_descriptor(str, len, &count, &loaded, status);

  if (loaded != NULL) {
    added = upb_symtab_add(s, loaded, count, &loaded, status);
    free(loaded);
  }
  return added;
}
9120
/* Reads the whole file |filename| into a newly malloc()ed buffer.
 *
 *   filename: path of the file, opened in binary mode.
 *   len:      if non-NULL, receives the number of bytes read.
 *
 * Returns the buffer, which the caller releases with free(), or NULL if the
 * file cannot be opened, sized, read, or if memory cannot be allocated.  The
 * buffer is one byte larger than the file and that extra byte is set to
 * '\0', so text files can be used directly as C strings. */
char *upb_readfile(const char *filename, size_t *len) {
  long size;
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if (!f) return NULL;

  /* Determine the file size by seeking to the end and back. */
  if (fseek(f, 0, SEEK_END) != 0) goto error;
  size = ftell(f);
  if (size < 0) goto error;
  if (fseek(f, 0, SEEK_SET) != 0) goto error;

  buf = malloc((size_t)size + 1);
  if (!buf) goto error;
  if (size && fread(buf, (size_t)size, 1, f) != 1) goto error;
  buf[size] = '\0';

  fclose(f);
  if (len) *len = (size_t)size;
  return buf;

error:
  /* buf is still NULL if the failure happened before the allocation. */
  free(buf);
  fclose(f);
  return NULL;
}
9140
/* Reads the file |fname| and loads the descriptor it contains into |symtab|.
 * If the file cannot be read, an error naming the file is stored in |status|
 * (when |status| is non-NULL) and false is returned.  Otherwise returns the
 * result of upb_load_descriptor_into_symtab(). */
bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
                                          upb_status *status) {
  size_t nbytes;
  bool loaded;
  char *contents = upb_readfile(fname, &nbytes);

  if (contents == NULL) {
    if (status != NULL) {
      upb_status_seterrf(status, "Couldn't read file: %s", fname);
    }
    return false;
  }

  loaded = upb_load_descriptor_into_symtab(symtab, contents, nbytes, status);
  free(contents);
  return loaded;
}
9154 /*
9155 * upb::pb::TextPrinter
9156 *
9157 * OPT: This is not optimized at all. It uses printf() which parses the format
9158 * string every time, and it allocates memory for every put.
9159 */
9160
9161
9162 #include <ctype.h>
9163 #include <float.h>
9164 #include <inttypes.h>
9165 #include <stdarg.h>
9166 #include <stdio.h>
9167 #include <stdlib.h>
9168 #include <string.h>
9169
9170
9171 struct upb_textprinter {
9172 upb_sink input_;
9173 upb_bytessink *output_;
9174 int indent_depth_;
9175 bool single_line_;
9176 void *subc;
9177 };
9178
/* Jumps to the enclosing function's "err" label when |expr| evaluates to a
 * negative value.  The expansion already ends in a semicolon. */
#define CHECK(expr) if ((expr) < 0) goto err;
9180
/* Returns the part of |longname| that follows its last '.', or |longname|
 * itself when it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
9185
/* Writes the indentation for the current nesting depth: two spaces per
 * level, or nothing at all in single-line mode.  Always returns 0; the
 * results of the sink writes are not inspected. */
static int indent(upb_textprinter *p) {
  /* The length is derived from the literal so the two cannot drift apart;
   * a length larger than the literal would write its terminating NUL into
   * the text output. */
  static const char unit[] = "  ";
  int i;
  if (!p->single_line_) {
    for (i = 0; i < p->indent_depth_; i++) {
      upb_bytessink_putbuf(p->output_, p->subc, unit, sizeof(unit) - 1, NULL);
    }
  }
  return 0;
}
9193
/* Writes the separator that follows a field: a space in single-line mode and
 * a newline otherwise.  Always returns 0. */
static int endfield(upb_textprinter *p) {
  char separator = '\n';
  if (p->single_line_) {
    separator = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &separator, 1, NULL);
  return 0;
}
9199
/* Writes |len| bytes from |buf| to the printer's output with C-style
 * escaping.  Based on CEscapeInternal() from Google's protobuf release.
 *
 *   preserve_utf8: when true, bytes >= 0x80 are copied through unchanged;
 *                  when false they are escaped like any other non-printable
 *                  byte.
 *
 * Output is staged in a fixed stack buffer that is flushed to the sink
 * whenever it is nearly full and once more at the end.  Always returns 0;
 * the results of the sink writes are not inspected. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behavior. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* The longest thing written per input byte is a 4-character escape plus
     * the NUL that sprintf() appends, so 5 bytes of room are needed. */
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (*buf) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned int)ch);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
9246
/* printf()-style output to the printer's byte sink.  The text is formatted
 * into a temporary heap buffer that is freed before returning.
 *
 * Returns false if the length of the formatted text cannot be determined or
 * the buffer cannot be allocated; otherwise returns the result of
 * upb_bytessink_putbuf() converted to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  if (len < 0) {
    /* Formatting error: there is no usable length to allocate. */
    va_end(args);
    return false;
  }

  /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
  str = malloc((size_t)len + 1);
  if (!str) {
    /* Every va_start() must be matched by va_end() before returning. */
    va_end(args);
    return false;
  }
  written = vsprintf(str, fmt, args);
  va_end(args);
  UPB_ASSERT_VAR(written, written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  free(str);
  return ok;
}
9273
9274
9275 /* handlers *******************************************************************/
9276
textprinter_startmsg(void * c,const void * hd)9277 static bool textprinter_startmsg(void *c, const void *hd) {
9278 upb_textprinter *p = c;
9279 UPB_UNUSED(hd);
9280 if (p->indent_depth_ == 0) {
9281 upb_bytessink_start(p->output_, 0, &p->subc);
9282 }
9283 return true;
9284 }
9285
/* End-of-message handler.  |c| is the upb_textprinter; |hd| and |s| are
 * unused.  Ends the output byte sink when called at nesting depth 0.  Always
 * returns true. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  const bool top_level = (printer->indent_depth_ == 0);
  UPB_UNUSED(hd);
  UPB_UNUSED(s);

  if (top_level) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
9295
/* Defines textprinter_put<suffix>(), the value handler for one numeric type.
 * The handler prints "<field name>: <value>" using the printf conversion
 * |spec|, preceded by indentation and followed by the field separator.  It
 * returns false only if indent() or endfield() reports a negative result. */
#define TYPE(suffix, ctype, spec)                                              \
  static bool textprinter_put##suffix(void *closure,                           \
                                      const void *handler_data, ctype val) {   \
    upb_textprinter *printer = closure;                                        \
    const upb_fielddef *field = handler_data;                                  \
    CHECK(indent(printer));                                                    \
    putf(printer, "%s: " spec, upb_fielddef_name(field), val);                 \
    CHECK(endfield(printer));                                                  \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
  }
9308
textprinter_putbool(void * closure,const void * handler_data,bool val)9309 static bool textprinter_putbool(void *closure, const void *handler_data,
9310 bool val) {
9311 upb_textprinter *p = closure;
9312 const upb_fielddef *f = handler_data;
9313 CHECK(indent(p));
9314 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9315 CHECK(endfield(p));
9316 return true;
9317 err:
9318 return false;
9319 }
9320
9321 #define STRINGIFY_HELPER(x) #x
9322 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
9323
9324 TYPE(int32, int32_t, "%" PRId32)
9325 TYPE(int64, int64_t, "%" PRId64)
9326 TYPE(uint32, uint32_t, "%" PRIu32)
9327 TYPE(uint64, uint64_t, "%" PRIu64)
STRINGIFY_MACROVAL(FLT_DIG)9328 TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
9329 TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
9330
9331 #undef TYPE
9332
9333 /* Output a symbolic value from the enum if found, else just print as int32. */
9334 static bool textprinter_putenum(void *closure, const void *handler_data,
9335 int32_t val) {
9336 upb_textprinter *p = closure;
9337 const upb_fielddef *f = handler_data;
9338 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
9339 const char *label = upb_enumdef_iton(enum_def, val);
9340 if (label) {
9341 indent(p);
9342 putf(p, "%s: %s", upb_fielddef_name(f), label);
9343 endfield(p);
9344 } else {
9345 if (!textprinter_putint32(closure, handler_data, val))
9346 return false;
9347 }
9348 return true;
9349 }
9350
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)9351 static void *textprinter_startstr(void *closure, const void *handler_data,
9352 size_t size_hint) {
9353 upb_textprinter *p = closure;
9354 const upb_fielddef *f = handler_data;
9355 UPB_UNUSED(size_hint);
9356 indent(p);
9357 putf(p, "%s: \"", upb_fielddef_name(f));
9358 return p;
9359 }
9360
textprinter_endstr(void * closure,const void * handler_data)9361 static bool textprinter_endstr(void *closure, const void *handler_data) {
9362 upb_textprinter *p = closure;
9363 UPB_UNUSED(handler_data);
9364 putf(p, "\"");
9365 endfield(p);
9366 return true;
9367 }
9368
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)9369 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
9370 size_t len, const upb_bufhandle *handle) {
9371 upb_textprinter *p = closure;
9372 const upb_fielddef *f = hd;
9373 UPB_UNUSED(handle);
9374 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
9375 return len;
9376 err:
9377 return 0;
9378 }
9379
textprinter_startsubmsg(void * closure,const void * handler_data)9380 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
9381 upb_textprinter *p = closure;
9382 const char *name = handler_data;
9383 CHECK(indent(p));
9384 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
9385 p->indent_depth_++;
9386 return p;
9387 err:
9388 return UPB_BREAK;
9389 }
9390
textprinter_endsubmsg(void * closure,const void * handler_data)9391 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
9392 upb_textprinter *p = closure;
9393 UPB_UNUSED(handler_data);
9394 p->indent_depth_--;
9395 CHECK(indent(p));
9396 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
9397 CHECK(endfield(p));
9398 return true;
9399 err:
9400 return false;
9401 }
9402
/* Callback given to upb_handlers_newfrozen(): installs the text printer's
 * handlers on |h| for the message itself and for each of its fields.  |c| is
 * unused.  Every field handler receives the field's upb_fielddef as handler
 * data, except the submessage handlers, which receive the name to print. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *msg = upb_handlers_msgdef(h);
  upb_msg_field_iter it;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  upb_msg_field_begin(&it, msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *field = upb_msg_iter_field(&it);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&attr, field);

    switch (upb_fielddef_type(field)) {
      /* Numeric and bool types each have a dedicated value handler. */
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, field, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, field, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, field, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, field, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, field, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, textprinter_putbool, &attr);
        break;

      /* Enum values arrive through the int32 handler slot. */
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, textprinter_putenum, &attr);
        break;

      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, field, textprinter_startstr, &attr);
        upb_handlers_setstring(h, field, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, field, textprinter_endstr, &attr);
        break;

      case UPB_TYPE_MESSAGE: {
        /* Tag-delimited fields are labelled with the short name of their
         * message type; all others with the field name. */
        const char *label;
        if (upb_fielddef_istagdelim(field)) {
          label =
              shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(field)));
        } else {
          label = upb_fielddef_name(field);
        }
        upb_handlerattr_sethandlerdata(&attr, label);
        upb_handlers_setstartsubmsg(h, field, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, field, textprinter_endsubmsg, &attr);
        break;
      }
    }

    upb_msg_field_next(&it);
  }
}
9461
/* Puts printer |p| at nesting depth 0 and selects single-line or multi-line
 * output according to |single_line|. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
9466
9467
9468 /* Public API *****************************************************************/
9469
upb_textprinter_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)9470 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
9471 upb_bytessink *output) {
9472 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
9473 if (!p) return NULL;
9474
9475 p->output_ = output;
9476 upb_sink_reset(&p->input_, h, p);
9477 textprinter_reset(p, false);
9478
9479 return p;
9480 }
9481
upb_textprinter_newhandlers(const upb_msgdef * m,const void * owner)9482 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
9483 const void *owner) {
9484 return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
9485 }
9486
upb_textprinter_input(upb_textprinter * p)9487 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
9488
/* Switches printer |p| between single-line (true) and multi-line (false)
 * output.  The nesting depth is left untouched. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  p->single_line_ = single_line;
}
9492
9493
9494 /* Index is descriptor type. */
9495 const uint8_t upb_pb_native_wire_types[] = {
9496 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
9497 UPB_WIRE_TYPE_64BIT, /* DOUBLE */
9498 UPB_WIRE_TYPE_32BIT, /* FLOAT */
9499 UPB_WIRE_TYPE_VARINT, /* INT64 */
9500 UPB_WIRE_TYPE_VARINT, /* UINT64 */
9501 UPB_WIRE_TYPE_VARINT, /* INT32 */
9502 UPB_WIRE_TYPE_64BIT, /* FIXED64 */
9503 UPB_WIRE_TYPE_32BIT, /* FIXED32 */
9504 UPB_WIRE_TYPE_VARINT, /* BOOL */
9505 UPB_WIRE_TYPE_DELIMITED, /* STRING */
9506 UPB_WIRE_TYPE_START_GROUP, /* GROUP */
9507 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
9508 UPB_WIRE_TYPE_DELIMITED, /* BYTES */
9509 UPB_WIRE_TYPE_VARINT, /* UINT32 */
9510 UPB_WIRE_TYPE_VARINT, /* ENUM */
9511 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
9512 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
9513 UPB_WIRE_TYPE_VARINT, /* SINT32 */
9514 UPB_WIRE_TYPE_VARINT, /* SINT64 */
9515 };
9516
9517 /* A basic branch-based decoder, uses 32-bit values to get good performance
9518 * on 32-bit architectures (but performs well on 64-bits also).
9519 * This scheme comes from the original Google Protobuf implementation
9520 * (proto2). */
upb_vdecode_max8_branch32(upb_decoderet r)9521 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
9522 upb_decoderet err = {NULL, 0};
9523 const char *p = r.p;
9524 uint32_t low = (uint32_t)r.val;
9525 uint32_t high = 0;
9526 uint32_t b;
9527 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
9528 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
9529 b = *(p++); low |= (b & 0x7fU) << 28;
9530 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done;
9531 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done;
9532 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
9533 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
9534 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
9535 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
9536 return err;
9537
9538 done:
9539 r.val = ((uint64_t)high << 32) | low;
9540 r.p = p;
9541 return r;
9542 }
9543
9544 /* Like the previous, but uses 64-bit values. */
upb_vdecode_max8_branch64(upb_decoderet r)9545 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
9546 const char *p = r.p;
9547 uint64_t val = r.val;
9548 uint64_t b;
9549 upb_decoderet err = {NULL, 0};
9550 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
9551 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
9552 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
9553 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
9554 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
9555 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
9556 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
9557 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
9558 return err;
9559
9560 done:
9561 r.val = val;
9562 r.p = p;
9563 return r;
9564 }
9565
/* Given eight encoded varint bytes packed into |v|, returns an integer with
 * exactly one bit set: the 0x80 bit position of the lowest byte whose
 * continuation bit is clear, i.e. the end of the varint.  Subtracting one
 * from the result gives a mask covering only bits that belong to the varint.
 * Returns 0 if every byte has its continuation bit set (unterminated). */
static uint64_t upb_get_vstopbit(uint64_t v) {
  /* Fill in all payload bits so that adding one carries up to, and sets, the
   * first continuation bit that is clear. */
  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
  const uint64_t carried = filled + 1;
  return carried & ~filled;
}
9574
9575 /* A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. */
upb_vdecode_max8_massimino(upb_decoderet r)9576 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
9577 uint64_t b;
9578 uint64_t stop_bit;
9579 upb_decoderet my_r;
9580 memcpy(&b, r.p, sizeof(b));
9581 stop_bit = upb_get_vstopbit(b);
9582 b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
9583 b += b & 0x007f007f007f007fULL;
9584 b += 3 * (b & 0x0000ffff0000ffffULL);
9585 b += 15 * (b & 0x00000000ffffffffULL);
9586 if (stop_bit == 0) {
9587 /* Error: unterminated varint. */
9588 upb_decoderet err_r = {(void*)0, 0};
9589 return err_r;
9590 }
9591 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
9592 r.val | (b << 7));
9593 return my_r;
9594 }
9595
9596 /* A branchless decoder. Credit to Daniel Wright for the bit-twiddling. */
upb_vdecode_max8_wright(upb_decoderet r)9597 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
9598 uint64_t b;
9599 uint64_t stop_bit;
9600 upb_decoderet my_r;
9601 memcpy(&b, r.p, sizeof(b));
9602 stop_bit = upb_get_vstopbit(b);
9603 b &= (stop_bit - 1);
9604 b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
9605 b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
9606 b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
9607 if (stop_bit == 0) {
9608 /* Error: unterminated varint. */
9609 upb_decoderet err_r = {(void*)0, 0};
9610 return err_r;
9611 }
9612 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
9613 r.val | (b << 14));
9614 return my_r;
9615 }
9616
9617 #line 1 "upb/json/parser.rl"
9618 /*
9619 ** upb::json::Parser (upb_json_parser)
9620 **
9621 ** A parser that uses the Ragel State Machine Compiler to generate
9622 ** the finite automata.
9623 **
9624 ** Ragel only natively handles regular languages, but we can manually
9625 ** program it a bit to handle context-free languages like JSON, by using
9626 ** the "fcall" and "fret" constructs.
9627 **
9628 ** This parser can handle the basics, but needs several things to be fleshed
9629 ** out:
9630 **
9631 ** - handling of unicode escape sequences (including high surrogate pairs).
9632 ** - properly check and report errors for unknown fields, stack overflow,
9633 ** improper array nesting (or lack of nesting).
9634 ** - handling of base64 sequences with padding characters.
9635 ** - handling of push-back (non-success returns from sink functions).
9636 ** - handling of keys/escape-sequences/etc that span input buffers.
9637 */
9638
9639 #include <stdio.h>
9640 #include <stdint.h>
9641 #include <assert.h>
9642 #include <string.h>
9643 #include <stdlib.h>
9644 #include <errno.h>
9645
9646
9647 #define UPB_JSON_MAX_DEPTH 64
9648
9649 typedef struct {
9650 upb_sink sink;
9651
9652 /* The current message in which we're parsing, and the field whose value we're
9653 * expecting next. */
9654 const upb_msgdef *m;
9655 const upb_fielddef *f;
9656
9657 /* We are in a repeated-field context, ready to emit mapentries as
9658 * submessages. This flag alters the start-of-object (open-brace) behavior to
9659 * begin a sequence of mapentry messages rather than a single submessage. */
9660 bool is_map;
9661
9662 /* We are in a map-entry message context. This flag is set when parsing the
9663 * value field of a single map entry and indicates to all value-field parsers
9664 * (subobjects, strings, numbers, and bools) that the map-entry submessage
9665 * should end as soon as the value is parsed. */
9666 bool is_mapentry;
9667
9668 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
9669 * message's map field that we're currently parsing. This differs from |f|
9670 * because |f| is the field in the *current* message (i.e., the map-entry
9671 * message itself), not the parent's field that leads to this map. */
9672 const upb_fielddef *mapfield;
9673 } upb_jsonparser_frame;
9674
9675 struct upb_json_parser {
9676 upb_env *env;
9677 upb_byteshandler input_handler_;
9678 upb_bytessink input_;
9679
9680 /* Stack to track the JSON scopes we are in. */
9681 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
9682 upb_jsonparser_frame *top;
9683 upb_jsonparser_frame *limit;
9684
9685 upb_status status;
9686
9687 /* Ragel's internal parsing stack for the parsing state machine. */
9688 int current_state;
9689 int parser_stack[UPB_JSON_MAX_DEPTH];
9690 int parser_top;
9691
9692 /* The handle for the current buffer. */
9693 const upb_bufhandle *handle;
9694
9695 /* Accumulate buffer. See details in parser.rl. */
9696 const char *accumulated;
9697 size_t accumulated_len;
9698 char *accumulate_buf;
9699 size_t accumulate_buf_size;
9700
9701 /* Multi-part text data. See details in parser.rl. */
9702 int multipart_state;
9703 upb_selector_t string_selector;
9704
9705 /* Input capture. See details in parser.rl. */
9706 const char *capture;
9707
9708 /* Intermediate result of parsing a unicode escape sequence. */
9709 uint32_t digit;
9710 };
9711
/* Makes the enclosing function return false when |cond| evaluates to false.
 * The expansion has no trailing semicolon; the caller supplies it. */
#define PARSER_CHECK_RETURN(cond) if (!(cond)) return false

/* Sentinel object: its address is used to signal that a capture has been
 * suspended. */
static char suspend_capture;
9716
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)9717 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9718 upb_handlertype_t type) {
9719 upb_selector_t sel;
9720 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9721 UPB_ASSERT_VAR(ok, ok);
9722 return sel;
9723 }
9724
parser_getsel(upb_json_parser * p)9725 static upb_selector_t parser_getsel(upb_json_parser *p) {
9726 return getsel_for_handlertype(
9727 p, upb_handlers_getprimitivehandlertype(p->top->f));
9728 }
9729
/* Returns true if one more frame can be pushed onto the parser's stack.
 * Otherwise records "Nesting too deep" in the parser's status, reports it to
 * the environment, and returns false. */
static bool check_stack(upb_json_parser *p) {
  if ((p->top + 1) != p->limit) {
    return true;
  }

  upb_status_seterrmsg(&p->status, "Nesting too deep");
  upb_env_reporterror(p->env, &p->status);
  return false;
}
9739
/* Overflow-checked addition.  Stores a + b in |*c| and returns true when the
 * sum fits in size_t; otherwise returns false and leaves |*c| untouched.
 *
 * There are GCC/Clang built-ins for overflow checking which we could start
 * using if there was any performance benefit to it. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
9748
/* Returns a * b, or SIZE_MAX if the product does not fit in size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  size_t product;

  if (b == 0) {
    return 0;
  }

  /* size_t is unsigned, so wrapping here is defined behavior; overflow is
   * detected afterwards by checking that division undoes the product. */
  product = a * b;
  return (product / b == a) ? product : SIZE_MAX;
}
9757
9758
9759 /* Base64 decoding ************************************************************/
9760
9761 /* TODO(haberman): make this streaming. */
9762
/* Maps every possible byte value to its 6-bit base64 value, or to -1 for
 * bytes that are not base64 digits.  Sixteen entries per row; the comment in
 * front of each row gives the index of its first entry. */
static const signed char b64table[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20: '+' is 62 and '/' is 63 */
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
  /* 0x30: '0'-'9' are 52-61 */
             52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  /* 0x40: 'A'-'O' are 0-14 */
             -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50: 'P'-'Z' are 15-25 */
             15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  /* 0x60: 'a'-'o' are 26-40 */
             -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70: 'p'-'z' are 41-51 */
             41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
9797
9798 /* Returns the table value sign-extended to 32 bits. Knowing that the upper
9799 * bits will be 1 for unrecognized characters makes it easier to check for
9800 * this error condition later (see below). */
b64lookup(unsigned char ch)9801 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9802
/* Returns true when |ch| is neither a base64 alphabet character nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
9806
/* Decodes |len| bytes of base64 text at |ptr| and pushes the decoded bytes to
 * the current frame's sink using selector |sel|.
 *
 * Input is consumed in groups of four characters.  A group containing '='
 * padding yields one or two bytes and ends decoding.  Returns false, after
 * setting and reporting p->status, when the length is not a multiple of four,
 * when a non-base64 character is seen, or when padding is malformed.
 *
 * NOTE(review): any input following a padded group is ignored rather than
 * rejected -- confirm whether that is intended. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(&p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    /* b64lookup() returns -1 for unrecognized characters.  Shift as unsigned:
     * left-shifting a negative signed value is undefined behavior.  The
     * sign-extended all-ones pattern still sets the top bit of |val|. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; set if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  /* At least one of the four characters is not in the base64 alphabet: it is
   * either padding ('=') or garbage. */
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3]) ) {
    upb_status_seterrf(&p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  } else if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    assert(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(&p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  upb_env_reporterror(p->env, &p->status);
  return false;
}
9890
9891
9892 /* Accumulate buffer **********************************************************/
9893
9894 /* Functionality for accumulating a buffer.
9895 *
9896 * Some parts of the parser need an entire value as a contiguous string. For
9897 * example, to look up a member name in a hash table, or to turn a string into
9898 * a number, the relevant library routines need the input string to be in
9899 * contiguous memory, even if the value spanned two or more buffers in the
9900 * input. These routines handle that.
9901 *
9902 * In the common case we can just point to the input buffer to get this
9903 * contiguous string and avoid any actual copy. So we optimistically begin
9904 * this way. But there are a few cases where we must instead copy into a
9905 * separate buffer:
9906 *
9907 * 1. The string was not contiguous in the input (it spanned buffers).
9908 *
9909 * 2. The string included escape sequences that need to be interpreted to get
9910 * the true value in a contiguous buffer. */
9911
/* Debug check that no accumulated data is pending: the accumulated pointer
 * must be NULL and the accumulated length zero. */
static void assert_accumulate_empty(upb_json_parser *p) {
  /* p is otherwise referenced only from within the asserts below. */
  UPB_UNUSED(p);
  assert(p->accumulated == NULL);
  assert(p->accumulated_len == 0);
}
9917
/* Forgets any accumulated data by resetting the accumulated pointer and
 * length.  The backing accumulate_buf itself is not touched. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
9922
9923 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)9924 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9925 void *mem;
9926 size_t old_size = p->accumulate_buf_size;
9927 size_t new_size = UPB_MAX(old_size, 128);
9928 while (new_size < need) {
9929 new_size = saturating_multiply(new_size, 2);
9930 }
9931
9932 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
9933 if (!mem) {
9934 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
9935 upb_env_reporterror(p->env, &p->status);
9936 return false;
9937 }
9938
9939 p->accumulate_buf = mem;
9940 p->accumulate_buf_size = new_size;
9941 return true;
9942 }
9943
9944 /* Logically appends the given data to the append buffer.
9945 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9946 * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)9947 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9948 bool can_alias) {
9949 size_t need;
9950
9951 if (!p->accumulated && can_alias) {
9952 p->accumulated = buf;
9953 p->accumulated_len = len;
9954 return true;
9955 }
9956
9957 if (!checked_add(p->accumulated_len, len, &need)) {
9958 upb_status_seterrmsg(&p->status, "Integer overflow.");
9959 upb_env_reporterror(p->env, &p->status);
9960 return false;
9961 }
9962
9963 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9964 return false;
9965 }
9966
9967 if (p->accumulated != p->accumulate_buf) {
9968 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9969 p->accumulated = p->accumulate_buf;
9970 }
9971
9972 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9973 p->accumulated_len += len;
9974 return true;
9975 }
9976
9977 /* Returns a pointer to the data accumulated since the last accumulate_clear()
9978 * call, and writes the length to *len. This with point either to the input
9979 * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)9980 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9981 assert(p->accumulated);
9982 *len = p->accumulated_len;
9983 return p->accumulated;
9984 }
9985
9986
/* Multi-part text data *******************************************************/
9988
9989 /* When we have text data in the input, it can often come in multiple segments.
9990 * For example, there may be some raw string data followed by an escape
9991 * sequence. The two segments are processed with different logic. Also buffer
9992 * seams in the input can cause multiple segments.
9993 *
9994 * As we see segments, there are two main cases for how we want to process them:
9995 *
9996 * 1. we want to push the captured input directly to string handlers.
9997 *
9998 * 2. we need to accumulate all the parts into a contiguous buffer for further
9999 * processing (field name lookup, string->number conversion, etc). */
10000
/* This is the set of states for p->multipart_state. */
enum {
  /* We are not currently processing multipart data.  multipart_text() treats
   * text arriving in this state as an internal error. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer (multipart_text() forwards each part to accumulate_append()). */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers (multipart_text() calls upb_sink_putstring() with
   * p->string_selector). */
  MULTIPART_PUSHEAGERLY = 2
};
10014
10015 /* Start a multi-part text value where we accumulate the data for processing at
10016 * the end. */
multipart_startaccum(upb_json_parser * p)10017 static void multipart_startaccum(upb_json_parser *p) {
10018 assert_accumulate_empty(p);
10019 assert(p->multipart_state == MULTIPART_INACTIVE);
10020 p->multipart_state = MULTIPART_ACCUMULATE;
10021 }
10022
10023 /* Start a multi-part text value where we immediately push text data to a string
10024 * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)10025 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10026 assert_accumulate_empty(p);
10027 assert(p->multipart_state == MULTIPART_INACTIVE);
10028 p->multipart_state = MULTIPART_PUSHEAGERLY;
10029 p->string_selector = sel;
10030 }
10031
/* Handles one segment of a multi-part text value according to
 * p->multipart_state: the segment is either appended to the accumulate buffer
 * or pushed straight to the current string sink.  |can_alias| tells whether
 * |buf| may be referenced without copying.  Returns false if accumulation
 * fails or if no multipart value is active (reported as an internal error). */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  if (p->multipart_state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (p->multipart_state == MULTIPART_PUSHEAGERLY) {
    /* Only hand the buffer handle to the sink when aliasing is permitted. */
    const upb_bufhandle *handle = can_alias ? p->handle : NULL;
    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
    return true;
  }

  if (p->multipart_state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  return true;
}
10056
10057 /* Note: this invalidates the accumulate buffer! Call only after reading its
10058 * contents. */
multipart_end(upb_json_parser * p)10059 static void multipart_end(upb_json_parser *p) {
10060 assert(p->multipart_state != MULTIPART_INACTIVE);
10061 p->multipart_state = MULTIPART_INACTIVE;
10062 accumulate_clear(p);
10063 }
10064
10065
10066 /* Input capture **************************************************************/
10067
10068 /* Functionality for capturing a region of the input as text. Gracefully
10069 * handles the case where a buffer seam occurs in the middle of the captured
10070 * region. */
10071
/* Marks |ptr| as the start of a captured input region.  A multipart value
 * must already be active and no other capture may be in progress. */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  assert(p->multipart_state != MULTIPART_INACTIVE);
  assert(p->capture == NULL);

  p->capture = ptr;
}
10077
/* Ends the current capture at |ptr|, handing the captured bytes
 * [p->capture, ptr) to multipart_text() with aliasing allowed.  On success
 * the capture marker is cleared; on failure it is left in place and false is
 * returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  assert(p->capture);

  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
10087
10088 /* This is called at the end of each input buffer (ie. when we have hit a
10089 * buffer seam). If we are in the middle of capturing the input, this
10090 * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)10091 static void capture_suspend(upb_json_parser *p, const char **ptr) {
10092 if (!p->capture) return;
10093
10094 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
10095 /* We use this as a signal that we were in the middle of capturing, and
10096 * that capturing should resume at the beginning of the next buffer.
10097 *
10098 * We can't use *ptr here, because we have no guarantee that this pointer
10099 * will be valid when we resume (if the underlying memory is freed, then
10100 * using the pointer at all, even to compare to NULL, is likely undefined
10101 * behavior). */
10102 p->capture = &suspend_capture;
10103 } else {
10104 /* Need to back up the pointer to the beginning of the capture, since
10105 * we were not able to actually preserve it. */
10106 *ptr = p->capture;
10107 }
10108 }
10109
/* Counterpart of capture_suspend(): if a capture was suspended, restarts it
 * at |ptr|.  Does nothing when no capture is pending. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }

  assert(p->capture == &suspend_capture);
  p->capture = ptr;
}
10116
10117
10118 /* Callbacks from the parser **************************************************/
10119
10120 /* These are the functions called directly from the parser itself.
10121 * We define these in the same order as their declarations in the parser. */
10122
/* Translates the character following a backslash in a JSON string into the
 * character it denotes.  Any other input trips an assert; if asserts are
 * compiled out, 'x' is returned. */
static char escape_char(char in) {
  char decoded = 'x';

  switch (in) {
    /* These escapes stand for themselves. */
    case '"':
    case '/':
    case '\\':
      decoded = in;
      break;

    /* Control-character escapes. */
    case 'b': decoded = '\b'; break;
    case 'f': decoded = '\f'; break;
    case 'n': decoded = '\n'; break;
    case 'r': decoded = '\r'; break;
    case 't': decoded = '\t'; break;

    default:
      assert(0);
      break;
  }

  return decoded;
}
10138
/* Decodes the single-character escape at *ptr and emits the resulting
 * character as one byte of multipart text (never aliased, since it lives on
 * our stack). */
static bool escape(upb_json_parser *p, const char *ptr) {
  const char unescaped = escape_char(*ptr);
  return multipart_text(p, &unescaped, 1, false);
}
10143
/* Resets the hex-digit accumulator; hexdigit() shifts each subsequent digit
 * into p->digit. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
10147
/* Shifts p->digit left by four bits and adds the value of the hexadecimal
 * digit at *ptr.  The character must be one of 0-9, a-f or A-F (asserted). */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char ch = *ptr;
  int nibble;

  if (ch >= '0' && ch <= '9') {
    nibble = ch - '0';
  } else if (ch >= 'a' && ch <= 'f') {
    nibble = (ch - 'a') + 10;
  } else {
    assert(ch >= 'A' && ch <= 'F');
    nibble = (ch - 'A') + 10;
  }

  p->digit <<= 4;
  p->digit += nibble;
}
10162
/* Emits the code point collected in p->digit as UTF-8 multipart text.
 * Handles one-, two- and three-byte encodings, i.e. \u0000 through \uFFFF.
 *
 * TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
 * we have to wait for the next escape to get the full code point). */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char utf8[3];
  int nbytes;

  if (cp <= 0x7F) {
    /* 0xxxxxxx */
    utf8[0] = cp;
    nbytes = 1;
  } else if (cp <= 0x07FF) {
    /* 110xxxxx 10xxxxxx */
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    nbytes = 2;
  } else {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    nbytes = 3;
  }

  return multipart_text(p, utf8, nbytes, false);
}
10190
/* Parser callback: a run of literal text begins at |ptr|; start capturing. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10194
/* Parser callback: the run of literal text ends at |ptr|; finish the capture
 * and return whether the captured text was processed successfully. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  return captured;
}
10198
/* Parser callback: a number begins at |ptr|.  Its text is accumulated so that
 * it can be converted as a whole once complete. */
static void start_number(upb_json_parser *p, const char *ptr) {
  multipart_startaccum(p);
  capture_begin(p, ptr);
}
10203
10204 static bool parse_number(upb_json_parser *p);
10205
/* Parser callback: the number ends at |ptr|.  Finishes the capture and, if
 * that succeeds, converts and emits the accumulated text. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr) && parse_number(p);
}
10213
/* Converts the accumulated text to the numeric type of the current field
 * (p->top->f) and pushes the value to the current sink.
 *
 * The multipart value is always ended before returning.  Returns false, after
 * setting and reporting p->status, if the text is not entirely consumed by
 * the conversion or the value is out of range for the field type. */
static bool parse_number(upb_json_parser *p) {
  size_t len;
  const char *buf;
  const char *myend;
  char *end;

  /* strtol() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NUL-terminated buffer. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);
  myend = buf + len - 1;  /* One for NUL. */

  /* The strto*() functions only ever set errno on failure; they never clear
   * it.  Reset it here so that a stale ERANGE left behind by earlier code does
   * not cause a perfectly valid number to be rejected below. */
  errno = 0;

  /* XXX: We are using strtol to parse integers, but this is wrong as even
   * integers can be represented as 1e6 (for example), which strtol can't
   * handle correctly.
   *
   * XXX: Also, we can't handle large integers properly because strto[u]ll
   * isn't in C89.
   *
   * XXX: Also, we don't properly check floats for overflow, since strtof
   * isn't in C89. */
  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32: {
      long val = strtol(p->accumulated, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_INT64: {
      long long val = strtol(p->accumulated, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(p->accumulated, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT64: {
      unsigned long long val = strtoul(p->accumulated, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(p->accumulated, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_FLOAT: {
      float val = strtod(p->accumulated, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
      break;
    }
    default:
      assert(false);
  }

  multipart_end(p);

  return true;

err:
  upb_status_seterrf(&p->status, "error parsing number: %s", buf);
  upb_env_reporterror(p->env, &p->status);
  multipart_end(p);
  return false;
}
10302
/* Pushes boolean |val| to the sink for the current field.  If the current
 * field is not of bool type, an error is recorded and reported and false is
 * returned. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool ok;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
    UPB_ASSERT_VAR(ok, ok);
    return true;
  }

  upb_status_seterrf(&p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10319
/* Parser callback: a string value begins for the current field.
 *
 * String/bytes fields get a new parser frame and a STARTSTR event; enum
 * fields stay in the current frame and accumulate the symbolic name.  Any
 * other field type is an error.  Returns false on error or when the frame
 * stack is exhausted. */
static bool start_stringval(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  assert(p->top->f);

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
      /* No need to push a frame -- symbolic enum names in quotes remain in the
       * current parser frame.
       *
       * Enum string values must accumulate so we can look up the value in a
       * table once it is complete. */
      multipart_startaccum(p);
      return true;
    }

    upb_status_seterrf(&p->status,
                       "String specified for non-string/non-enum field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) {
    return false;
  }

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  inner = p->top + 1;
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_map = false;
  inner->is_mapentry = false;
  p->top = inner;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
    /* For STRING fields we push data directly to the handlers as it is
     * parsed.  We don't do this yet for BYTES fields, because our base64
     * decoder is not streaming.
     *
     * TODO(haberman): make base64 decoding streaming also. */
    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  } else {
    multipart_startaccum(p);
  }

  return true;
}
10368
/* Parser callback: the string value for the current field is complete.
 *
 * BYTES fields base64-decode the accumulated text and then, like STRING
 * fields, emit ENDSTR and pop the string sub-frame.  ENUM fields resolve the
 * accumulated symbolic name to its integer value.  The multipart value is
 * ended before returning, except when base64 decoding fails.  Returns false
 * on any error. */
static bool end_stringval(upb_json_parser *p) {
  bool ok = true;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        /* The "*" precision of "%.*s" consumes an int argument, so the size_t
         * length must be cast explicitly (as end_membername() does). */
        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
        upb_env_reporterror(p->env, &p->status);
      }

      break;
    }

    default:
      assert(false);
      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
      upb_env_reporterror(p->env, &p->status);
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
10421
/* Parser callback: an object member name begins.  No field may be selected
 * yet in the current frame; the name is accumulated so that it can be looked
 * up once complete. */
static void start_member(upb_json_parser *p) {
  assert(p->top->f == NULL);
  multipart_startaccum(p);
}
10426
10427 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10428 * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)10429 static bool parse_mapentry_key(upb_json_parser *p) {
10430
10431 size_t len;
10432 const char *buf = accumulate_getptr(p, &len);
10433
10434 /* Emit the key field. We do a bit of ad-hoc parsing here because the
10435 * parser state machine has already decided that this is a string field
10436 * name, and we are reinterpreting it as some arbitrary key type. In
10437 * particular, integer and bool keys are quoted, so we need to parse the
10438 * quoted string contents here. */
10439
10440 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10441 if (p->top->f == NULL) {
10442 upb_status_seterrmsg(&p->status, "mapentry message has no key");
10443 upb_env_reporterror(p->env, &p->status);
10444 return false;
10445 }
10446 switch (upb_fielddef_type(p->top->f)) {
10447 case UPB_TYPE_INT32:
10448 case UPB_TYPE_INT64:
10449 case UPB_TYPE_UINT32:
10450 case UPB_TYPE_UINT64:
10451 /* Invoke end_number. The accum buffer has the number's text already. */
10452 if (!parse_number(p)) {
10453 return false;
10454 }
10455 break;
10456 case UPB_TYPE_BOOL:
10457 if (len == 4 && !strncmp(buf, "true", 4)) {
10458 if (!parser_putbool(p, true)) {
10459 return false;
10460 }
10461 } else if (len == 5 && !strncmp(buf, "false", 5)) {
10462 if (!parser_putbool(p, false)) {
10463 return false;
10464 }
10465 } else {
10466 upb_status_seterrmsg(&p->status,
10467 "Map bool key not 'true' or 'false'");
10468 upb_env_reporterror(p->env, &p->status);
10469 return false;
10470 }
10471 multipart_end(p);
10472 break;
10473 case UPB_TYPE_STRING:
10474 case UPB_TYPE_BYTES: {
10475 upb_sink subsink;
10476 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10477 upb_sink_startstr(&p->top->sink, sel, len, &subsink);
10478 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10479 upb_sink_putstring(&subsink, sel, buf, len, NULL);
10480 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10481 upb_sink_endstr(&subsink, sel);
10482 multipart_end(p);
10483 break;
10484 }
10485 default:
10486 upb_status_seterrmsg(&p->status, "Invalid field type for map key");
10487 upb_env_reporterror(p->env, &p->status);
10488 return false;
10489 }
10490
10491 return true;
10492 }
10493
10494 /* Helper: emit one map entry (as a submessage in the map field sequence). This
10495 * is invoked from end_membername(), at the end of the map entry's key string,
10496 * with the map key in the accumulate buffer. It parses the key from that
10497 * buffer, emits the handler calls to start the mapentry submessage (setting up
10498 * its subframe in the process), and sets up state in the subframe so that the
10499 * value parser (invoked next) will emit the mapentry's value field and then
10500 * end the mapentry message. */
10501
handle_mapentry(upb_json_parser * p)10502 static bool handle_mapentry(upb_json_parser *p) {
10503 const upb_fielddef *mapfield;
10504 const upb_msgdef *mapentrymsg;
10505 upb_jsonparser_frame *inner;
10506 upb_selector_t sel;
10507
10508 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10509 * for the mapentry itself, and then set |f| in that frame so that the map
10510 * value field is parsed, and also set a flag to end the frame after the
10511 * map-entry value is parsed. */
10512 if (!check_stack(p)) return false;
10513
10514 mapfield = p->top->mapfield;
10515 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10516
10517 inner = p->top + 1;
10518 p->top->f = mapfield;
10519 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10520 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10521 inner->m = mapentrymsg;
10522 inner->mapfield = mapfield;
10523 inner->is_map = false;
10524
10525 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10526 * the key field value to the sink, and these handlers will pop the frame
10527 * if they see is_mapentry (when invoked by the parser state machine, they
10528 * would have just seen the map-entry value, not key). */
10529 inner->is_mapentry = false;
10530 p->top = inner;
10531
10532 /* send STARTMSG in submsg frame. */
10533 upb_sink_startmsg(&p->top->sink);
10534
10535 parse_mapentry_key(p);
10536
10537 /* Set up the value field to receive the map-entry value. */
10538 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10539 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
10540 p->top->mapfield = mapfield;
10541 if (p->top->f == NULL) {
10542 upb_status_seterrmsg(&p->status, "mapentry message has no value");
10543 upb_env_reporterror(p->env, &p->status);
10544 return false;
10545 }
10546
10547 return true;
10548 }
10549
/* Parser callback: an object member name is complete.  Inside a map the name
 * is the map key and is handled by handle_mapentry(); otherwise it is looked
 * up as a field of the current message and becomes the frame's current field.
 * Returns false if the field does not exist or the map entry cannot be
 * started. */
static bool end_membername(upb_json_parser *p) {
  size_t name_len;
  const char *name;
  const upb_fielddef *field;

  assert(!p->top->f);

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  name = accumulate_getptr(p, &name_len);
  field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field == NULL) {
    /* TODO(haberman): Ignore unknown fields if requested/configured to do
     * so. */
    upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)name_len,
                       name);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  p->top->f = field;
  multipart_end(p);
  return true;
}
10574
/* Parser callback: an object member (name and value) is complete.  If the
 * value just parsed belonged to a map entry, the entry's message and
 * submessage are closed and its frame popped.  In all cases the current
 * frame's field is cleared, ready for the next member. */
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    upb_status endmsg_status = UPB_STATUS_INIT;
    const upb_fielddef *mapfield;
    upb_selector_t endsubmsg_sel;
    bool found;

    assert(p->top > p->stack);

    /* ENDMSG goes to the mapentry submessage's own sink. */
    upb_sink_endmsg(&p->top->sink, &endmsg_status);
    mapfield = p->top->mapfield;

    /* ENDSUBMSG goes to the enclosing repeated-field-of-mapentries frame. */
    p->top--;
    found = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG,
                                     &endsubmsg_sel);
    UPB_ASSERT_VAR(found, found);
    upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);
  }

  p->top->f = NULL;
}
10597
/* Parser callback: a JSON object begins as the value of the current field.
 * For a map field a new frame is pushed in repeated-field (sequence) context;
 * for a submessage field a new frame is pushed in submessage context.  Any
 * other field type is an error.  Returns false on error or when the frame
 * stack is exhausted. */
static bool start_subobject(upb_json_parser *p) {
  const upb_fielddef *field = p->top->f;
  upb_jsonparser_frame *inner;
  upb_selector_t sel;
  bool as_map;

  assert(p->top->f);

  as_map = upb_fielddef_ismap(field);
  if (!as_map && !upb_fielddef_issubmsg(field)) {
    upb_status_seterrf(&p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) {
    return false;
  }

  inner = p->top + 1;
  if (as_map) {
    /* Beginning of a map: the entries arrive as a sequence. */
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(field);
    inner->mapfield = field;
  } else {
    /* Beginning of an ordinary subobject. */
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(field);
  }
  inner->f = NULL;
  inner->is_map = as_map;
  inner->is_mapentry = false;
  p->top = inner;

  return true;
}
10647
/* Parser callback: the JSON object for a map or submessage field has ended.
 * Pops the frame and emits ENDSEQ (map) or ENDSUBMSG (submessage) on the
 * parent frame's sink. */
static void end_subobject(upb_json_parser *p) {
  const bool closing_map = p->top->is_map;
  upb_selector_t sel;

  p->top--;

  if (closing_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);
  } else {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }
}
10661
/* Parser callback: a JSON array begins as the value of the current field.
 * The field must be a sequence; a new frame is pushed that shares the parent
 * frame's message and field.  Returns false if the field is not a sequence or
 * the frame stack is exhausted. */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *outer = p->top;
  upb_jsonparser_frame *inner;
  upb_selector_t startseq_sel;

  assert(p->top->f);

  if (upb_fielddef_isseq(outer->f)) {
    if (!check_stack(p)) {
      return false;
    }

    inner = outer + 1;
    startseq_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&outer->sink, startseq_sel, &inner->sink);
    inner->m = outer->m;
    inner->f = outer->f;
    inner->is_map = false;
    inner->is_mapentry = false;
    p->top = inner;
    return true;
  }

  upb_status_seterrf(&p->status,
                     "Array specified for non-repeated field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10689
/* Parser callback: a JSON array has ended.  Pops the array's frame and emits
 * ENDSEQ on the parent frame's sink. */
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq_sel;

  assert(p->top > p->stack);
  p->top--;

  endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, endseq_sel);
}
10699
/* Parser callback: a JSON object begins.  Emits STARTMSG on the current sink
 * unless the current frame is a map. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map) {
    return;
  }
  upb_sink_startmsg(&p->top->sink);
}
10705
/* Handles the end of a JSON object.  For non-map frames, sends endmsg on the
 * current frame's sink using a freshly cleared local status, and forwards
 * that status to the env if it is not ok.  Map frames emit nothing here. */
static void end_object(upb_json_parser *p) {
  upb_status status;

  if (p->top->is_map) {
    return;
  }

  upb_status_clear(&status);
  upb_sink_endmsg(&p->top->sink, &status);
  if (upb_ok(&status)) {
    return;
  }
  upb_env_reporterror(p->env, &status);
}
10716
10717
/* Jumps to the enclosing function's `error` label when x evaluates to false.
 * The expansion is wrapped in do/while(0) so that it forms exactly one
 * statement: a bare `if` would capture a following `else` when the macro is
 * used as the body of an unbraced if/else. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
10719
10720
10721 /* The actual parser **********************************************************/
10722
10723 /* What follows is the Ragel parser itself. The language is specified in Ragel
10724 * and the actions call our C functions above.
10725 *
10726 * Ragel has an extensive set of functionality, and we use only a small part of
10727 * it. There are many action types but we only use a few:
10728 *
10729 * ">" -- transition into a machine
10730 * "%" -- transition out of a machine
10731 * "@" -- transition into a final state of a machine.
10732 *
10733 * "@" transitions are tricky because a machine can transition into a final
10734 * state repeatedly. But in some cases we know this can't happen, for example
10735 * a string which is delimited by a final '"' can only transition into its
10736 * final state once, when the closing '"' is seen. */
10737
10738
10739 #line 1218 "upb/json/parser.rl"
10740
10741
10742
10743 #line 1130 "upb/json/parser.c"
/* Ragel-generated action lists.  parse() enters this table at the offset
 * given by _json_trans_actions[]; the entry at that offset is a count, and
 * the following `count` entries are the action ids dispatched by the switch
 * in parse().  Generated data: do not edit by hand. */
static const char _json_actions[] = {
  0, 1, 0, 1, 2, 1, 3, 1,
  5, 1, 6, 1, 7, 1, 8, 1,
  10, 1, 12, 1, 13, 1, 14, 1,
  15, 1, 16, 1, 17, 1, 21, 1,
  25, 1, 27, 2, 3, 8, 2, 4,
  5, 2, 6, 2, 2, 6, 8, 2,
  11, 9, 2, 13, 15, 2, 14, 15,
  2, 18, 1, 2, 19, 27, 2, 20,
  9, 2, 22, 27, 2, 23, 27, 2,
  24, 27, 2, 26, 27, 3, 14, 11,
  9
};
10757
/* Indexed by the current state (cs): offset into _json_trans_keys[] at which
 * that state's keys begin.  Generated data: do not edit by hand. */
static const unsigned char _json_key_offsets[] = {
  0, 0, 4, 9, 14, 15, 19, 24,
  29, 34, 38, 42, 45, 48, 50, 54,
  58, 60, 62, 67, 69, 71, 80, 86,
  92, 98, 104, 106, 115, 116, 116, 116,
  121, 126, 131, 132, 133, 134, 135, 135,
  136, 137, 138, 138, 139, 140, 141, 141,
  146, 151, 152, 156, 161, 166, 171, 175,
  175, 178, 178, 178
};
10768
/* Transition keys for every state, stored back to back.  For a given state,
 * parse() binary-searches first _json_single_lengths[cs] single-character
 * keys, and then _json_range_lengths[cs] (low, high) pairs that immediately
 * follow them.  Generated data: do not edit by hand. */
static const char _json_trans_keys[] = {
  32, 123, 9, 13, 32, 34, 125, 9,
  13, 32, 34, 125, 9, 13, 34, 32,
  58, 9, 13, 32, 93, 125, 9, 13,
  32, 44, 125, 9, 13, 32, 44, 125,
  9, 13, 32, 34, 9, 13, 45, 48,
  49, 57, 48, 49, 57, 46, 69, 101,
  48, 57, 69, 101, 48, 57, 43, 45,
  48, 57, 48, 57, 48, 57, 46, 69,
  101, 48, 57, 34, 92, 34, 92, 34,
  47, 92, 98, 102, 110, 114, 116, 117,
  48, 57, 65, 70, 97, 102, 48, 57,
  65, 70, 97, 102, 48, 57, 65, 70,
  97, 102, 48, 57, 65, 70, 97, 102,
  34, 92, 34, 45, 91, 102, 110, 116,
  123, 48, 57, 34, 32, 93, 125, 9,
  13, 32, 44, 93, 9, 13, 32, 93,
  125, 9, 13, 97, 108, 115, 101, 117,
  108, 108, 114, 117, 101, 32, 34, 125,
  9, 13, 32, 34, 125, 9, 13, 34,
  32, 58, 9, 13, 32, 93, 125, 9,
  13, 32, 44, 125, 9, 13, 32, 44,
  125, 9, 13, 32, 34, 9, 13, 32,
  9, 13, 0
};
10794
/* Indexed by the current state (cs): number of single-character keys that
 * state has in _json_trans_keys[].  Generated data: do not edit by hand. */
static const char _json_single_lengths[] = {
  0, 2, 3, 3, 1, 2, 3, 3,
  3, 2, 2, 1, 3, 0, 2, 2,
  0, 0, 3, 2, 2, 9, 0, 0,
  0, 0, 2, 7, 1, 0, 0, 3,
  3, 3, 1, 1, 1, 1, 0, 1,
  1, 1, 0, 1, 1, 1, 0, 3,
  3, 1, 2, 3, 3, 3, 2, 0,
  1, 0, 0, 0
};
10805
/* Indexed by the current state (cs): number of (low, high) range pairs that
 * state has in _json_trans_keys[], stored after its single-character keys.
 * Generated data: do not edit by hand. */
static const char _json_range_lengths[] = {
  0, 1, 1, 1, 0, 1, 1, 1,
  1, 1, 1, 1, 0, 1, 1, 1,
  1, 1, 1, 0, 0, 0, 3, 3,
  3, 3, 0, 1, 0, 0, 0, 1,
  1, 1, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 1,
  1, 0, 1, 1, 1, 1, 1, 0,
  1, 0, 0, 0
};
10816
/* Indexed by the current state (cs): base position of that state's entries in
 * _json_indicies[].  parse() adds the position of the matching key to this
 * base; when no key matches, it has added the single-key and range counts,
 * which selects the slot after all of the state's keys.
 * Generated data: do not edit by hand. */
static const short _json_index_offsets[] = {
  0, 0, 4, 9, 14, 16, 20, 25,
  30, 35, 39, 43, 46, 50, 52, 56,
  60, 62, 64, 69, 72, 75, 85, 89,
  93, 97, 101, 104, 113, 115, 116, 117,
  122, 127, 132, 134, 136, 138, 140, 141,
  143, 145, 147, 148, 150, 152, 154, 155,
  160, 165, 167, 171, 176, 181, 186, 190,
  191, 194, 195, 196
};
10827
/* Maps the per-state slot computed in parse() to a transition id, which is
 * then used to index _json_trans_targs[] and _json_trans_actions[].
 * Generated data: do not edit by hand. */
static const char _json_indicies[] = {
  0, 2, 0, 1, 3, 4, 5, 3,
  1, 6, 7, 8, 6, 1, 9, 1,
  10, 11, 10, 1, 11, 1, 1, 11,
  12, 13, 14, 15, 13, 1, 16, 17,
  8, 16, 1, 17, 7, 17, 1, 18,
  19, 20, 1, 19, 20, 1, 22, 23,
  23, 21, 24, 1, 23, 23, 24, 21,
  25, 25, 26, 1, 26, 1, 26, 21,
  22, 23, 23, 20, 21, 28, 29, 27,
  31, 32, 30, 33, 33, 33, 33, 33,
  33, 33, 33, 34, 1, 35, 35, 35,
  1, 36, 36, 36, 1, 37, 37, 37,
  1, 38, 38, 38, 1, 40, 41, 39,
  42, 43, 44, 45, 46, 47, 48, 43,
  1, 49, 1, 50, 51, 53, 54, 1,
  53, 52, 55, 56, 54, 55, 1, 56,
  1, 1, 56, 52, 57, 1, 58, 1,
  59, 1, 60, 1, 61, 62, 1, 63,
  1, 64, 1, 65, 66, 1, 67, 1,
  68, 1, 69, 70, 71, 72, 70, 1,
  73, 74, 75, 73, 1, 76, 1, 77,
  78, 77, 1, 78, 1, 1, 78, 79,
  80, 81, 82, 80, 1, 83, 84, 75,
  83, 1, 84, 74, 84, 1, 85, 86,
  86, 1, 1, 1, 1, 0
};
10855
/* Indexed by transition id: the state the machine moves to.  parse() stops
 * as soon as the current state becomes 0.
 * Generated data: do not edit by hand. */
static const char _json_trans_targs[] = {
  1, 0, 2, 3, 4, 56, 3, 4,
  56, 5, 5, 6, 7, 8, 9, 56,
  8, 9, 11, 12, 18, 57, 13, 15,
  14, 16, 17, 20, 58, 21, 20, 58,
  21, 19, 22, 23, 24, 25, 26, 20,
  58, 21, 28, 30, 31, 34, 39, 43,
  47, 29, 59, 59, 32, 31, 29, 32,
  33, 35, 36, 37, 38, 59, 40, 41,
  42, 59, 44, 45, 46, 59, 48, 49,
  55, 48, 49, 55, 50, 50, 51, 52,
  53, 54, 55, 53, 54, 59, 56
};
10869
/* Indexed by transition id: offset into _json_actions[] of the action list to
 * run for that transition.  parse() treats 0 as "no actions".
 * Generated data: do not edit by hand. */
static const char _json_trans_actions[] = {
  0, 0, 0, 21, 77, 53, 0, 47,
  23, 17, 0, 0, 15, 19, 19, 50,
  0, 0, 0, 0, 0, 1, 0, 0,
  0, 0, 0, 3, 13, 0, 0, 35,
  5, 11, 0, 38, 7, 7, 7, 41,
  44, 9, 62, 56, 25, 0, 0, 0,
  31, 29, 33, 59, 15, 0, 27, 0,
  0, 0, 0, 0, 0, 68, 0, 0,
  0, 71, 0, 0, 0, 65, 21, 77,
  53, 0, 47, 23, 17, 0, 0, 15,
  19, 19, 50, 0, 0, 74, 0
};
10883
/* Initial state of the machine; json_parser_reset() stores it as the parser's
 * current state. */
static const int json_start = 1;

/* Ragel-generated entry states of the individual machines.  The values 10,
 * 19 and 27 are the same states that the actions in parse() switch to with
 * literal constants; the named constants themselves are only referenced in
 * end(), to silence unused-variable warnings. */
static const int json_en_number_machine = 10;
static const int json_en_string_machine = 19;
static const int json_en_value_machine = 27;
static const int json_en_main = 1;
10890
10891
10892 #line 1221 "upb/json/parser.rl"
10893
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)10894 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
10895 const upb_bufhandle *handle) {
10896 upb_json_parser *parser = closure;
10897
10898 /* Variables used by Ragel's generated code. */
10899 int cs = parser->current_state;
10900 int *stack = parser->parser_stack;
10901 int top = parser->parser_top;
10902
10903 const char *p = buf;
10904 const char *pe = buf + size;
10905
10906 parser->handle = handle;
10907
10908 UPB_UNUSED(hd);
10909 UPB_UNUSED(handle);
10910
10911 capture_resume(parser, buf);
10912
10913
10914 #line 1301 "upb/json/parser.c"
10915 {
10916 int _klen;
10917 unsigned int _trans;
10918 const char *_acts;
10919 unsigned int _nacts;
10920 const char *_keys;
10921
10922 if ( p == pe )
10923 goto _test_eof;
10924 if ( cs == 0 )
10925 goto _out;
10926 _resume:
10927 _keys = _json_trans_keys + _json_key_offsets[cs];
10928 _trans = _json_index_offsets[cs];
10929
10930 _klen = _json_single_lengths[cs];
10931 if ( _klen > 0 ) {
10932 const char *_lower = _keys;
10933 const char *_mid;
10934 const char *_upper = _keys + _klen - 1;
10935 while (1) {
10936 if ( _upper < _lower )
10937 break;
10938
10939 _mid = _lower + ((_upper-_lower) >> 1);
10940 if ( (*p) < *_mid )
10941 _upper = _mid - 1;
10942 else if ( (*p) > *_mid )
10943 _lower = _mid + 1;
10944 else {
10945 _trans += (unsigned int)(_mid - _keys);
10946 goto _match;
10947 }
10948 }
10949 _keys += _klen;
10950 _trans += _klen;
10951 }
10952
10953 _klen = _json_range_lengths[cs];
10954 if ( _klen > 0 ) {
10955 const char *_lower = _keys;
10956 const char *_mid;
10957 const char *_upper = _keys + (_klen<<1) - 2;
10958 while (1) {
10959 if ( _upper < _lower )
10960 break;
10961
10962 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
10963 if ( (*p) < _mid[0] )
10964 _upper = _mid - 2;
10965 else if ( (*p) > _mid[1] )
10966 _lower = _mid + 2;
10967 else {
10968 _trans += (unsigned int)((_mid - _keys)>>1);
10969 goto _match;
10970 }
10971 }
10972 _trans += _klen;
10973 }
10974
10975 _match:
10976 _trans = _json_indicies[_trans];
10977 cs = _json_trans_targs[_trans];
10978
10979 if ( _json_trans_actions[_trans] == 0 )
10980 goto _again;
10981
10982 _acts = _json_actions + _json_trans_actions[_trans];
10983 _nacts = (unsigned int) *_acts++;
10984 while ( _nacts-- > 0 )
10985 {
10986 switch ( *_acts++ )
10987 {
10988 case 0:
10989 #line 1133 "upb/json/parser.rl"
10990 { p--; {cs = stack[--top]; goto _again;} }
10991 break;
10992 case 1:
10993 #line 1134 "upb/json/parser.rl"
10994 { p--; {stack[top++] = cs; cs = 10; goto _again;} }
10995 break;
10996 case 2:
10997 #line 1138 "upb/json/parser.rl"
10998 { start_text(parser, p); }
10999 break;
11000 case 3:
11001 #line 1139 "upb/json/parser.rl"
11002 { CHECK_RETURN_TOP(end_text(parser, p)); }
11003 break;
11004 case 4:
11005 #line 1145 "upb/json/parser.rl"
11006 { start_hex(parser); }
11007 break;
11008 case 5:
11009 #line 1146 "upb/json/parser.rl"
11010 { hexdigit(parser, p); }
11011 break;
11012 case 6:
11013 #line 1147 "upb/json/parser.rl"
11014 { CHECK_RETURN_TOP(end_hex(parser)); }
11015 break;
11016 case 7:
11017 #line 1153 "upb/json/parser.rl"
11018 { CHECK_RETURN_TOP(escape(parser, p)); }
11019 break;
11020 case 8:
11021 #line 1159 "upb/json/parser.rl"
11022 { p--; {cs = stack[--top]; goto _again;} }
11023 break;
11024 case 9:
11025 #line 1162 "upb/json/parser.rl"
11026 { {stack[top++] = cs; cs = 19; goto _again;} }
11027 break;
11028 case 10:
11029 #line 1164 "upb/json/parser.rl"
11030 { p--; {stack[top++] = cs; cs = 27; goto _again;} }
11031 break;
11032 case 11:
11033 #line 1169 "upb/json/parser.rl"
11034 { start_member(parser); }
11035 break;
11036 case 12:
11037 #line 1170 "upb/json/parser.rl"
11038 { CHECK_RETURN_TOP(end_membername(parser)); }
11039 break;
11040 case 13:
11041 #line 1173 "upb/json/parser.rl"
11042 { end_member(parser); }
11043 break;
11044 case 14:
11045 #line 1179 "upb/json/parser.rl"
11046 { start_object(parser); }
11047 break;
11048 case 15:
11049 #line 1182 "upb/json/parser.rl"
11050 { end_object(parser); }
11051 break;
11052 case 16:
11053 #line 1188 "upb/json/parser.rl"
11054 { CHECK_RETURN_TOP(start_array(parser)); }
11055 break;
11056 case 17:
11057 #line 1192 "upb/json/parser.rl"
11058 { end_array(parser); }
11059 break;
11060 case 18:
11061 #line 1197 "upb/json/parser.rl"
11062 { start_number(parser, p); }
11063 break;
11064 case 19:
11065 #line 1198 "upb/json/parser.rl"
11066 { CHECK_RETURN_TOP(end_number(parser, p)); }
11067 break;
11068 case 20:
11069 #line 1200 "upb/json/parser.rl"
11070 { CHECK_RETURN_TOP(start_stringval(parser)); }
11071 break;
11072 case 21:
11073 #line 1201 "upb/json/parser.rl"
11074 { CHECK_RETURN_TOP(end_stringval(parser)); }
11075 break;
11076 case 22:
11077 #line 1203 "upb/json/parser.rl"
11078 { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11079 break;
11080 case 23:
11081 #line 1205 "upb/json/parser.rl"
11082 { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11083 break;
11084 case 24:
11085 #line 1207 "upb/json/parser.rl"
11086 { /* null value */ }
11087 break;
11088 case 25:
11089 #line 1209 "upb/json/parser.rl"
11090 { CHECK_RETURN_TOP(start_subobject(parser)); }
11091 break;
11092 case 26:
11093 #line 1210 "upb/json/parser.rl"
11094 { end_subobject(parser); }
11095 break;
11096 case 27:
11097 #line 1215 "upb/json/parser.rl"
11098 { p--; {cs = stack[--top]; goto _again;} }
11099 break;
11100 #line 1487 "upb/json/parser.c"
11101 }
11102 }
11103
11104 _again:
11105 if ( cs == 0 )
11106 goto _out;
11107 if ( ++p != pe )
11108 goto _resume;
11109 _test_eof: {}
11110 _out: {}
11111 }
11112
11113 #line 1242 "upb/json/parser.rl"
11114
11115 if (p != pe) {
11116 upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
11117 upb_env_reporterror(parser->env, &parser->status);
11118 } else {
11119 capture_suspend(parser, &p);
11120 }
11121
11122 error:
11123 /* Save parsing state back to parser. */
11124 parser->current_state = cs;
11125 parser->parser_top = top;
11126
11127 return p - buf;
11128 }
11129
end(void * closure,const void * hd)11130 bool end(void *closure, const void *hd) {
11131 UPB_UNUSED(closure);
11132 UPB_UNUSED(hd);
11133
11134 /* Prevent compile warning on unused static constants. */
11135 UPB_UNUSED(json_start);
11136 UPB_UNUSED(json_en_number_machine);
11137 UPB_UNUSED(json_en_string_machine);
11138 UPB_UNUSED(json_en_value_machine);
11139 UPB_UNUSED(json_en_main);
11140 return true;
11141 }
11142
/* Puts the parser back into its initial state: the frame stack is reduced to
 * its bottom frame (no field selected, both map flags cleared), the Ragel
 * machine is set to its start state with an empty call stack, the
 * accumulate/capture/multipart bookkeeping is cleared, and the status is
 * reset. */
static void json_parser_reset(upb_json_parser *p) {
  int cs;
  int top;
  upb_jsonparser_frame *bottom = p->stack;

  bottom->f = NULL;
  bottom->is_map = false;
  bottom->is_mapentry = false;
  p->top = bottom;

  /* Emit Ragel initialization of the parser. */

#line 1541 "upb/json/parser.c"
  {
  cs = json_start;
  top = 0;
  }

#line 1282 "upb/json/parser.rl"
  p->parser_top = top;
  p->current_state = cs;
  accumulate_clear(p);
  p->accumulated = NULL;
  p->capture = NULL;
  p->multipart_state = MULTIPART_INACTIVE;
  upb_status_clear(&p->status);
}
11169
11170
11171 /* Public API *****************************************************************/
11172
upb_json_parser_create(upb_env * env,upb_sink * output)11173 upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
11174 #ifndef NDEBUG
11175 const size_t size_before = upb_env_bytesallocated(env);
11176 #endif
11177 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11178 if (!p) return false;
11179
11180 p->env = env;
11181 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11182 p->accumulate_buf = NULL;
11183 p->accumulate_buf_size = 0;
11184 upb_byteshandler_init(&p->input_handler_);
11185 upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
11186 upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
11187 upb_bytessink_reset(&p->input_, &p->input_handler_, p);
11188
11189 json_parser_reset(p);
11190 upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11191 p->top->m = upb_handlers_msgdef(output->handlers);
11192
11193 /* If this fails, uncomment and increase the value in parser.h. */
11194 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
11195 assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11196 return p;
11197 }
11198
upb_json_parser_input(upb_json_parser * p)11199 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11200 return &p->input_;
11201 }
11202 /*
11203 ** This currently uses snprintf() to format primitives, and could be optimized
11204 ** further.
11205 */
11206
11207
11208 #include <stdlib.h>
11209 #include <stdio.h>
11210 #include <string.h>
11211 #include <stdint.h>
11212
/* State of a JSON printer: where output goes, how deeply nested the printer
 * currently is, and per-depth comma bookkeeping. */
struct upb_json_printer {
  upb_sink input_;
  /* BytesSink closure.  Filled in by upb_bytessink_start() when the outermost
   * message starts, and passed along with every upb_bytessink_putbuf(). */
  void *subc_;
  /* Bytes sink that all printed output is written to. */
  upb_bytessink *output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output.  Incremented when a message, sequence or map frame is opened and
   * decremented when it is closed; also used as the index into first_elem_. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
11233
/* StringPiece; a pointer plus a length.  Used as handler data carrying a
 * field name: newstrpc() fills it from a field definition and putkey() prints
 * it as a JSON object key. */
typedef struct {
  const char *ptr;  /* Start of the string; not owned by the strpc. */
  size_t len;       /* Length in bytes, as computed with strlen(). */
} strpc;
11239
newstrpc(upb_handlers * h,const upb_fielddef * f)11240 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
11241 strpc *ret = malloc(sizeof(*ret));
11242 ret->ptr = upb_fielddef_name(f);
11243 ret->len = strlen(ret->ptr);
11244 upb_handlers_addcleanup(h, ret, free);
11245 return ret;
11246 }
11247
11248 /* ------------ JSON string printing: values, maps, arrays ------------------ */
11249
/* Writes `len` bytes starting at `buf` to the printer's output sink.  The
 * sink is expected to accept the whole buffer; this is only checked through
 * UPB_ASSERT_VAR. */
static void print_data(upb_json_printer *p, const char *buf,
                       unsigned int len) {
  /* TODO: Will need to change if we support pushback from the sink. */
  size_t accepted;

  accepted = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT_VAR(accepted, accepted == len);
}
11256
/* Emits the "," that separates elements of the current frame.  Nothing is
 * printed for the first element of a frame; instead the frame's first-element
 * flag is cleared so that every later call prints a comma. */
static void print_comma(upb_json_printer *p) {
  bool *is_first = &p->first_elem_[p->depth_];

  if (*is_first) {
    *is_first = false;
  } else {
    print_data(p, ",", 1);
  }
}
11263
11264 /* Helpers that print properly formatted elements to the JSON output stream. */
11265
11266 /* Used for escaping control chars in strings. */
11267 static const char kControlCharLimit = 0x20;
11268
is_json_escaped(char c)11269 UPB_INLINE bool is_json_escaped(char c) {
11270 /* See RFC 4627. */
11271 unsigned char uc = (unsigned char)c;
11272 return uc < kControlCharLimit || uc == '"' || uc == '\\';
11273 }
11274
json_nice_escape(char c)11275 UPB_INLINE char* json_nice_escape(char c) {
11276 switch (c) {
11277 case '"': return "\\\"";
11278 case '\\': return "\\\\";
11279 case '\b': return "\\b";
11280 case '\f': return "\\f";
11281 case '\n': return "\\n";
11282 case '\r': return "\\r";
11283 case '\t': return "\\t";
11284 default: return NULL;
11285 }
11286 }
11287
11288 /* Write a properly escaped string chunk. The surrounding quotes are *not*
11289 * printed; this is so that the caller has the option of emitting the string
11290 * content in chunks. */
putstring(upb_json_printer * p,const char * buf,unsigned int len)11291 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
11292 const char* unescaped_run = NULL;
11293 unsigned int i;
11294 for (i = 0; i < len; i++) {
11295 char c = buf[i];
11296 /* Handle escaping. */
11297 if (is_json_escaped(c)) {
11298 /* Use a "nice" escape, like \n, if one exists for this character. */
11299 const char* escape = json_nice_escape(c);
11300 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
11301 * escape. */
11302 char escape_buf[8];
11303 if (!escape) {
11304 unsigned char byte = (unsigned char)c;
11305 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
11306 escape = escape_buf;
11307 }
11308
11309 /* N.B. that we assume that the input encoding is equal to the output
11310 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
11311 * can simply pass the bytes through. */
11312
11313 /* If there's a current run of unescaped chars, print that run first. */
11314 if (unescaped_run) {
11315 print_data(p, unescaped_run, &buf[i] - unescaped_run);
11316 unescaped_run = NULL;
11317 }
11318 /* Then print the escape code. */
11319 print_data(p, escape, strlen(escape));
11320 } else {
11321 /* Add to the current unescaped run of characters. */
11322 if (unescaped_run == NULL) {
11323 unescaped_run = &buf[i];
11324 }
11325 }
11326 }
11327
11328 /* If the string ended in a run of unescaped characters, print that last run. */
11329 if (unescaped_run) {
11330 print_data(p, unescaped_run, &buf[len] - unescaped_run);
11331 }
11332 }
11333
/* Returns -1 from the enclosing function when x is false.  The fmt_*()
 * helpers that use it return size_t, so the caller sees (size_t)-1.  Note
 * that the expansion already ends in ';' and is a bare `if`, so it must not
 * be used as the body of an unbraced if/else. */
#define CHKLENGTH(x) if (!(x)) return -1;
11335
11336 /* Helpers that format floating point values according to our custom formats.
11337 * Right now we use %.8g and %.17g for float/double, respectively, to match
11338 * proto2::util::JsonFormat's defaults. May want to change this later. */
11339
/* Formats `val` with "%.17g" into `buf` (capacity `length`).  Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * result did not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%.17g", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11345
/* Formats `val` with "%.8g" into `buf` (capacity `length`).  Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * result did not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%.8g", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11351
/* Writes "true" or "false" into `buf` (capacity `length`).  Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * result did not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  const size_t written = _upb_snprintf(buf, length, "%s", text);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11357
/* Formats the signed integer `val` in decimal into `buf` (capacity `length`).
 * Returns the number of characters written, or (size_t)-1 if nothing was
 * written or the result did not fit.
 *
 * The parameter is `long long` rather than `long`: this helper is used for
 * int32_t, uint32_t and int64_t values, and on platforms where `long` is 32
 * bits wide a `long` parameter would truncate 64-bit values and turn large
 * uint32_t values negative.  `long long` is at least 64 bits wide and holds
 * all three ranges. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
11363
/* Formats the unsigned integer `val` in decimal ("%llu") into `buf`
 * (capacity `length`).  Returns the number of characters written, or
 * (size_t)-1 if nothing was written or the result did not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%llu", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11369
11370 /* Print a map key given a field name. Called by scalar field handlers and by
11371 * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)11372 static bool putkey(void *closure, const void *handler_data) {
11373 upb_json_printer *p = closure;
11374 const strpc *key = handler_data;
11375 print_comma(p);
11376 print_data(p, "\"", 1);
11377 putstring(p, key->ptr, key->len);
11378 print_data(p, "\":", 2);
11379 return true;
11380 }
11381
/* CHKFMT: returns false from the enclosing function when `val` is
 * (size_t)-1, the failure value produced by the fmt_*() helpers.
 * CHK: returns false from the enclosing function when `val` is false.
 * Both expansions already end in ';' and are bare `if` statements, so they
 * must not be used as the body of an unbraced if/else. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val) if (!(val)) return false;
11384
/* Defines the three value handlers for one primitive type:
 *
 *   put<type>       formats `val` with fmt_func into a 64-byte local buffer
 *                   and prints it; returns false if formatting failed.
 *   scalar_<type>   prints the field key (putkey) followed by the value.
 *   repeated_<type> prints a separating comma when needed, then the value.
 *
 * `val` is passed to fmt_func as-is, so it is converted to whatever parameter
 * type fmt_func declares. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
11408
/* Defines putmapkey_<type>, which prints a primitive value as a JSON object
 * key: the value produced by put<type> wrapped in double quotes and followed
 * by ':'.  Requires put<type> to exist already, i.e. TYPE_HANDLERS must have
 * been instantiated for the same type.  fmt_func is not used by the
 * expansion. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    print_data(p, "\"", 1);                                                  \
    CHK(put##type(closure, handler_data, val));                              \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
11418
/* Instantiate put/scalar/repeated handlers for every primitive type.  The
 * int32_t, uint32_t and int64_t handlers all format through fmt_int64, so
 * their values are converted to that function's parameter type. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64)
TYPE_HANDLERS(uint32_t, fmt_int64)
TYPE_HANDLERS(int64_t,  fmt_int64)
TYPE_HANDLERS(uint64_t, fmt_uint64)

/* double and float are not allowed to be map keys. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)

/* The generator macros are not needed past this point. */
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
11436
/* Handler data for the enum field handlers. */
typedef struct {
  /* Passed to putkey() as its handler data by scalar_enum(). */
  void *keyname;
  /* Enum definition used to look up the symbolic name of a value. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
11441
scalar_enum(void * closure,const void * handler_data,int32_t val)11442 static bool scalar_enum(void *closure, const void *handler_data,
11443 int32_t val) {
11444 const EnumHandlerData *hd = handler_data;
11445 upb_json_printer *p = closure;
11446 const char *symbolic_name;
11447
11448 CHK(putkey(closure, hd->keyname));
11449
11450 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
11451 if (symbolic_name) {
11452 print_data(p, "\"", 1);
11453 putstring(p, symbolic_name, strlen(symbolic_name));
11454 print_data(p, "\"", 1);
11455 } else {
11456 putint32_t(closure, NULL, val);
11457 }
11458
11459 return true;
11460 }
11461
/* Prints enum value `val` of enum `def`: as its quoted symbolic name when the
 * definition has one for that number, and as a plain integer otherwise. */
static void print_enum_symbolic_name(upb_json_printer *p,
                                     const upb_enumdef *def,
                                     int32_t val) {
  const char *name = upb_enumdef_iton(def, val);

  if (name == NULL) {
    putint32_t(p, NULL, val);
    return;
  }

  print_data(p, "\"", 1);
  putstring(p, name, strlen(name));
  print_data(p, "\"", 1);
}
11474
repeated_enum(void * closure,const void * handler_data,int32_t val)11475 static bool repeated_enum(void *closure, const void *handler_data,
11476 int32_t val) {
11477 const EnumHandlerData *hd = handler_data;
11478 upb_json_printer *p = closure;
11479 print_comma(p);
11480
11481 print_enum_symbolic_name(p, hd->enumdef, val);
11482
11483 return true;
11484 }
11485
mapvalue_enum(void * closure,const void * handler_data,int32_t val)11486 static bool mapvalue_enum(void *closure, const void *handler_data,
11487 int32_t val) {
11488 const EnumHandlerData *hd = handler_data;
11489 upb_json_printer *p = closure;
11490
11491 print_enum_symbolic_name(p, hd->enumdef, val);
11492
11493 return true;
11494 }
11495
scalar_startsubmsg(void * closure,const void * handler_data)11496 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
11497 return putkey(closure, handler_data) ? closure : UPB_BREAK;
11498 }
11499
repeated_startsubmsg(void * closure,const void * handler_data)11500 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
11501 upb_json_printer *p = closure;
11502 UPB_UNUSED(handler_data);
11503 print_comma(p);
11504 return closure;
11505 }
11506
/* Opens a JSON object: moves one level deeper, marks the new level as not
 * having printed an element yet, and prints "{". */
static void start_frame(upb_json_printer *p) {
  const int depth = ++p->depth_;

  p->first_elem_[depth] = true;
  print_data(p, "{", 1);
}
11512
/* Closes a JSON object: prints "}" and moves one level back up. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  p->depth_ -= 1;
}
11517
printer_startmsg(void * closure,const void * handler_data)11518 static bool printer_startmsg(void *closure, const void *handler_data) {
11519 upb_json_printer *p = closure;
11520 UPB_UNUSED(handler_data);
11521 if (p->depth_ == 0) {
11522 upb_bytessink_start(p->output_, 0, &p->subc_);
11523 }
11524 start_frame(p);
11525 return true;
11526 }
11527
/* End-of-message handler.  Closes the JSON object and, if that returns the
 * printer to depth 0, ends the output bytes sink.  The status argument is not
 * used.  Always returns true. */
static bool printer_endmsg(void *closure, const void *handler_data,
                           upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  end_frame(printer);
  if (printer->depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
11538
startseq(void * closure,const void * handler_data)11539 static void *startseq(void *closure, const void *handler_data) {
11540 upb_json_printer *p = closure;
11541 CHK(putkey(closure, handler_data));
11542 p->depth_++;
11543 p->first_elem_[p->depth_] = true;
11544 print_data(p, "[", 1);
11545 return closure;
11546 }
11547
endseq(void * closure,const void * handler_data)11548 static bool endseq(void *closure, const void *handler_data) {
11549 upb_json_printer *p = closure;
11550 UPB_UNUSED(handler_data);
11551 print_data(p, "]", 1);
11552 p->depth_--;
11553 return true;
11554 }
11555
startmap(void * closure,const void * handler_data)11556 static void *startmap(void *closure, const void *handler_data) {
11557 upb_json_printer *p = closure;
11558 CHK(putkey(closure, handler_data));
11559 p->depth_++;
11560 p->first_elem_[p->depth_] = true;
11561 print_data(p, "{", 1);
11562 return closure;
11563 }
11564
endmap(void * closure,const void * handler_data)11565 static bool endmap(void *closure, const void *handler_data) {
11566 upb_json_printer *p = closure;
11567 UPB_UNUSED(handler_data);
11568 print_data(p, "}", 1);
11569 p->depth_--;
11570 return true;
11571 }
11572
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11573 static size_t putstr(void *closure, const void *handler_data, const char *str,
11574 size_t len, const upb_bufhandle *handle) {
11575 upb_json_printer *p = closure;
11576 UPB_UNUSED(handler_data);
11577 UPB_UNUSED(handle);
11578 putstring(p, str, len);
11579 return len;
11580 }
11581
11582 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11583 static size_t putbytes(void *closure, const void *handler_data, const char *str,
11584 size_t len, const upb_bufhandle *handle) {
11585 upb_json_printer *p = closure;
11586
11587 /* This is the regular base64, not the "web-safe" version. */
11588 static const char base64[] =
11589 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
11590
11591 /* Base64-encode. */
11592 char data[16000];
11593 const char *limit = data + sizeof(data);
11594 const unsigned char *from = (const unsigned char*)str;
11595 char *to = data;
11596 size_t remaining = len;
11597 size_t bytes;
11598
11599 UPB_UNUSED(handler_data);
11600 UPB_UNUSED(handle);
11601
11602 while (remaining > 2) {
11603 /* TODO(haberman): handle encoded lengths > sizeof(data) */
11604 UPB_ASSERT_VAR(limit, (limit - to) >= 4);
11605
11606 to[0] = base64[from[0] >> 2];
11607 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11608 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
11609 to[3] = base64[from[2] & 0x3f];
11610
11611 remaining -= 3;
11612 to += 4;
11613 from += 3;
11614 }
11615
11616 switch (remaining) {
11617 case 2:
11618 to[0] = base64[from[0] >> 2];
11619 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11620 to[2] = base64[(from[1] & 0xf) << 2];
11621 to[3] = '=';
11622 to += 4;
11623 from += 2;
11624 break;
11625 case 1:
11626 to[0] = base64[from[0] >> 2];
11627 to[1] = base64[((from[0] & 0x3) << 4)];
11628 to[2] = '=';
11629 to[3] = '=';
11630 to += 4;
11631 from += 1;
11632 break;
11633 }
11634
11635 bytes = to - data;
11636 print_data(p, "\"", 1);
11637 putstring(p, data, bytes);
11638 print_data(p, "\"", 1);
11639 return len;
11640 }
11641
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)11642 static void *scalar_startstr(void *closure, const void *handler_data,
11643 size_t size_hint) {
11644 upb_json_printer *p = closure;
11645 UPB_UNUSED(handler_data);
11646 UPB_UNUSED(size_hint);
11647 CHK(putkey(closure, handler_data));
11648 print_data(p, "\"", 1);
11649 return p;
11650 }
11651
/* String-chunk handler for a non-repeated string field: forwards the chunk
 * to putstr().  Returns `len` when putstr() reports a non-zero count and 0
 * otherwise. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  return putstr(closure, handler_data, str, len, handle) ? len : 0;
}
11658
scalar_endstr(void * closure,const void * handler_data)11659 static bool scalar_endstr(void *closure, const void *handler_data) {
11660 upb_json_printer *p = closure;
11661 UPB_UNUSED(handler_data);
11662 print_data(p, "\"", 1);
11663 return true;
11664 }
11665
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)11666 static void *repeated_startstr(void *closure, const void *handler_data,
11667 size_t size_hint) {
11668 upb_json_printer *p = closure;
11669 UPB_UNUSED(handler_data);
11670 UPB_UNUSED(size_hint);
11671 print_comma(p);
11672 print_data(p, "\"", 1);
11673 return p;
11674 }
11675
/* String-chunk handler for an element of a repeated string field: forwards
 * the chunk to putstr().  Returns `len` when putstr() reports a non-zero
 * count and 0 otherwise. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  return putstr(closure, handler_data, str, len, handle) ? len : 0;
}
11682
repeated_endstr(void * closure,const void * handler_data)11683 static bool repeated_endstr(void *closure, const void *handler_data) {
11684 upb_json_printer *p = closure;
11685 UPB_UNUSED(handler_data);
11686 print_data(p, "\"", 1);
11687 return true;
11688 }
11689
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)11690 static void *mapkeyval_startstr(void *closure, const void *handler_data,
11691 size_t size_hint) {
11692 upb_json_printer *p = closure;
11693 UPB_UNUSED(handler_data);
11694 UPB_UNUSED(size_hint);
11695 print_data(p, "\"", 1);
11696 return p;
11697 }
11698
/* String-data handler for a string-typed mapentry key.
 *
 * Passes the chunk [str, str + len) to putstr() unchanged, guarded by CHK,
 * and reports the whole chunk as consumed by returning len. */
static size_t mapkey_str(void *closure,
                         const void *handler_data,
                         const char *str,
                         size_t len,
                         const upb_bufhandle *handle) {
  CHK(putstr(closure, handler_data, str, len, handle));

  return len;
}
11705
mapkey_endstr(void * closure,const void * handler_data)11706 static bool mapkey_endstr(void *closure, const void *handler_data) {
11707 upb_json_printer *p = closure;
11708 UPB_UNUSED(handler_data);
11709 print_data(p, "\":", 2);
11710 return true;
11711 }
11712
mapvalue_endstr(void * closure,const void * handler_data)11713 static bool mapvalue_endstr(void *closure, const void *handler_data) {
11714 upb_json_printer *p = closure;
11715 UPB_UNUSED(handler_data);
11716 print_data(p, "\"", 1);
11717 return true;
11718 }
11719
/* String-data handler for a non-repeated bytes field.
 *
 * Emits the field's key through putkey() and then the value through
 * putbytes(); both calls are guarded by CHK.  Reports the whole chunk as
 * consumed by returning len. */
static size_t scalar_bytes(void *closure,
                           const void *handler_data,
                           const char *str,
                           size_t len,
                           const upb_bufhandle *handle) {
  CHK(putkey(closure, handler_data));
  CHK(putbytes(closure, handler_data, str, len, handle));

  return len;
}
11727
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11728 static size_t repeated_bytes(void *closure, const void *handler_data,
11729 const char *str, size_t len,
11730 const upb_bufhandle *handle) {
11731 upb_json_printer *p = closure;
11732 print_comma(p);
11733 CHK(putbytes(closure, handler_data, str, len, handle));
11734 return len;
11735 }
11736
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11737 static size_t mapkey_bytes(void *closure, const void *handler_data,
11738 const char *str, size_t len,
11739 const upb_bufhandle *handle) {
11740 upb_json_printer *p = closure;
11741 CHK(putbytes(closure, handler_data, str, len, handle));
11742 print_data(p, ":", 1);
11743 return len;
11744 }
11745
/* Builds the handler data for an enum field and attaches it to "attr".
 *
 * The EnumHandlerData records the field's enum definition (the field's
 * subdef) and the key string produced by newstrpc().  The allocation is
 * registered with upb_handlers_addcleanup() so that free() is called on it
 * as part of the cleanup of "h".
 *
 * NOTE(review): the malloc() result is not checked, so an allocation failure
 * leads to a NULL dereference on the next line.  This function returns void,
 * so reporting the failure would need an interface change. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        upb_handlerattr *attr) {
  EnumHandlerData *enum_hd = malloc(sizeof(*enum_hd));

  enum_hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  enum_hd->keyname = newstrpc(h, f);

  upb_handlers_addcleanup(h, enum_hd, free);
  upb_handlerattr_sethandlerdata(attr, enum_hd);
}
11755
11756 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
11757 * in a map).
11758 *
11759 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
11760 * key or value cases properly. The right way to do this is to allocate a
11761 * temporary structure at the start of a mapentry submessage, store key and
11762 * value data in it as key and value handlers are called, and then print the
11763 * key/value pair once at the end of the submessage. If we don't do this, we
11764 * should at least detect the case and throw an error. However, so far all of
11765 * our sources that emit mapentry messages do so canonically (with one key
11766 * field, and then one value field), so this is not a pressing concern at the
11767 * moment. */
printer_sethandlers_mapentry(const void * closure,upb_handlers * h)11768 void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
11769 const upb_msgdef *md = upb_handlers_msgdef(h);
11770
11771 /* A mapentry message is printed simply as '"key": value'. Rather than
11772 * special-case key and value for every type below, we just handle both
11773 * fields explicitly here. */
11774 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
11775 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
11776
11777 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
11778
11779 UPB_UNUSED(closure);
11780
11781 switch (upb_fielddef_type(key_field)) {
11782 case UPB_TYPE_INT32:
11783 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
11784 break;
11785 case UPB_TYPE_INT64:
11786 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
11787 break;
11788 case UPB_TYPE_UINT32:
11789 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
11790 break;
11791 case UPB_TYPE_UINT64:
11792 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
11793 break;
11794 case UPB_TYPE_BOOL:
11795 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
11796 break;
11797 case UPB_TYPE_STRING:
11798 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
11799 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
11800 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
11801 break;
11802 case UPB_TYPE_BYTES:
11803 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
11804 break;
11805 default:
11806 assert(false);
11807 break;
11808 }
11809
11810 switch (upb_fielddef_type(value_field)) {
11811 case UPB_TYPE_INT32:
11812 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
11813 break;
11814 case UPB_TYPE_INT64:
11815 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
11816 break;
11817 case UPB_TYPE_UINT32:
11818 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
11819 break;
11820 case UPB_TYPE_UINT64:
11821 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
11822 break;
11823 case UPB_TYPE_BOOL:
11824 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
11825 break;
11826 case UPB_TYPE_FLOAT:
11827 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
11828 break;
11829 case UPB_TYPE_DOUBLE:
11830 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
11831 break;
11832 case UPB_TYPE_STRING:
11833 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
11834 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
11835 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
11836 break;
11837 case UPB_TYPE_BYTES:
11838 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
11839 break;
11840 case UPB_TYPE_ENUM: {
11841 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
11842 set_enum_hd(h, value_field, &enum_attr);
11843 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
11844 upb_handlerattr_uninit(&enum_attr);
11845 break;
11846 }
11847 case UPB_TYPE_MESSAGE:
11848 /* No handler necessary -- the submsg handlers will print the message
11849 * as appropriate. */
11850 break;
11851 }
11852
11853 upb_handlerattr_uninit(&empty_attr);
11854 }
11855
/* Registers the JSON-printing handlers for every field of the message type
 * that "h" is bound to.  This is the callback handed to
 * upb_handlers_newfrozen() by upb_json_printer_newhandlers().
 *
 * closure: not interpreted here; forwarded unchanged to
 *          printer_sethandlers_mapentry() for mapentry messages.
 * h:       handlers object to populate.
 *
 * For ordinary messages each field gets a "name_attr" whose handler data is
 * the key string from newstrpc(); it is passed to the handlers that have to
 * print the field's key (scalar handlers and the start-of-sequence / map
 * handlers).  Handlers for repeated elements are registered with the
 * data-less "empty_attr" instead. */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  bool is_mapentry = upb_msgdef_mapentry(md);
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_msg_field_iter i;

  if (is_mapentry) {
    /* mapentry messages are sufficiently different that we handle them
     * separately.  Release our attr first so that every exit path pairs
     * UPB_HANDLERATTR_INITIALIZER with upb_handlerattr_uninit(). */
    upb_handlerattr_uninit(&empty_attr);
    printer_sethandlers_mapentry(closure, h);
    return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

/* Expands to the switch case for one primitive type: repeated fields use the
 * repeated_<ctype> handler without handler data, scalar fields use the
 * scalar_<ctype> handler with the field's key as handler data. */
#define TYPE(type, name, ctype)                                               \
  case type:                                                                  \
    if (upb_fielddef_isseq(f)) {                                              \
      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
    } else {                                                                  \
      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
    }                                                                         \
    break;

  upb_msg_field_begin(&i, md);
  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);

    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));

    /* Maps and plain repeated fields both get sequence handlers; the start
     * handler receives the key, the per-element handlers are set below. */
    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (upb_fielddef_isseq(f)) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      TYPE(UPB_TYPE_FLOAT,  float,  float);
      TYPE(UPB_TYPE_DOUBLE, double, double);
      TYPE(UPB_TYPE_BOOL,   bool,   bool);
      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
      case UPB_TYPE_ENUM: {
        /* For now, we always emit symbolic names for enums. We may want an
         * option later to control this behavior, but we will wait for a real
         * need first. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
        set_enum_hd(h, f, &enum_attr);

        if (upb_fielddef_isseq(f)) {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        }

        upb_handlerattr_uninit(&enum_attr);
        break;
      }
      case UPB_TYPE_STRING:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        } else {
          /* Only the start handler prints the key, so only it needs it. */
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        }
        break;
      case UPB_TYPE_BYTES:
        /* XXX: this doesn't support strings that span buffers yet. The base64
         * encoder will need to be made resumable for this to work properly. */
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        } else {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        }
        break;
      case UPB_TYPE_MESSAGE:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        }
        break;
    }

    upb_handlerattr_uninit(&name_attr);
  }

  upb_handlerattr_uninit(&empty_attr);
#undef TYPE
}
11957
/* Puts the printer back into its initial state by zeroing its depth
 * counter.  No other field of the printer is touched. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
11961
11962
11963 /* Public API *****************************************************************/
11964
upb_json_printer_create(upb_env * e,const upb_handlers * h,upb_bytessink * output)11965 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
11966 upb_bytessink *output) {
11967 #ifndef NDEBUG
11968 size_t size_before = upb_env_bytesallocated(e);
11969 #endif
11970
11971 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
11972 if (!p) return NULL;
11973
11974 p->output_ = output;
11975 json_printer_reset(p);
11976 upb_sink_reset(&p->input_, h, p);
11977
11978 /* If this fails, increase the value in printer.h. */
11979 assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
11980 return p;
11981 }
11982
upb_json_printer_input(upb_json_printer * p)11983 upb_sink *upb_json_printer_input(upb_json_printer *p) {
11984 return &p->input_;
11985 }
11986
upb_json_printer_newhandlers(const upb_msgdef * md,const void * owner)11987 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
11988 const void *owner) {
11989 return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
11990 }
11991