1
2 #include "upb/def.h"
3
4 #include <ctype.h>
5 #include <errno.h>
6 #include <setjmp.h>
7 #include <stdlib.h>
8 #include <string.h>
9
10 #include "google/protobuf/descriptor.upb.h"
11 #include "upb/port_def.inc"
12
/* Length-prefixed string storage used for string-typed default values. */
typedef struct {
  /* Length reported to callers alongside `str`. */
  size_t len;
  /* First byte of the payload.  Null-terminated string data follows. */
  char str[1];
} str_t;
17
18 struct upb_fielddef {
19 const upb_filedef *file;
20 const upb_msgdef *msgdef;
21 const char *full_name;
22 const char *json_name;
23 union {
24 int64_t sint;
25 uint64_t uint;
26 double dbl;
27 float flt;
28 bool boolean;
29 str_t *str;
30 } defaultval;
31 const upb_oneofdef *oneof;
32 union {
33 const upb_msgdef *msgdef;
34 const upb_enumdef *enumdef;
35 const google_protobuf_FieldDescriptorProto *unresolved;
36 } sub;
37 uint32_t number_;
38 uint16_t index_;
39 uint16_t layout_index;
40 uint32_t selector_base; /* Used to index into a upb::Handlers table. */
41 bool is_extension_;
42 bool lazy_;
43 bool packed_;
44 bool proto3_optional_;
45 upb_descriptortype_t type_;
46 upb_label_t label_;
47 };
48
49 struct upb_msgdef {
50 const upb_msglayout *layout;
51 const upb_filedef *file;
52 const char *full_name;
53 uint32_t selector_count;
54 uint32_t submsg_field_count;
55
56 /* Tables for looking up fields by number and name. */
57 upb_inttable itof;
58 upb_strtable ntof;
59
60 const upb_fielddef *fields;
61 const upb_oneofdef *oneofs;
62 int field_count;
63 int oneof_count;
64 int real_oneof_count;
65
66 /* Is this a map-entry message? */
67 bool map_entry;
68 upb_wellknowntype_t well_known_type;
69
70 /* TODO(haberman): proper extension ranges (there can be multiple). */
71 };
72
73 struct upb_enumdef {
74 const upb_filedef *file;
75 const char *full_name;
76 upb_strtable ntoi;
77 upb_inttable iton;
78 int32_t defaultval;
79 };
80
81 struct upb_oneofdef {
82 const upb_msgdef *parent;
83 const char *full_name;
84 int field_count;
85 bool synthetic;
86 const upb_fielddef **fields;
87 upb_strtable ntof;
88 upb_inttable itof;
89 };
90
91 struct upb_filedef {
92 const char *name;
93 const char *package;
94 const char *phpprefix;
95 const char *phpnamespace;
96
97 const upb_filedef **deps;
98 const upb_msgdef *msgs;
99 const upb_enumdef *enums;
100 const upb_fielddef *exts;
101 const upb_symtab *symtab;
102
103 int dep_count;
104 int msg_count;
105 int enum_count;
106 int ext_count;
107 upb_syntax_t syntax;
108 };
109
110 struct upb_symtab {
111 upb_arena *arena;
112 upb_strtable syms; /* full_name -> packed def ptr */
113 upb_strtable files; /* file_name -> upb_filedef* */
114 size_t bytes_loaded;
115 };
116
/* Tag values for the tagged def pointers stored in lookup tables.  The tag
 * occupies the low two bits of the pointer.  Values 1 and 2 are reused: their
 * meaning depends on which table the pointer was read from. */
typedef enum {
  /* Valid in both kinds of table. */
  UPB_DEFTYPE_FIELD = 0,

  /* Meaning of 1 and 2 inside the symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Meaning of 1 and 2 inside a message's name table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
129
/* Extracts the pointer from a tagged value built by pack_def().
 *
 * The low two bits of the stored word carry the def type.  Returns the
 * pointer with those bits cleared when the tag equals `type`, else NULL. */
static const void *unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);

  if ((tagged & 3) != type) {
    return NULL;
  }
  return (const void *)(tagged & ~3);
}
134
pack_def(const void * ptr,upb_deftype_t type)135 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
136 uintptr_t num = (uintptr_t)ptr | type;
137 return upb_value_constptr((const void*)num);
138 }
139
/* The <ctype.h> classifiers (isalpha() etc.) depend on the current locale,
 * which we don't want, so character classes are tested with plain range
 * checks.  Returns true when low <= c <= high. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return c <= high;
}
144
/* True for 'A'-'Z', 'a'-'z' and the underscore. */
static bool upb_isletter(char c) {
  if (upb_isbetween(c, 'A', 'Z')) {
    return true;
  }
  if (upb_isbetween(c, 'a', 'z')) {
    return true;
  }
  return c == '_';
}
148
/* True for anything upb_isletter() accepts, plus the digits '0'-'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
152
/* Returns the unqualified part of a dotted name: the text after the last
 * '.', or the whole string when it contains no '.'.  A NULL input yields
 * NULL.  The result points into `fullname`; nothing is copied. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) {
    return NULL;
  }

  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
166
167 /* All submessage fields are lower than all other fields.
168 * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)169 uint32_t field_rank(const upb_fielddef *f) {
170 uint32_t ret = upb_fielddef_number(f);
171 const uint32_t high_bit = 1 << 30;
172 UPB_ASSERT(ret < high_bit);
173 if (!upb_fielddef_issubmsg(f))
174 ret |= high_bit;
175 return ret;
176 }
177
cmp_fields(const void * p1,const void * p2)178 int cmp_fields(const void *p1, const void *p2) {
179 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
180 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
181 return field_rank(f1) - field_rank(f2);
182 }
183
184 /* A few implementation details of handlers. We put these here to avoid
185 * a def -> handlers dependency. */
186
/* Warning: this constant is also defined in upb/handlers.h. */
#define UPB_STATIC_SELECTOR_COUNT 3
188
/* Selector base offset for a field: 2 for repeated fields, 0 otherwise. */
static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
192
/* Counts the handler selectors a field needs.  Every field starts at one;
 * repeated fields, string/bytes fields and lazy submessages each add more. */
static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2; /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2; /* [STRING]/STARTSTR/ENDSTR */
  }
  /* A submessage by itself adds nothing here: ENDSUBMSG (STARTSUBMSG is at
   * table beginning).  Only the lazy variant needs extra selectors. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3; /* STARTSTR/ENDSTR/STRING (for lazy) */
  }
  return count;
}
207
/* Records an out-of-memory error message in `status`. */
static void upb_status_setoom(upb_status *status) {
  static const char oom_msg[] = "out of memory";

  upb_status_seterrmsg(status, oom_msg);
}
211
/* Sets m->well_known_type from the message's full name.  Names that exactly
 * match an entry in the table below get that entry's type; a NULL name or
 * any other name gets UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *full_name;
    upb_wellknowntype_t type;
  } known_types[] = {
      {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
      {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
      {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
      {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
      {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
      {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
      {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
      {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
      {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
      {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
      {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
      {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
      {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
      {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
      {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
      {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT},
  };
  const size_t known_count = sizeof(known_types) / sizeof(known_types[0]);
  const char *name = upb_msgdef_fullname(m);
  upb_wellknowntype_t result = UPB_WELLKNOWN_UNSPECIFIED;
  size_t i;

  if (name != NULL) {
    for (i = 0; i < known_count; i++) {
      if (strcmp(name, known_types[i].full_name) == 0) {
        result = known_types[i].type;
        break;
      }
    }
  }

  m->well_known_type = result;
}
254
255
256 /* upb_enumdef ****************************************************************/
257
upb_enumdef_fullname(const upb_enumdef * e)258 const char *upb_enumdef_fullname(const upb_enumdef *e) {
259 return e->full_name;
260 }
261
upb_enumdef_name(const upb_enumdef * e)262 const char *upb_enumdef_name(const upb_enumdef *e) {
263 return shortdefname(e->full_name);
264 }
265
upb_enumdef_file(const upb_enumdef * e)266 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
267 return e->file;
268 }
269
upb_enumdef_default(const upb_enumdef * e)270 int32_t upb_enumdef_default(const upb_enumdef *e) {
271 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
272 return e->defaultval;
273 }
274
upb_enumdef_numvals(const upb_enumdef * e)275 int upb_enumdef_numvals(const upb_enumdef *e) {
276 return (int)upb_strtable_count(&e->ntoi);
277 }
278
/* Positions `i` at the first value of `e`.  Iteration walks the
 * name -> number table so that names sharing a number are all visited. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *by_name = &e->ntoi;
  upb_strtable_begin(i, by_name);
}
283
/* Advances the enum-value iterator by one entry. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns whatever upb_strtable_done() reports for the iterator. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
286
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)287 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
288 size_t len, int32_t *num) {
289 upb_value v;
290 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
291 return false;
292 }
293 if (num) *num = upb_value_getint32(v);
294 return true;
295 }
296
upb_enumdef_iton(const upb_enumdef * def,int32_t num)297 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
298 upb_value v;
299 return upb_inttable_lookup32(&def->iton, num, &v) ?
300 upb_value_getcstr(v) : NULL;
301 }
302
/* Returns the name (table key data) of the value the iterator points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *name = upb_strtable_iter_key(iter).data;
  return name;
}
306
/* Returns the number of the value the iterator points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  const int32_t number = upb_value_getint32(upb_strtable_iter_value(iter));
  return number;
}
310
311
312 /* upb_fielddef ***************************************************************/
313
upb_fielddef_fullname(const upb_fielddef * f)314 const char *upb_fielddef_fullname(const upb_fielddef *f) {
315 return f->full_name;
316 }
317
upb_fielddef_type(const upb_fielddef * f)318 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
319 switch (f->type_) {
320 case UPB_DESCRIPTOR_TYPE_DOUBLE:
321 return UPB_TYPE_DOUBLE;
322 case UPB_DESCRIPTOR_TYPE_FLOAT:
323 return UPB_TYPE_FLOAT;
324 case UPB_DESCRIPTOR_TYPE_INT64:
325 case UPB_DESCRIPTOR_TYPE_SINT64:
326 case UPB_DESCRIPTOR_TYPE_SFIXED64:
327 return UPB_TYPE_INT64;
328 case UPB_DESCRIPTOR_TYPE_INT32:
329 case UPB_DESCRIPTOR_TYPE_SFIXED32:
330 case UPB_DESCRIPTOR_TYPE_SINT32:
331 return UPB_TYPE_INT32;
332 case UPB_DESCRIPTOR_TYPE_UINT64:
333 case UPB_DESCRIPTOR_TYPE_FIXED64:
334 return UPB_TYPE_UINT64;
335 case UPB_DESCRIPTOR_TYPE_UINT32:
336 case UPB_DESCRIPTOR_TYPE_FIXED32:
337 return UPB_TYPE_UINT32;
338 case UPB_DESCRIPTOR_TYPE_ENUM:
339 return UPB_TYPE_ENUM;
340 case UPB_DESCRIPTOR_TYPE_BOOL:
341 return UPB_TYPE_BOOL;
342 case UPB_DESCRIPTOR_TYPE_STRING:
343 return UPB_TYPE_STRING;
344 case UPB_DESCRIPTOR_TYPE_BYTES:
345 return UPB_TYPE_BYTES;
346 case UPB_DESCRIPTOR_TYPE_GROUP:
347 case UPB_DESCRIPTOR_TYPE_MESSAGE:
348 return UPB_TYPE_MESSAGE;
349 }
350 UPB_UNREACHABLE();
351 }
352
upb_fielddef_descriptortype(const upb_fielddef * f)353 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
354 return f->type_;
355 }
356
upb_fielddef_index(const upb_fielddef * f)357 uint32_t upb_fielddef_index(const upb_fielddef *f) {
358 return f->index_;
359 }
360
upb_fielddef_label(const upb_fielddef * f)361 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
362 return f->label_;
363 }
364
upb_fielddef_number(const upb_fielddef * f)365 uint32_t upb_fielddef_number(const upb_fielddef *f) {
366 return f->number_;
367 }
368
upb_fielddef_isextension(const upb_fielddef * f)369 bool upb_fielddef_isextension(const upb_fielddef *f) {
370 return f->is_extension_;
371 }
372
upb_fielddef_lazy(const upb_fielddef * f)373 bool upb_fielddef_lazy(const upb_fielddef *f) {
374 return f->lazy_;
375 }
376
upb_fielddef_packed(const upb_fielddef * f)377 bool upb_fielddef_packed(const upb_fielddef *f) {
378 return f->packed_;
379 }
380
upb_fielddef_name(const upb_fielddef * f)381 const char *upb_fielddef_name(const upb_fielddef *f) {
382 return shortdefname(f->full_name);
383 }
384
upb_fielddef_jsonname(const upb_fielddef * f)385 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
386 return f->json_name;
387 }
388
upb_fielddef_selectorbase(const upb_fielddef * f)389 uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
390 return f->selector_base;
391 }
392
upb_fielddef_file(const upb_fielddef * f)393 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
394 return f->file;
395 }
396
upb_fielddef_containingtype(const upb_fielddef * f)397 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
398 return f->msgdef;
399 }
400
upb_fielddef_containingoneof(const upb_fielddef * f)401 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
402 return f->oneof;
403 }
404
upb_fielddef_realcontainingoneof(const upb_fielddef * f)405 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
406 if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
407 return f->oneof;
408 }
409
/* Placeholder called by the typed default accessors.  It currently performs
 * no check at all; both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(ctype);
  UPB_UNUSED(f);
}
414
upb_fielddef_defaultint64(const upb_fielddef * f)415 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
416 chkdefaulttype(f, UPB_TYPE_INT64);
417 return f->defaultval.sint;
418 }
419
upb_fielddef_defaultint32(const upb_fielddef * f)420 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
421 chkdefaulttype(f, UPB_TYPE_INT32);
422 return (int32_t)f->defaultval.sint;
423 }
424
upb_fielddef_defaultuint64(const upb_fielddef * f)425 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
426 chkdefaulttype(f, UPB_TYPE_UINT64);
427 return f->defaultval.uint;
428 }
429
upb_fielddef_defaultuint32(const upb_fielddef * f)430 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
431 chkdefaulttype(f, UPB_TYPE_UINT32);
432 return (uint32_t)f->defaultval.uint;
433 }
434
upb_fielddef_defaultbool(const upb_fielddef * f)435 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
436 chkdefaulttype(f, UPB_TYPE_BOOL);
437 return f->defaultval.boolean;
438 }
439
upb_fielddef_defaultfloat(const upb_fielddef * f)440 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
441 chkdefaulttype(f, UPB_TYPE_FLOAT);
442 return f->defaultval.flt;
443 }
444
upb_fielddef_defaultdouble(const upb_fielddef * f)445 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
446 chkdefaulttype(f, UPB_TYPE_DOUBLE);
447 return f->defaultval.dbl;
448 }
449
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)450 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
451 str_t *str = f->defaultval.str;
452 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
453 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
454 upb_fielddef_type(f) == UPB_TYPE_ENUM);
455 if (str) {
456 if (len) *len = str->len;
457 return str->str;
458 } else {
459 if (len) *len = 0;
460 return NULL;
461 }
462 }
463
upb_fielddef_msgsubdef(const upb_fielddef * f)464 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
465 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL;
466 }
467
upb_fielddef_enumsubdef(const upb_fielddef * f)468 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
469 return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL;
470 }
471
upb_fielddef_layout(const upb_fielddef * f)472 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
473 return &f->msgdef->layout->fields[f->layout_index];
474 }
475
upb_fielddef_issubmsg(const upb_fielddef * f)476 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
477 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
478 }
479
upb_fielddef_isstring(const upb_fielddef * f)480 bool upb_fielddef_isstring(const upb_fielddef *f) {
481 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
482 upb_fielddef_type(f) == UPB_TYPE_BYTES;
483 }
484
upb_fielddef_isseq(const upb_fielddef * f)485 bool upb_fielddef_isseq(const upb_fielddef *f) {
486 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
487 }
488
/* True when the field is neither a string/bytes field nor a submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
492
/* True when the field is a repeated submessage whose message type is a
 * map-entry message. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) {
    return false;
  }
  if (!upb_fielddef_issubmsg(f)) {
    return false;
  }
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
497
upb_fielddef_hassubdef(const upb_fielddef * f)498 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
499 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
500 }
501
upb_fielddef_haspresence(const upb_fielddef * f)502 bool upb_fielddef_haspresence(const upb_fielddef *f) {
503 if (upb_fielddef_isseq(f)) return false;
504 return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
505 f->file->syntax == UPB_SYNTAX_PROTO2;
506 }
507
/* True when low <= x <= high. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
511
/* True when `label` lies in the range [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True when `type` lies in the range [1, 11]. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True when `fmt` lies in the range [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
515
/* True when `type` lies in the range [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
519
520 /* upb_msgdef *****************************************************************/
521
upb_msgdef_fullname(const upb_msgdef * m)522 const char *upb_msgdef_fullname(const upb_msgdef *m) {
523 return m->full_name;
524 }
525
upb_msgdef_file(const upb_msgdef * m)526 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
527 return m->file;
528 }
529
upb_msgdef_name(const upb_msgdef * m)530 const char *upb_msgdef_name(const upb_msgdef *m) {
531 return shortdefname(m->full_name);
532 }
533
upb_msgdef_syntax(const upb_msgdef * m)534 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
535 return m->file->syntax;
536 }
537
upb_msgdef_selectorcount(const upb_msgdef * m)538 size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
539 return m->selector_count;
540 }
541
upb_msgdef_submsgfieldcount(const upb_msgdef * m)542 uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
543 return m->submsg_field_count;
544 }
545
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)546 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
547 upb_value val;
548 return upb_inttable_lookup32(&m->itof, i, &val) ?
549 upb_value_getconstptr(val) : NULL;
550 }
551
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)552 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
553 size_t len) {
554 upb_value val;
555
556 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
557 return NULL;
558 }
559
560 return unpack_def(val, UPB_DEFTYPE_FIELD);
561 }
562
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)563 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
564 size_t len) {
565 upb_value val;
566
567 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
568 return NULL;
569 }
570
571 return unpack_def(val, UPB_DEFTYPE_ONEOF);
572 }
573
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)574 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
575 const upb_fielddef **f, const upb_oneofdef **o) {
576 upb_value val;
577
578 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
579 return false;
580 }
581
582 *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
583 *f = unpack_def(val, UPB_DEFTYPE_FIELD);
584 return *o || *f; /* False if this was a JSON name. */
585 }
586
upb_msgdef_lookupjsonname(const upb_msgdef * m,const char * name,size_t len)587 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
588 const char *name, size_t len) {
589 upb_value val;
590 const upb_fielddef* f;
591
592 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
593 return NULL;
594 }
595
596 f = unpack_def(val, UPB_DEFTYPE_FIELD);
597 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
598
599 return f;
600 }
601
upb_msgdef_numfields(const upb_msgdef * m)602 int upb_msgdef_numfields(const upb_msgdef *m) {
603 return m->field_count;
604 }
605
upb_msgdef_numoneofs(const upb_msgdef * m)606 int upb_msgdef_numoneofs(const upb_msgdef *m) {
607 return m->oneof_count;
608 }
609
upb_msgdef_numrealoneofs(const upb_msgdef * m)610 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
611 return m->real_oneof_count;
612 }
613
upb_msgdef_fieldcount(const upb_msgdef * m)614 int upb_msgdef_fieldcount(const upb_msgdef *m) {
615 return m->field_count;
616 }
617
upb_msgdef_oneofcount(const upb_msgdef * m)618 int upb_msgdef_oneofcount(const upb_msgdef *m) {
619 return m->oneof_count;
620 }
621
upb_msgdef_realoneofcount(const upb_msgdef * m)622 int upb_msgdef_realoneofcount(const upb_msgdef *m) {
623 return m->real_oneof_count;
624 }
625
upb_msgdef_layout(const upb_msgdef * m)626 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
627 return m->layout;
628 }
629
upb_msgdef_field(const upb_msgdef * m,int i)630 const upb_fielddef *upb_msgdef_field(const upb_msgdef *m, int i) {
631 UPB_ASSERT(i >= 0 && i < m->field_count);
632 return &m->fields[i];
633 }
634
upb_msgdef_oneof(const upb_msgdef * m,int i)635 const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i) {
636 UPB_ASSERT(i >= 0 && i < m->oneof_count);
637 return &m->oneofs[i];
638 }
639
upb_msgdef_mapentry(const upb_msgdef * m)640 bool upb_msgdef_mapentry(const upb_msgdef *m) {
641 return m->map_entry;
642 }
643
upb_msgdef_wellknowntype(const upb_msgdef * m)644 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
645 return m->well_known_type;
646 }
647
upb_msgdef_isnumberwrapper(const upb_msgdef * m)648 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
649 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
650 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
651 type <= UPB_WELLKNOWN_UINT32VALUE;
652 }
653
upb_msgdef_iswrapper(const upb_msgdef * m)654 bool upb_msgdef_iswrapper(const upb_msgdef *m) {
655 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
656 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
657 type <= UPB_WELLKNOWN_BOOLVALUE;
658 }
659
/* Positions `iter` at the start of the message's number -> field table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  const upb_inttable *by_number = &m->itof;
  upb_inttable_begin(iter, by_number);
}
663
/* Advances the field iterator by one entry. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
665
/* Returns whatever upb_inttable_done() reports for the field iterator. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  const bool done = upb_inttable_done(iter);
  return done;
}
669
upb_msg_iter_field(const upb_msg_field_iter * iter)670 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
671 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
672 }
673
/* Forwards the field iterator to upb_inttable_iter_setdone(). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_msg_field_iter *it = iter;
  upb_inttable_iter_setdone(it);
}
677
/* Compares two field iterators using upb_inttable_iter_isequal(). */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter *iter1,
                                const upb_msg_field_iter *iter2) {
  const bool equal = upb_inttable_iter_isequal(iter1, iter2);
  return equal;
}
682
/* Positions `iter` at the first oneof of `m`.  Oneofs share the message's
 * name table with fields, so leading entries that are not tagged as oneofs
 * are skipped.  The iterator ends up "done" if the message has no oneofs. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  upb_strtable_begin(iter, &m->ntof);

  while (!upb_strtable_done(iter)) {
    if (unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
      break; /* Found the first oneof entry. */
    }
    upb_strtable_next(iter);
  }
}
691
/* Advances `iter` to the next oneof, stepping over any entries in the shared
 * name table that are not tagged as oneofs.  Always advances at least one
 * entry; stops when the table is exhausted. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  for (;;) {
    upb_strtable_next(iter);
    if (upb_strtable_done(iter)) {
      return;
    }
    if (unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
      return;
    }
  }
}
699
/* Returns whatever upb_strtable_done() reports for the oneof iterator. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  const bool done = upb_strtable_done(iter);
  return done;
}
703
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)704 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
705 return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
706 }
707
/* Forwards the oneof iterator to upb_strtable_iter_setdone(). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_msg_oneof_iter *it = iter;
  upb_strtable_iter_setdone(it);
}
711
/* Compares two oneof iterators using upb_strtable_iter_isequal(). */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  const bool equal = upb_strtable_iter_isequal(iter1, iter2);
  return equal;
}
716
717 /* upb_oneofdef ***************************************************************/
718
upb_oneofdef_name(const upb_oneofdef * o)719 const char *upb_oneofdef_name(const upb_oneofdef *o) {
720 return shortdefname(o->full_name);
721 }
722
upb_oneofdef_containingtype(const upb_oneofdef * o)723 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
724 return o->parent;
725 }
726
upb_oneofdef_fieldcount(const upb_oneofdef * o)727 int upb_oneofdef_fieldcount(const upb_oneofdef *o) {
728 return o->field_count;
729 }
730
upb_oneofdef_field(const upb_oneofdef * o,int i)731 const upb_fielddef *upb_oneofdef_field(const upb_oneofdef *o, int i) {
732 UPB_ASSERT(i < o->field_count);
733 return o->fields[i];
734 }
735
upb_oneofdef_numfields(const upb_oneofdef * o)736 int upb_oneofdef_numfields(const upb_oneofdef *o) {
737 return o->field_count;
738 }
739
upb_oneofdef_index(const upb_oneofdef * o)740 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
741 return o - o->parent->oneofs;
742 }
743
upb_oneofdef_issynthetic(const upb_oneofdef * o)744 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
745 return o->synthetic;
746 }
747
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)748 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
749 const char *name, size_t length) {
750 upb_value val;
751 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
752 upb_value_getptr(val) : NULL;
753 }
754
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)755 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
756 upb_value val;
757 return upb_inttable_lookup32(&o->itof, num, &val) ?
758 upb_value_getptr(val) : NULL;
759 }
760
/* Positions `iter` at the start of the oneof's number -> field table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *by_number = &o->itof;
  upb_inttable_begin(iter, by_number);
}
764
/* Advances the oneof-field iterator by one entry. */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_oneof_iter *it = iter;
  upb_inttable_next(it);
}
768
/* Returns whatever upb_inttable_done() reports for the iterator. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  const bool done = upb_inttable_done(iter);
  return done;
}
772
upb_oneof_iter_field(const upb_oneof_iter * iter)773 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
774 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
775 }
776
/* Forwards the oneof-field iterator to upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_oneof_iter *it = iter;
  upb_inttable_iter_setdone(it);
}
780
781 /* upb_filedef ****************************************************************/
782
upb_filedef_name(const upb_filedef * f)783 const char *upb_filedef_name(const upb_filedef *f) {
784 return f->name;
785 }
786
upb_filedef_package(const upb_filedef * f)787 const char *upb_filedef_package(const upb_filedef *f) {
788 return f->package;
789 }
790
upb_filedef_phpprefix(const upb_filedef * f)791 const char *upb_filedef_phpprefix(const upb_filedef *f) {
792 return f->phpprefix;
793 }
794
upb_filedef_phpnamespace(const upb_filedef * f)795 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
796 return f->phpnamespace;
797 }
798
upb_filedef_syntax(const upb_filedef * f)799 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
800 return f->syntax;
801 }
802
upb_filedef_msgcount(const upb_filedef * f)803 int upb_filedef_msgcount(const upb_filedef *f) {
804 return f->msg_count;
805 }
806
upb_filedef_depcount(const upb_filedef * f)807 int upb_filedef_depcount(const upb_filedef *f) {
808 return f->dep_count;
809 }
810
upb_filedef_enumcount(const upb_filedef * f)811 int upb_filedef_enumcount(const upb_filedef *f) {
812 return f->enum_count;
813 }
814
upb_filedef_dep(const upb_filedef * f,int i)815 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
816 return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
817 }
818
upb_filedef_msg(const upb_filedef * f,int i)819 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
820 return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
821 }
822
upb_filedef_enum(const upb_filedef * f,int i)823 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
824 return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
825 }
826
upb_filedef_symtab(const upb_filedef * f)827 const upb_symtab *upb_filedef_symtab(const upb_filedef *f) {
828 return f->symtab;
829 }
830
/* Destroys a symbol table: frees its arena first, then the symtab struct
 * itself.  `s` must not be NULL (it is dereferenced). */
void upb_symtab_free(upb_symtab *s) {
  upb_arena *arena = s->arena;

  upb_arena_free(arena);
  upb_gfree(s);
}
835
upb_symtab_new(void)836 upb_symtab *upb_symtab_new(void) {
837 upb_symtab *s = upb_gmalloc(sizeof(*s));
838 upb_alloc *alloc;
839
840 if (!s) {
841 return NULL;
842 }
843
844 s->arena = upb_arena_new();
845 s->bytes_loaded = 0;
846 alloc = upb_arena_alloc(s->arena);
847
848 if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) ||
849 !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) {
850 upb_arena_free(s->arena);
851 upb_gfree(s);
852 s = NULL;
853 }
854 return s;
855 }
856
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)857 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
858 upb_value v;
859 return upb_strtable_lookup(&s->syms, sym, &v) ?
860 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
861 }
862
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)863 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
864 size_t len) {
865 upb_value v;
866 return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
867 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
868 }
869
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)870 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
871 upb_value v;
872 return upb_strtable_lookup(&s->syms, sym, &v) ?
873 unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
874 }
875
upb_symtab_lookupfile(const upb_symtab * s,const char * name)876 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
877 upb_value v;
878 return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
879 : NULL;
880 }
881
upb_symtab_lookupfile2(const upb_symtab * s,const char * name,size_t len)882 const upb_filedef *upb_symtab_lookupfile2(
883 const upb_symtab *s, const char *name, size_t len) {
884 upb_value v;
885 return upb_strtable_lookup2(&s->files, name, len, &v) ?
886 upb_value_getconstptr(v) : NULL;
887 }
888
upb_symtab_filecount(const upb_symtab * s)889 int upb_symtab_filecount(const upb_symtab *s) {
890 return (int)upb_strtable_count(&s->files);
891 }
892
893 /* Code to build defs from descriptor protos. *********************************/
894
895 /* There is a question of how much validation to do here. It will be difficult
896 * to perfectly match the amount of validation performed by proto2. But since
897 * this code is used to directly build defs from Ruby (for example) we do need
898 * to validate important constraints like uniqueness of names and numbers. */
899
/* Evaluates |x|; if the result is false/NULL, reports out-of-memory through
 * symtab_oomerr(ctx), which does not return.  The macro expands to a bare
 * `if` block and requires a variable named |ctx| (a symtab_addctx*) to be in
 * scope where it is used. */
#define CHK_OOM(x) if (!(x)) { symtab_oomerr(ctx); }

/* State shared by the def-building helpers below while a file is being
 * added. */
typedef struct {
  upb_symtab *symtab;
  upb_filedef *file;              /* File we are building. */
  upb_arena *file_arena;          /* Allocate defs here. */
  upb_alloc *alloc;               /* Alloc of file_arena, for tables. */
  const upb_msglayout **layouts;  /* NULL if we should build layouts. */
  upb_status *status;             /* Record errors here. */
  jmp_buf err;                    /* longjmp() on error. */
} symtab_addctx;
911
912 UPB_NORETURN UPB_NOINLINE
symtab_errf(symtab_addctx * ctx,const char * fmt,...)913 static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) {
914 va_list argp;
915 va_start(argp, fmt);
916 upb_status_vseterrf(ctx->status, fmt, argp);
917 va_end(argp);
918 UPB_LONGJMP(ctx->err, 1);
919 }
920
921 UPB_NORETURN UPB_NOINLINE
symtab_oomerr(symtab_addctx * ctx)922 static void symtab_oomerr(symtab_addctx *ctx) {
923 upb_status_setoom(ctx->status);
924 UPB_LONGJMP(ctx->err, 1);
925 }
926
/* Allocates |bytes| bytes from the file arena.  On failure this raises an
 * out-of-memory error through symtab_oomerr() instead of returning, so a
 * returned pointer is never NULL. */
void *symtab_alloc(symtab_addctx *ctx, size_t bytes) {
  void *mem = upb_arena_malloc(ctx->file_arena, bytes);
  if (mem == NULL) {
    symtab_oomerr(ctx);
  }
  return mem;
}
932
/* Validates |name| as an identifier and raises an error via symtab_errf() if
 * it is malformed.
 *
 * Rules enforced, one character at a time:
 *   - '.' is only allowed when |full| is true, and never at the start of a
 *     component (so no leading dot and no "..").
 *   - The first character of each component must satisfy upb_isletter().
 *   - Every other character must satisfy upb_isalphanum().
 *   - The name must not end while a component is still expected (this also
 *     rejects the empty name and a trailing dot). */
static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) {
  const char *const text = name.data;
  const size_t n = name.size;
  bool at_component_start = true;
  size_t pos;

  for (pos = 0; pos < n; pos++) {
    const char ch = text[pos];

    if (ch == '.') {
      if (!full || at_component_start) {
        symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)n, text);
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        symtab_errf(
            ctx,
            "invalid name: path components must start with a letter (%.*s)",
            (int)n, text);
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
                  (int)n, text);
    }
  }

  if (at_component_start) {
    symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)n, text);
  }
}
964
/* Integer division of |n| by |d|, rounded up to the next whole quotient. */
static size_t div_round_up(size_t n, size_t d) {
  size_t bumped = n + (d - 1);
  return bumped / d;
}
968
/* Size in bytes of the in-message storage for a single value of |type|. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
990
upb_msg_fielddefsize(const upb_fielddef * f)991 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
992 if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
993 upb_map_entry ent;
994 UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
995 return sizeof(ent.k);
996 } else if (upb_fielddef_isseq(f)) {
997 return sizeof(void*);
998 } else {
999 return upb_msgval_sizeof(upb_fielddef_type(f));
1000 }
1001 }
1002
/* Reserves |size| bytes at the end of layout |l|.  The current size is first
 * rounded up with UPB_ALIGN_UP using |size| itself as the alignment; the
 * resulting offset is returned and l->size is advanced past the new slot. */
static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  uint32_t offset;

  l->size = UPB_ALIGN_UP(l->size, size);
  offset = l->size;
  l->size += size;

  return offset;
}
1011
field_number_cmp(const void * p1,const void * p2)1012 static int field_number_cmp(const void *p1, const void *p2) {
1013 const upb_msglayout_field *f1 = p1;
1014 const upb_msglayout_field *f2 = p2;
1015 return f1->number - f2->number;
1016 }
1017
/* For each entry of |fields| (one per field of |m|), finds the fielddef with
 * the same field number and records the entry's position in |fields| as that
 * fielddef's layout_index. */
static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) {
  const int count = upb_msgdef_numfields(m);
  int pos;

  for (pos = 0; pos < count; pos++) {
    const upb_fielddef *found = upb_msgdef_itof(m, fields[pos].number);
    upb_fielddef *mutable_f = (upb_fielddef*)found;
    UPB_ASSERT(mutable_f);
    mutable_f->layout_index = pos;
  }
}
1027
1028 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
1029 * It computes a dynamic layout for all of the fields in |m|. */
make_layout(symtab_addctx * ctx,const upb_msgdef * m)1030 static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
1031 upb_msglayout *l = (upb_msglayout*)m->layout;
1032 upb_msg_field_iter it;
1033 upb_msg_oneof_iter oit;
1034 size_t hasbit;
1035 size_t submsg_count = m->submsg_field_count;
1036 const upb_msglayout **submsgs;
1037 upb_msglayout_field *fields;
1038
1039 memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry));
1040
1041 fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields));
1042 submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs));
1043
1044 l->field_count = upb_msgdef_numfields(m);
1045 l->fields = fields;
1046 l->submsgs = submsgs;
1047 l->table_mask = 0;
1048
1049 /* TODO(haberman): initialize fast tables so that reflection-based parsing
1050 * can get the same speeds as linked-in types. */
1051 l->fasttable[0].field_parser = &fastdecode_generic;
1052 l->fasttable[0].field_data = 0;
1053
1054 if (upb_msgdef_mapentry(m)) {
1055 /* TODO(haberman): refactor this method so this special case is more
1056 * elegant. */
1057 const upb_fielddef *key = upb_msgdef_itof(m, 1);
1058 const upb_fielddef *val = upb_msgdef_itof(m, 2);
1059 fields[0].number = 1;
1060 fields[1].number = 2;
1061 fields[0].label = UPB_LABEL_OPTIONAL;
1062 fields[1].label = UPB_LABEL_OPTIONAL;
1063 fields[0].presence = 0;
1064 fields[1].presence = 0;
1065 fields[0].descriptortype = upb_fielddef_descriptortype(key);
1066 fields[1].descriptortype = upb_fielddef_descriptortype(val);
1067 fields[0].offset = 0;
1068 fields[1].offset = sizeof(upb_strview);
1069 fields[1].submsg_index = 0;
1070
1071 if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
1072 submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
1073 }
1074
1075 l->field_count = 2;
1076 l->size = 2 * sizeof(upb_strview);
1077 l->size = UPB_ALIGN_UP(l->size, 8);
1078 return;
1079 }
1080
1081 /* Allocate data offsets in three stages:
1082 *
1083 * 1. hasbits.
1084 * 2. regular fields.
1085 * 3. oneof fields.
1086 *
1087 * OPT: There is a lot of room for optimization here to minimize the size.
1088 */
1089
1090 /* Allocate hasbits and set basic field attributes. */
1091 submsg_count = 0;
1092 for (upb_msg_field_begin(&it, m), hasbit = 0;
1093 !upb_msg_field_done(&it);
1094 upb_msg_field_next(&it)) {
1095 upb_fielddef* f = upb_msg_iter_field(&it);
1096 upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
1097
1098 field->number = upb_fielddef_number(f);
1099 field->descriptortype = upb_fielddef_descriptortype(f);
1100 field->label = upb_fielddef_label(f);
1101
1102 if (field->descriptortype == UPB_DTYPE_STRING &&
1103 f->file->syntax == UPB_SYNTAX_PROTO2) {
1104 /* See TableDescriptorType() in upbc/generator.cc for details and
1105 * rationale. */
1106 field->descriptortype = UPB_DTYPE_BYTES;
1107 }
1108
1109 if (upb_fielddef_ismap(f)) {
1110 field->label = _UPB_LABEL_MAP;
1111 } else if (upb_fielddef_packed(f)) {
1112 field->label = _UPB_LABEL_PACKED;
1113 }
1114
1115 if (upb_fielddef_issubmsg(f)) {
1116 const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
1117 field->submsg_index = submsg_count++;
1118 submsgs[field->submsg_index] = subm->layout;
1119 }
1120
1121 if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
1122 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
1123 * table. This wastes one hasbit, but we don't worry about it for now. */
1124 field->presence = ++hasbit;
1125 } else {
1126 field->presence = 0;
1127 }
1128 }
1129
1130 /* Account for space used by hasbits. */
1131 l->size = div_round_up(hasbit, 8);
1132
1133 /* Allocate non-oneof fields. */
1134 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
1135 upb_msg_field_next(&it)) {
1136 const upb_fielddef* f = upb_msg_iter_field(&it);
1137 size_t field_size = upb_msg_fielddefsize(f);
1138 size_t index = upb_fielddef_index(f);
1139
1140 if (upb_fielddef_realcontainingoneof(f)) {
1141 /* Oneofs are handled separately below. */
1142 continue;
1143 }
1144
1145 fields[index].offset = upb_msglayout_place(l, field_size);
1146 }
1147
1148 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
1149 * and space for the actual data. */
1150 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
1151 upb_msg_oneof_next(&oit)) {
1152 const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
1153 upb_oneof_iter fit;
1154
1155 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
1156 size_t field_size = 0;
1157 uint32_t case_offset;
1158 uint32_t data_offset;
1159
1160 if (upb_oneofdef_issynthetic(o)) continue;
1161
1162 /* Calculate field size: the max of all field sizes. */
1163 for (upb_oneof_begin(&fit, o);
1164 !upb_oneof_done(&fit);
1165 upb_oneof_next(&fit)) {
1166 const upb_fielddef* f = upb_oneof_iter_field(&fit);
1167 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
1168 }
1169
1170 /* Align and allocate case offset. */
1171 case_offset = upb_msglayout_place(l, case_size);
1172 data_offset = upb_msglayout_place(l, field_size);
1173
1174 for (upb_oneof_begin(&fit, o);
1175 !upb_oneof_done(&fit);
1176 upb_oneof_next(&fit)) {
1177 const upb_fielddef* f = upb_oneof_iter_field(&fit);
1178 fields[upb_fielddef_index(f)].offset = data_offset;
1179 fields[upb_fielddef_index(f)].presence = ~case_offset;
1180 }
1181 }
1182
1183 /* Size of the entire structure should be a multiple of its greatest
1184 * alignment. TODO: track overall alignment for real? */
1185 l->size = UPB_ALIGN_UP(l->size, 8);
1186
1187 /* Sort fields by number. */
1188 qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
1189 assign_layout_indices(m, fields);
1190 }
1191
/* Assigns index_ and selector_base to every field of |m| and computes the
 * message's selector_count and submsg_field_count.
 *
 * Fields are ordered with cmp_fields() in a temporary array taken from the
 * global allocator; failure to obtain that array is reported as an
 * out-of-memory error through |ctx| (which does not return). */
static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) {
  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
   * lowest indexes, but we do not publicly guarantee this. */
  upb_msg_field_iter j;
  int i;
  uint32_t selector;
  int n = upb_msgdef_numfields(m);
  upb_fielddef **fields;

  if (n == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return;
  }

  fields = upb_gmalloc(n * sizeof(*fields));
  /* The array is written immediately below, so a failed allocation must be
   * caught here rather than dereferenced. */
  CHK_OOM(fields);

  m->submsg_field_count = 0;
  for(i = 0, upb_msg_field_begin(&j, m);
      !upb_msg_field_done(&j);
      upb_msg_field_next(&j), i++) {
    upb_fielddef *f = upb_msg_iter_field(&j);
    UPB_ASSERT(f->msgdef == m);
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    fields[i] = f;
  }

  qsort(fields, n, sizeof(*fields), cmp_fields);

  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (i = 0; i < n; i++) {
    upb_fielddef *f = fields[i];
    f->index_ = i;
    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
    selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = selector;

  /* Nothing between the allocation and this point can longjmp, so the
   * temporary array is always released. */
  upb_gfree(fields);
}
1234
/* Copies the bytes of |view| with upb_strdup2(), allocating from ctx->alloc.
 * The result of upb_strdup2() is returned as-is; this function does not
 * check it for NULL. */
static char *strviewdup(symtab_addctx *ctx, upb_strview view) {
  const char *src = view.data;
  size_t n = view.size;
  return upb_strdup2(src, n, ctx->alloc);
}
1238
/* True iff the |n| bytes at |a| are exactly the NUL-terminated string |b|.
 * |a| itself need not be NUL-terminated. */
static bool streql2(const char *a, size_t n, const char *b) {
  if (strlen(b) != n) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
1242
/* True iff the contents of |view| are exactly the NUL-terminated string
 * |b|. */
static bool streql_view(upb_strview view, const char *b) {
  const char *bytes = view.data;
  size_t n = view.size;
  return streql2(bytes, n, b);
}
1246
/* Builds a NUL-terminated full name for |name|.
 *
 * With a non-NULL |prefix| the result is "<prefix>.<name>", allocated with
 * symtab_alloc().  With a NULL |prefix| the result is a copy of |name| made
 * by strviewdup().  Either way the returned pointer is never NULL: allocation
 * failure is reported as an out-of-memory error through |ctx|, which does not
 * return. */
static const char *makefullname(symtab_addctx *ctx, const char *prefix,
                                upb_strview name) {
  if (prefix) {
    /* ret = prefix + '.' + name; */
    size_t n = strlen(prefix);
    char *ret = symtab_alloc(ctx, n + name.size + 2);
    memcpy(ret, prefix, n);
    ret[n] = '.';
    memcpy(&ret[n + 1], name.data, name.size);
    ret[n + 1 + name.size] = '\0';
    return ret;
  } else {
    /* strviewdup() hands back the raw allocator result, so check it here;
     * callers go on to strlen() the returned name. */
    char *ret = strviewdup(ctx, name);
    CHK_OOM(ret);
    return ret;
  }
}
1262
/* Second pass over the oneofs of |m|, run after all fields were created.
 *
 * For every oneof this validates the synthetic-oneof rules (exactly one
 * field; all synthetic oneofs come after the non-synthetic ones), allocates
 * its |fields| array sized by the field count gathered so far, and resets the
 * count.  The fields of |m| are then appended to their oneof's array in
 * declaration order, which rebuilds each count.  Finally real_oneof_count is
 * set to the number of non-synthetic oneofs. */
static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) {
  upb_oneofdef *oneofs = (upb_oneofdef*)m->oneofs;
  int n_synthetic = 0;
  int k;

  for (k = 0; k < m->oneof_count; k++) {
    upb_oneofdef *o = &oneofs[k];

    if (o->synthetic) {
      if (o->field_count != 1) {
        symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
                    o->field_count, upb_oneofdef_name(o));
      }
      n_synthetic++;
    } else if (n_synthetic != 0) {
      symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
                  upb_oneofdef_name(o));
    }

    o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count);
    o->field_count = 0;
  }

  for (k = 0; k < m->field_count; k++) {
    const upb_fielddef *f = &m->fields[k];
    upb_oneofdef *owner = (upb_oneofdef*)f->oneof;
    if (!owner) {
      continue;
    }
    owner->fields[owner->field_count++] = f;
  }

  m->real_oneof_count = m->oneof_count - n_synthetic;
}
1297
/* Converts the field name |name| to its JSON form and writes it to |buf|.
 *
 * The transformation drops every underscore and upper-cases the character
 * that follows one.  At most |len| bytes are written; if |len| is non-zero
 * the output is always NUL-terminated (truncated if necessary).  |buf| may be
 * NULL when |len| is 0, which is how callers measure the required size.
 *
 * Returns the number of bytes a complete result needs, including the
 * terminating NUL.  For a NULL |name| an empty string is written (space
 * permitting) and 0 is returned. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

  /* Appends one byte, silently dropping it once the buffer is full; the last
   * byte of the buffer is reserved for the terminator. */
#define WRITE(byte)                              \
  do {                                           \
    ++dst;                                       \
    if (dst < len) buf[dst - 1] = (byte);        \
    else if (dst == len) buf[dst - 1] = '\0';    \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char; a plain
       * (possibly negative) char would be undefined behavior. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
1335
/* Returns the JSON form of |name| in memory obtained from symtab_alloc().
 * getjsonname() is called twice: once with no buffer to learn the required
 * size, and once to fill the allocated buffer. */
static char* makejsonname(symtab_addctx *ctx, const char* name) {
  const size_t needed = getjsonname(name, NULL, 0);
  char *out = symtab_alloc(ctx, needed);

  getjsonname(name, out, needed);
  return out;
}
1342
/* Inserts |v| under the NUL-terminated key |name| into the symtab's symbol
 * table, allocating from the symtab's arena.  Raises a "duplicate symbol"
 * error if the name is already present and an out-of-memory error if the
 * insertion fails. */
static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) {
  upb_strtable *syms = &ctx->symtab->syms;
  upb_alloc *alloc;

  if (upb_strtable_lookup(syms, name, NULL)) {
    symtab_errf(ctx, "duplicate symbol '%s'", name);
  }

  alloc = upb_arena_alloc(ctx->symtab->arena);
  CHK_OOM(upb_strtable_insert3(syms, name, strlen(name), v, alloc));
}
1351
1352 /* Given a symbol and the base symbol inside which it is defined, find the
1353 * symbol's definition in t. */
symtab_resolve(symtab_addctx * ctx,const upb_fielddef * f,const char * base,upb_strview sym,upb_deftype_t type)1354 static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f,
1355 const char *base, upb_strview sym,
1356 upb_deftype_t type) {
1357 const upb_strtable *t = &ctx->symtab->syms;
1358 if(sym.size == 0) goto notfound;
1359 if(sym.data[0] == '.') {
1360 /* Symbols starting with '.' are absolute, so we do a single lookup.
1361 * Slice to omit the leading '.' */
1362 upb_value v;
1363 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
1364 goto notfound;
1365 }
1366
1367 const void *ret = unpack_def(v, type);
1368 if (!ret) {
1369 symtab_errf(ctx, "type mismatch when resolving field %s, name %s",
1370 f->full_name, sym.data);
1371 }
1372 return ret;
1373 } else {
1374 /* Remove components from base until we find an entry or run out.
1375 * TODO: This branch is totally broken, but currently not used. */
1376 (void)base;
1377 UPB_ASSERT(false);
1378 goto notfound;
1379 }
1380
1381 notfound:
1382 symtab_errf(ctx, "couldn't resolve name '%s'", sym.data);
1383 }
1384
/* Creates the next oneof of |m| from |oneof_proto|.
 *
 * The oneof takes the next free slot of m->oneofs, starts out with no fields
 * and non-synthetic, and is registered both in the global symbol table (under
 * its full name) and in the message's name table (under its short name).
 * Its own lookup tables are initialized empty.  Errors are raised through
 * |ctx| and do not return. */
static void create_oneofdef(
    symtab_addctx *ctx, upb_msgdef *m,
    const google_protobuf_OneofDescriptorProto *oneof_proto) {
  upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
  upb_oneofdef *o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  upb_value packed;

  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  o->field_count = 0;
  o->synthetic = false;

  packed = pack_def(o, UPB_DEFTYPE_ONEOF);
  symtab_add(ctx, o->full_name, packed);
  CHK_OOM(
      upb_strtable_insert3(&m->ntof, name.data, name.size, packed, ctx->alloc));

  CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc));
}
1405
newstr(symtab_addctx * ctx,const char * data,size_t len)1406 static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) {
1407 str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len);
1408 if (!ret) return NULL;
1409 ret->len = len;
1410 if (len) memcpy(ret->str, data, len);
1411 ret->str[len] = '\0';
1412 return ret;
1413 }
1414
/* Parses the textual default value |str| (|len| bytes, not necessarily
 * NUL-terminated) according to the type of |f| and stores the result in
 * f->defaultval.
 *
 * Numeric text must be consumed entirely by the C conversion function, must
 * contain at least one converted character, and must be in range for the
 * field type.  Enum defaults are looked up by name in the field's enum; bool
 * defaults must be exactly "true" or "false".  String and bytes defaults are
 * copied verbatim.  Any violation, and any default on a message field, raises
 * an error through symtab_errf(), which does not return. */
static void parse_default(symtab_addctx *ctx, const char *str, size_t len,
                          upb_fielddef *f) {
  char *end;
  char nullz[64];
  errno = 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      /* Standard C number parsing functions expect null-terminated strings. */
      if (len >= sizeof(nullz) - 1) {
        symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32: {
      long val = strtol(str, &end, 0);
      if (end == str || val > INT32_MAX || val < INT32_MIN ||
          errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_ENUM: {
      const upb_enumdef *e = f->sub.enumdef;
      int32_t val;
      if (!upb_enumdef_ntoi(e, str, len, &val)) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_INT64: {
      /* strtoll() rather than strtol(): long is only guaranteed to be 32 bits
       * wide, which would reject valid 64-bit defaults on such platforms. */
      long long val = strtoll(str, &end, 0);
      if (end == str || val > INT64_MAX || val < INT64_MIN ||
          errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(str, &end, 0);
      if (end == str || val > UINT32_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_UINT64: {
      /* strtoull() for the same reason strtoll() is used above. */
      unsigned long long val = strtoull(str, &end, 0);
      if (end == str || val > UINT64_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(str, &end);
      if (end == str || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.dbl = val;
      break;
    }
    case UPB_TYPE_FLOAT: {
      /* Parsed as double and then narrowed; the range check is therefore
       * against double, not float. */
      float val = (float)strtod(str, &end);
      if (end == str || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.flt = val;
      break;
    }
    case UPB_TYPE_BOOL: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        /* Anything else is not a bool literal. */
        goto invalid;
      }
      break;
    }
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case UPB_TYPE_BYTES:
      /* XXX: need to interpret the C-escaped value. */
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case UPB_TYPE_MESSAGE:
      /* Should not have a default value. */
      symtab_errf(ctx, "Message should not have a default (%s)",
                  upb_fielddef_fullname(f));
  }

  return;

invalid:
  symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str,
              upb_fielddef_fullname(f));
}
1529
/* Gives |f| the implicit default for its type when no explicit default was
 * supplied: zero for numeric and enum fields, false for bools, and a freshly
 * allocated empty string for string/bytes fields.  Message fields are left
 * untouched. */
static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_MESSAGE:
      break;
    case UPB_TYPE_BOOL:
      f->defaultval.boolean = false;
      break;
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_DOUBLE:
      /* Both floating-point types are cleared through the double member. */
      f->defaultval.dbl = 0;
      break;
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT64:
    case UPB_TYPE_INT32:
      f->defaultval.sint = 0;
      break;
  }
}
1556
/* Creates a fielddef from |field_proto|.
 *
 * When |m| is non-NULL the field is a direct member of message |m|: it takes
 * the next slot of m->fields and is registered in the message's name and
 * number tables (plus its JSON name, when that differs from the short name).
 * When |m| is NULL the field is an extension: it takes the next slot of the
 * file's extension array and is registered in the global symbol table under
 * its full name.
 *
 * |prefix| is the scope used to build the full name.  The sub-def (message or
 * enum type) is not resolved here; the proto is stashed in f->sub.unresolved
 * for a later pass.
 *
 * All validation failures and allocation failures are raised through |ctx|
 * and do not return. */
static void create_fielddef(
    symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
    const google_protobuf_FieldDescriptorProto *field_proto) {
  upb_alloc *alloc = ctx->alloc;
  upb_fielddef *f;
  const google_protobuf_FieldOptions *options;
  upb_strview name;
  const char *full_name;
  const char *json_name;
  const char *shortname;
  uint32_t field_number;

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m));
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  check_ident(ctx, name, false);
  full_name = makefullname(ctx, prefix, name);
  /* The name is dereferenced right away, so make sure it was allocated. */
  CHK_OOM(full_name);
  shortname = shortdefname(full_name);

  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
    /* strviewdup() returns the raw allocator result; json_name is passed to
     * strlen()/strcmp() below. */
    CHK_OOM(json_name);
  } else {
    json_name = makejsonname(ctx, shortname);
  }

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
    symtab_errf(ctx, "invalid field number (%u)", field_number);
  }

  if (m) {
    /* direct message field. */
    upb_value v, field_v, json_v;
    size_t json_size;

    f = (upb_fielddef*)&m->fields[m->field_count++];
    f->msgdef = m;
    f->is_extension_ = false;

    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
      symtab_errf(ctx, "duplicate field name (%s)", shortname);
    }

    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
      symtab_errf(ctx, "duplicate json_name (%s)", json_name);
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      symtab_errf(ctx, "duplicate field number (%u)", field_number);
    }

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    CHK_OOM(
        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));

    if (strcmp(shortname, json_name) != 0) {
      /* Only add a separate JSON-name entry when it differs from the field
       * name, which was already inserted above. */
      CHK_OOM(
          upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc));
    }

    if (ctx->layouts) {
      /* A prebuilt layout was supplied: locate this field's slot in it. */
      const upb_msglayout_field *fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      int i;
      for (i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
    f->is_extension_ = true;
    symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD));
  }

  f->full_name = full_name;
  f->json_name = json_name;
  f->file = ctx->file;
  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->oneof = NULL;
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
    symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
  }

  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    int oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_oneofdef *oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
      symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
                  f->full_name);
    }

    if (!m) {
      symtab_errf(ctx, "oneof_index provided for extension field (%s)",
                  f->full_name);
    }

    /* The index is a signed value taken from the descriptor; a negative one
     * would index before the start of m->oneofs. */
    if (oneof_index < 0 || oneof_index >= m->oneof_count) {
      symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
    }

    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
    f->oneof = oneof;

    oneof->field_count++;
    if (f->proto3_optional_) {
      oneof->synthetic = true;
    }
    CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
    CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  } else {
    f->oneof = NULL;
    if (f->proto3_optional_) {
      symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
                  f->full_name);
    }
  }

  options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
    google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;

  if (options && google_protobuf_FieldOptions_has_packed(options)) {
    f->packed_ = google_protobuf_FieldOptions_packed(options);
  } else {
    /* Repeated fields default to packed for proto3 only. */
    f->packed_ = upb_fielddef_isprimitive(f) &&
        f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
  }

  if (options) {
    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
  } else {
    f->lazy_ = false;
  }
}
1719
/* Creates an enumdef from |enum_proto| in the next slot of the file's enum
 * array and registers it in the global symbol table under
 * "<prefix>.<name>" (or just the name when |prefix| is NULL).
 *
 * Every value is added to the name->number table; the number->name table
 * keeps only the first name seen for each number.  Errors (no values,
 * duplicate labels, a non-zero first value under proto3 syntax, allocation
 * failure) are raised through |ctx| and do not return. */
static void create_enumdef(
    symtab_addctx *ctx, const char *prefix,
    const google_protobuf_EnumDescriptorProto *enum_proto) {
  upb_enumdef *e;
  const google_protobuf_EnumValueDescriptorProto *const *values;
  upb_strview name;
  size_t i, n;

  name = google_protobuf_EnumDescriptorProto_name(enum_proto);
  check_ident(ctx, name, false);

  e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
  e->full_name = makefullname(ctx, prefix, name);
  /* symtab_add() takes the length of the name, so it must not be NULL. */
  CHK_OOM(e->full_name);
  symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));

  values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
  CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc));
  CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));

  e->file = ctx->file;
  e->defaultval = 0;

  if (n == 0) {
    symtab_errf(ctx, "enums must contain at least one value (%s)",
                e->full_name);
  }

  for (i = 0; i < n; i++) {
    const google_protobuf_EnumValueDescriptorProto *value = values[i];
    upb_strview value_name =
        google_protobuf_EnumValueDescriptorProto_name(value);
    char *name2 = strviewdup(ctx, value_name);
    int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
    upb_value v = upb_value_int32(num);

    /* Check the copy before its first use: the duplicate lookup below
     * already reads name2 as a C string. */
    CHK_OOM(name2);

    if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
      symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
                  e->full_name);
    }

    if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
      symtab_errf(ctx, "duplicate enum label '%s'", name2);
    }

    CHK_OOM(
        upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));

    if (!upb_inttable_lookup(&e->iton, num, NULL)) {
      /* Several names may share one number; only the first is recorded for
       * number->name lookups. */
      upb_value name_v = upb_value_cstr(name2);
      CHK_OOM(upb_inttable_insert2(&e->iton, num, name_v, ctx->alloc));
    }
  }

  upb_inttable_compact2(&e->iton, ctx->alloc);
}
1775
/* Creates a msgdef from |msg_proto| in the next slot of the file's message
 * array, registers it in the global symbol table, and then recursively
 * creates its oneofs, fields, nested enums and nested messages.
 *
 * The layout is either taken from ctx->layouts (advancing that cursor) or
 * allocated here and left to be filled in later.  Errors are raised through
 * |ctx| and do not return. */
static void create_msgdef(symtab_addctx *ctx, const char *prefix,
                          const google_protobuf_DescriptorProto *msg_proto) {
  const google_protobuf_OneofDescriptorProto *const *oneof_protos;
  const google_protobuf_FieldDescriptorProto *const *field_protos;
  const google_protobuf_EnumDescriptorProto *const *enum_protos;
  const google_protobuf_DescriptorProto *const *nested_protos;
  const google_protobuf_MessageOptions *options;
  size_t n_oneof, n_field, n_enum, n_nested;
  size_t k;
  upb_msgdef *m;
  upb_strview name = google_protobuf_DescriptorProto_name(msg_proto);

  check_ident(ctx, name, false);

  /* Claim a slot and publish the message under its full name. */
  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
  m->full_name = makefullname(ctx, prefix, name);
  symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));

  oneof_protos = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
  field_protos = google_protobuf_DescriptorProto_field(msg_proto, &n_field);

  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field,
                             ctx->alloc));

  m->file = ctx->file;

  options = google_protobuf_DescriptorProto_options(msg_proto);
  m->map_entry =
      options ? google_protobuf_MessageOptions_map_entry(options) : false;

  if (!ctx->layouts) {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout = symtab_alloc(
        ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry));
  } else {
    m->layout = *ctx->layouts;
    ctx->layouts++;
  }

  m->oneof_count = 0;
  m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
  for (k = 0; k < n_oneof; k++) {
    create_oneofdef(ctx, m, oneof_protos[k]);
  }

  m->field_count = 0;
  m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
  for (k = 0; k < n_field; k++) {
    create_fielddef(ctx, m->full_name, m, field_protos[k]);
  }

  assign_msg_indices(ctx, m);
  finalize_oneofs(ctx, m);
  assign_msg_wellknowntype(m);
  upb_inttable_compact2(&m->itof, ctx->alloc);

  /* This message is built.  Now build nested messages and enums. */

  enum_protos = google_protobuf_DescriptorProto_enum_type(msg_proto, &n_enum);
  for (k = 0; k < n_enum; k++) {
    create_enumdef(ctx, m->full_name, enum_protos[k]);
  }

  nested_protos =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &n_nested);
  for (k = 0; k < n_nested; k++) {
    create_msgdef(ctx, m->full_name, nested_protos[k]);
  }
}
1848
/* Adds everything declared by `msg_proto` to the counters in `file`: one
 * message for msg_proto itself, whatever its nested messages contribute
 * (recursively), and the number of its enums and extensions. */
static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
                               upb_filedef *file) {
  const google_protobuf_DescriptorProto *const *nested;
  size_t n_nested;
  size_t n_enums;
  size_t n_exts;
  size_t idx;

  /* The message itself. */
  file->msg_count += 1;

  /* Everything declared inside its nested messages. */
  nested = google_protobuf_DescriptorProto_nested_type(msg_proto, &n_nested);
  for (idx = 0; idx != n_nested; ++idx) {
    count_types_in_msg(nested[idx], file);
  }

  /* Only the element counts are needed for enums and extensions. */
  google_protobuf_DescriptorProto_enum_type(msg_proto, &n_enums);
  file->enum_count += n_enums;

  google_protobuf_DescriptorProto_extension(msg_proto, &n_exts);
  file->ext_count += n_exts;
}
1867
/* Adds the number of messages, enums and extensions declared anywhere in
 * `file_proto` (top level or nested inside messages) to file->msg_count,
 * file->enum_count and file->ext_count. */
static void count_types_in_file(
    const google_protobuf_FileDescriptorProto *file_proto,
    upb_filedef *file) {
  const google_protobuf_DescriptorProto *const *top_level;
  size_t n_top_level;
  size_t n_enums;
  size_t n_exts;
  size_t idx;

  /* Messages are counted recursively, along with what they declare. */
  top_level =
      google_protobuf_FileDescriptorProto_message_type(file_proto, &n_top_level);
  for (idx = 0; idx != n_top_level; ++idx) {
    count_types_in_msg(top_level[idx], file);
  }

  /* File-level enums and extensions: only the counts are needed. */
  google_protobuf_FileDescriptorProto_enum_type(file_proto, &n_enums);
  file->enum_count += n_enums;

  google_protobuf_FileDescriptorProto_extension(file_proto, &n_exts);
  file->ext_count += n_exts;
}
1885
/* Second-pass fixup for field `f`.  It resolves the names stored in the field's
 * descriptor proto into defs, then sets the field's default value:
 *   - for extensions, the extendee becomes f->msgdef;
 *   - for message and enum fields, the type name becomes f->sub.msgdef or
 *     f->sub.enumdef;
 *   - the default is parsed from the proto if present, otherwise
 *     set_default_default() is used.
 *
 * `prefix` is handed to symtab_resolve() as the scope for name lookups. */
static void resolve_fielddef(symtab_addctx *ctx, const char *prefix,
                             upb_fielddef *f) {
  /* Read the proto pointer first: `unresolved` shares a union with
   * sub.msgdef / sub.enumdef, which are written below. */
  const google_protobuf_FieldDescriptorProto *proto = f->sub.unresolved;
  upb_strview type_name;
  upb_strview default_text;

  if (f->is_extension_) {
    upb_strview extendee;

    if (!google_protobuf_FieldDescriptorProto_has_extendee(proto)) {
      symtab_errf(ctx, "extension for field '%s' had no extendee",
                  f->full_name);
    }

    extendee = google_protobuf_FieldDescriptorProto_extendee(proto);
    f->msgdef = symtab_resolve(ctx, f, prefix, extendee, UPB_DEFTYPE_MSG);
  }

  /* Message and enum fields must name their type. */
  if (upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    if (!google_protobuf_FieldDescriptorProto_has_type_name(proto)) {
      symtab_errf(ctx, "field '%s' is missing type name", f->full_name);
    }
  }

  type_name = google_protobuf_FieldDescriptorProto_type_name(proto);

  if (upb_fielddef_issubmsg(f)) {
    f->sub.msgdef = symtab_resolve(ctx, f, prefix, type_name, UPB_DEFTYPE_MSG);
  } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    f->sub.enumdef =
        symtab_resolve(ctx, f, prefix, type_name, UPB_DEFTYPE_ENUM);
  }

  /* Defaults are handled only now because an enum default is given as a
   * label, so it has to wait until the enum has been resolved. */
  if (!google_protobuf_FieldDescriptorProto_has_default_value(proto)) {
    set_default_default(ctx, f);
    return;
  }

  default_text = google_protobuf_FieldDescriptorProto_default_value(proto);

  if (f->file->syntax == UPB_SYNTAX_PROTO3) {
    symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
                f->full_name);
  }

  if (upb_fielddef_issubmsg(f)) {
    symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
                f->full_name);
  }

  parse_default(ctx, default_text.data, default_text.size, f);
}
1935
/* Populates `file` from `file_proto`.
 *
 * Steps, in order:
 *   1. count the defs in the file and allocate the msgs/enums/exts arrays;
 *   2. copy name, package, syntax and the PHP options;
 *   3. look up every dependency in ctx->symtab->files;
 *   4. create all message, enum and extension defs;
 *   5. resolve the references of every extension and message field;
 *   6. if no layouts were supplied (ctx->layouts is NULL), build a layout for
 *      each message with make_layout().
 *
 * Nothing is returned.  Failures are reported through symtab_errf() and the
 * other helpers -- presumably by jumping back to the UPB_SETJMP(ctx.err) in
 * _upb_symtab_addfile(), which then runs remove_filedef(); TODO confirm. */
static void build_filedef(
    symtab_addctx *ctx, upb_filedef *file,
    const google_protobuf_FileDescriptorProto *file_proto) {
  const google_protobuf_FileOptions *file_options_proto;
  const google_protobuf_DescriptorProto *const *msgs;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_FieldDescriptorProto *const *exts;
  const upb_strview* strs;
  size_t msg_total, enum_total, ext_total;
  size_t i, n;

  count_types_in_file(file_proto, file);
  msg_total = (size_t)file->msg_count;
  enum_total = (size_t)file->enum_count;
  ext_total = (size_t)file->ext_count;

  /* The counters double as "number of defs created so far" and are incremented
   * as defs are added.  Reset them before allocating: remove_filedef() walks
   * each array up to its counter, so an allocation failure must not leave a
   * counter describing entries that do not exist yet. */
  file->msg_count = 0;
  file->enum_count = 0;
  file->ext_count = 0;

  file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * msg_total);
  file->enums = symtab_alloc(ctx, sizeof(*file->enums) * enum_total);
  file->exts = symtab_alloc(ctx, sizeof(*file->exts) * ext_total);

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    symtab_errf(ctx, "File has no name");
  }

  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  file->phpprefix = NULL;
  file->phpnamespace = NULL;

  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
    upb_strview package =
        google_protobuf_FileDescriptorProto_package(file_proto);
    check_ident(ctx, package, true);
    file->package = strviewdup(ctx, package);
  } else {
    file->package = NULL;
  }

  /* A missing syntax field means proto2. */
  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_strview syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = UPB_SYNTAX_PROTO2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = UPB_SYNTAX_PROTO3;
    } else {
      symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
                  UPB_STRVIEW_ARGS(syntax));
    }
  } else {
    file->syntax = UPB_SYNTAX_PROTO2;
  }

  /* Read options. */
  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  if (file_options_proto) {
    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
      file->phpprefix = strviewdup(
          ctx,
          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
    }
    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
      file->phpnamespace = strviewdup(
          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
    }
  }

  /* Verify dependencies: each one must already be in the symtab.
   * NOTE(review): nothing visible here records how many deps were stored;
   * confirm the dependency count is set elsewhere. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);

  for (i = 0; i < n; i++) {
    upb_strview dep_name = strs[i];
    upb_value v;
    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
                              dep_name.size, &v)) {
      symtab_errf(ctx,
                  "Depends on file '" UPB_STRVIEW_FORMAT
                  "', but it has not been loaded",
                  UPB_STRVIEW_ARGS(dep_name));
    }
    file->deps[i] = upb_value_getconstptr(v);
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    create_msgdef(ctx, file->package, msgs[i]);
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    create_enumdef(ctx, file->package, enums[i]);
  }

  /* Create extensions.  file->exts was allocated above with room for every
   * extension counted in the file, which is at least the `n` file-level ones,
   * so it is not allocated a second time here. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  for (i = 0; i < n; i++) {
    create_fielddef(ctx, file->package, NULL, exts[i]);
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->ext_count; i++) {
    resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]);
  }

  for (i = 0; i < (size_t)file->msg_count; i++) {
    const upb_msgdef *m = &file->msgs[i];
    int j;
    for (j = 0; j < m->field_count; j++) {
      resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]);
    }
  }

  if (!ctx->layouts) {
    for (i = 0; i < (size_t)file->msg_count; i++) {
      const upb_msgdef *m = &file->msgs[i];
      make_layout(ctx, m);
    }
  }
}
2061
remove_filedef(upb_symtab * s,upb_filedef * file)2062 static void remove_filedef(upb_symtab *s, upb_filedef *file) {
2063 upb_alloc *alloc = upb_arena_alloc(s->arena);
2064 int i;
2065 for (i = 0; i < file->msg_count; i++) {
2066 const char *name = file->msgs[i].full_name;
2067 upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
2068 }
2069 for (i = 0; i < file->enum_count; i++) {
2070 const char *name = file->enums[i].full_name;
2071 upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
2072 }
2073 for (i = 0; i < file->ext_count; i++) {
2074 const char *name = file->exts[i].full_name;
2075 upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
2076 }
2077 }
2078
_upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_msglayout ** layouts,upb_status * status)2079 static const upb_filedef *_upb_symtab_addfile(
2080 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2081 const upb_msglayout **layouts, upb_status *status) {
2082 upb_arena *file_arena = upb_arena_new();
2083 upb_filedef *file;
2084 symtab_addctx ctx;
2085
2086 if (!file_arena) return NULL;
2087
2088 file = upb_arena_malloc(file_arena, sizeof(*file));
2089 if (!file) goto done;
2090
2091 ctx.file = file;
2092 ctx.symtab = s;
2093 ctx.file_arena = file_arena;
2094 ctx.alloc = upb_arena_alloc(file_arena);
2095 ctx.layouts = layouts;
2096 ctx.status = status;
2097
2098 file->msg_count = 0;
2099 file->enum_count = 0;
2100 file->ext_count = 0;
2101 file->symtab = s;
2102
2103 if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
2104 UPB_ASSERT(!upb_ok(status));
2105 remove_filedef(s, file);
2106 file = NULL;
2107 } else {
2108 build_filedef(&ctx, file, file_proto);
2109 upb_strtable_insert3(&s->files, file->name, strlen(file->name),
2110 upb_value_constptr(file), ctx.alloc);
2111 UPB_ASSERT(upb_ok(status));
2112 upb_arena_fuse(s->arena, file_arena);
2113 }
2114
2115 done:
2116 upb_arena_free(file_arena);
2117 return file;
2118 }
2119
upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,upb_status * status)2120 const upb_filedef *upb_symtab_addfile(
2121 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2122 upb_status *status) {
2123 return _upb_symtab_addfile(s, file_proto, NULL, status);
2124 }
2125
2126 /* Include here since we want most of this file to be stdio-free. */
2127 #include <stdio.h>
2128
/* Loads the compiled-in descriptor described by `init` into `s`, first loading
 * every entry of the NULL-terminated init->deps list recursively.
 *
 * Returns true if the file is already present in s->files or was added
 * successfully, false otherwise.  The serialized size of the descriptor is
 * added to s->bytes_loaded once parsing has been attempted. */
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  upb_def_init **deps = init->deps;
  google_protobuf_FileDescriptorProto *file;
  upb_arena *arena;
  upb_status status;

  upb_status_clear(&status);

  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
    return true;
  }

  /* Scratch arena for the parsed descriptor proto; freed before returning. */
  arena = upb_arena_new();
  if (!arena) {
    upb_status_seterrf(&status,
                       "Out of memory while loading descriptor for file '%s'.",
                       init->filename);
    goto err;
  }

  for (; *deps; deps++) {
    if (!_upb_symtab_loaddefinit(s, *deps)) {
      /* The recursive call has printed its own error; record which file
       * needed it so the message printed below is not empty. */
      upb_status_seterrf(&status,
                         "Failed to load dependency '%s' of file '%s'.",
                         (*deps)->filename, init->filename);
      goto err;
    }
  }

  file = google_protobuf_FileDescriptorProto_parse_ex(
      init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS);
  s->bytes_loaded += init->descriptor.size;

  if (!file) {
    upb_status_seterrf(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;

  upb_arena_free(arena);
  return true;

err:
  fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
          upb_status_errmsg(&status));
  /* arena is NULL when its allocation was what failed. */
  if (arena) upb_arena_free(arena);
  return false;
}
2173
_upb_symtab_bytesloaded(const upb_symtab * s)2174 size_t _upb_symtab_bytesloaded(const upb_symtab *s) {
2175 return s->bytes_loaded;
2176 }
2177
2178 #undef CHK_OOM
2179