• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 #include "upb/def.h"
3 
4 #include <ctype.h>
5 #include <errno.h>
6 #include <setjmp.h>
7 #include <stdlib.h>
8 #include <string.h>
9 
10 #include "google/protobuf/descriptor.upb.h"
11 #include "upb/port_def.inc"
12 
/* Length-prefixed string.  The struct declares a single byte of storage;
 * the character data (null-terminated) continues past the struct. */
typedef struct {
  size_t len;   /* Length of the string data. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
17 
18 struct upb_fielddef {
19   const upb_filedef *file;
20   const upb_msgdef *msgdef;
21   const char *full_name;
22   const char *json_name;
23   union {
24     int64_t sint;
25     uint64_t uint;
26     double dbl;
27     float flt;
28     bool boolean;
29     str_t *str;
30   } defaultval;
31   const upb_oneofdef *oneof;
32   union {
33     const upb_msgdef *msgdef;
34     const upb_enumdef *enumdef;
35     const google_protobuf_FieldDescriptorProto *unresolved;
36   } sub;
37   uint32_t number_;
38   uint16_t index_;
39   uint16_t layout_index;
40   uint32_t selector_base;  /* Used to index into a upb::Handlers table. */
41   bool is_extension_;
42   bool lazy_;
43   bool packed_;
44   bool proto3_optional_;
45   upb_descriptortype_t type_;
46   upb_label_t label_;
47 };
48 
49 struct upb_msgdef {
50   const upb_msglayout *layout;
51   const upb_filedef *file;
52   const char *full_name;
53   uint32_t selector_count;
54   uint32_t submsg_field_count;
55 
56   /* Tables for looking up fields by number and name. */
57   upb_inttable itof;
58   upb_strtable ntof;
59 
60   const upb_fielddef *fields;
61   const upb_oneofdef *oneofs;
62   int field_count;
63   int oneof_count;
64   int real_oneof_count;
65 
66   /* Is this a map-entry message? */
67   bool map_entry;
68   upb_wellknowntype_t well_known_type;
69 
70   /* TODO(haberman): proper extension ranges (there can be multiple). */
71 };
72 
73 struct upb_enumdef {
74   const upb_filedef *file;
75   const char *full_name;
76   upb_strtable ntoi;
77   upb_inttable iton;
78   int32_t defaultval;
79 };
80 
81 struct upb_oneofdef {
82   const upb_msgdef *parent;
83   const char *full_name;
84   int field_count;
85   bool synthetic;
86   const upb_fielddef **fields;
87   upb_strtable ntof;
88   upb_inttable itof;
89 };
90 
91 struct upb_filedef {
92   const char *name;
93   const char *package;
94   const char *phpprefix;
95   const char *phpnamespace;
96 
97   const upb_filedef **deps;
98   const upb_msgdef *msgs;
99   const upb_enumdef *enums;
100   const upb_fielddef *exts;
101   const upb_symtab *symtab;
102 
103   int dep_count;
104   int msg_count;
105   int enum_count;
106   int ext_count;
107   upb_syntax_t syntax;
108 };
109 
110 struct upb_symtab {
111   upb_arena *arena;
112   upb_strtable syms;  /* full_name -> packed def ptr */
113   upb_strtable files;  /* file_name -> upb_filedef* */
114   size_t bytes_loaded;
115 };
116 
/* Inside a symtab we store tagged pointers to specific def types.
 *
 * The tag occupies the low two bits of the pointer (see pack_def() and
 * unpack_def()).  Values 1 and 2 are deliberately reused: their meaning
 * depends on which table the tagged pointer was read from. */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
129 
/* Extracts the pointer from a tagged value.  Returns NULL unless the tag in
 * the low two bits equals |type|. */
static const void *unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  if ((tagged & 3) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~3);
}
134 
pack_def(const void * ptr,upb_deftype_t type)135 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
136   uintptr_t num = (uintptr_t)ptr | type;
137   return upb_value_constptr((const void*)num);
138 }
139 
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */

/* Returns true when low <= c <= high. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) return false;
  return c <= high;
}
144 
/* Returns true for ASCII letters and underscore. */
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z');
}
148 
/* Returns true for anything upb_isletter() accepts, plus ASCII digits. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) return true;
  return upb_isbetween(c, '0', '9');
}
152 
/* Returns the part of |fullname| after its last '.', or all of |fullname| if
 * it contains no '.'.  The result points into |fullname|.  NULL yields NULL. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) return NULL;

  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
166 
167 /* All submessage fields are lower than all other fields.
168  * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)169 uint32_t field_rank(const upb_fielddef *f) {
170   uint32_t ret = upb_fielddef_number(f);
171   const uint32_t high_bit = 1 << 30;
172   UPB_ASSERT(ret < high_bit);
173   if (!upb_fielddef_issubmsg(f))
174     ret |= high_bit;
175   return ret;
176 }
177 
cmp_fields(const void * p1,const void * p2)178 int cmp_fields(const void *p1, const void *p2) {
179   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
180   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
181   return field_rank(f1) - field_rank(f2);
182 }
183 
/* A few implementation details of handlers.  We put these here to avoid
 * a def -> handlers dependency. */

/* Warning: also in upb/handlers.h. */
#define UPB_STATIC_SELECTOR_COUNT 3
188 
/* Returns 2 for repeated fields and 0 for all others. */
static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
192 
/* Returns the number of selectors |f| needs: one, plus extras that depend on
 * whether the field is repeated, a string, or a lazy submessage. */
static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2; /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2; /* [STRING]/STARTSTR/ENDSTR */
  }
  /* A submessage adds nothing for ENDSUBMSG (STARTSUBMSG is at table
   * beginning); only lazy submessages need extra selectors. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3; /* STARTSTR/ENDSTR/STRING (for lazy) */
  }
  return count;
}
207 
/* Records an "out of memory" error message on |status|. */
static void upb_status_setoom(upb_status *status) {
  static const char kOomMessage[] = "out of memory";
  upb_status_seterrmsg(status, kOomMessage);
}
211 
/* Sets m->well_known_type by matching the message's full name against the
 * names below.  A NULL or unmatched name yields UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *full_name;
    upb_wellknowntype_t type;
  } kWellKnownTypes[] = {
      {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
      {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
      {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
      {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
      {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
      {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
      {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
      {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
      {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
      {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
      {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
      {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
      {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
      {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
      {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
      {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT},
  };
  const size_t kCount = sizeof(kWellKnownTypes) / sizeof(kWellKnownTypes[0]);
  const char *name = upb_msgdef_fullname(m);
  upb_wellknowntype_t found = UPB_WELLKNOWN_UNSPECIFIED;
  size_t i;

  if (name != NULL) {
    for (i = 0; i < kCount; i++) {
      if (strcmp(name, kWellKnownTypes[i].full_name) == 0) {
        found = kWellKnownTypes[i].type;
        break;
      }
    }
  }

  m->well_known_type = found;
}
254 
255 
256 /* upb_enumdef ****************************************************************/
257 
upb_enumdef_fullname(const upb_enumdef * e)258 const char *upb_enumdef_fullname(const upb_enumdef *e) {
259   return e->full_name;
260 }
261 
upb_enumdef_name(const upb_enumdef * e)262 const char *upb_enumdef_name(const upb_enumdef *e) {
263   return shortdefname(e->full_name);
264 }
265 
upb_enumdef_file(const upb_enumdef * e)266 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
267   return e->file;
268 }
269 
upb_enumdef_default(const upb_enumdef * e)270 int32_t upb_enumdef_default(const upb_enumdef *e) {
271   UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
272   return e->defaultval;
273 }
274 
upb_enumdef_numvals(const upb_enumdef * e)275 int upb_enumdef_numvals(const upb_enumdef *e) {
276   return (int)upb_strtable_count(&e->ntoi);
277 }
278 
/* Positions |i| at the first value of |e|. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  /* We iterate over the ntoi table, to account for duplicate numbers. */
  const upb_strtable *by_name = &e->ntoi;
  upb_strtable_begin(i, by_name);
}
283 
/* Advances |iter| to the next enum value. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once |iter| has moved past the last enum value. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
286 
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)287 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
288                       size_t len, int32_t *num) {
289   upb_value v;
290   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
291     return false;
292   }
293   if (num) *num = upb_value_getint32(v);
294   return true;
295 }
296 
upb_enumdef_iton(const upb_enumdef * def,int32_t num)297 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
298   upb_value v;
299   return upb_inttable_lookup32(&def->iton, num, &v) ?
300       upb_value_getcstr(v) : NULL;
301 }
302 
/* Returns the name of the value |iter| currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *name = upb_strtable_iter_key(iter).data;
  return name;
}
306 
/* Returns the number of the value |iter| currently points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  const upb_value current = upb_strtable_iter_value(iter);
  return upb_value_getint32(current);
}
310 
311 
312 /* upb_fielddef ***************************************************************/
313 
upb_fielddef_fullname(const upb_fielddef * f)314 const char *upb_fielddef_fullname(const upb_fielddef *f) {
315   return f->full_name;
316 }
317 
upb_fielddef_type(const upb_fielddef * f)318 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
319   switch (f->type_) {
320     case UPB_DESCRIPTOR_TYPE_DOUBLE:
321       return UPB_TYPE_DOUBLE;
322     case UPB_DESCRIPTOR_TYPE_FLOAT:
323       return UPB_TYPE_FLOAT;
324     case UPB_DESCRIPTOR_TYPE_INT64:
325     case UPB_DESCRIPTOR_TYPE_SINT64:
326     case UPB_DESCRIPTOR_TYPE_SFIXED64:
327       return UPB_TYPE_INT64;
328     case UPB_DESCRIPTOR_TYPE_INT32:
329     case UPB_DESCRIPTOR_TYPE_SFIXED32:
330     case UPB_DESCRIPTOR_TYPE_SINT32:
331       return UPB_TYPE_INT32;
332     case UPB_DESCRIPTOR_TYPE_UINT64:
333     case UPB_DESCRIPTOR_TYPE_FIXED64:
334       return UPB_TYPE_UINT64;
335     case UPB_DESCRIPTOR_TYPE_UINT32:
336     case UPB_DESCRIPTOR_TYPE_FIXED32:
337       return UPB_TYPE_UINT32;
338     case UPB_DESCRIPTOR_TYPE_ENUM:
339       return UPB_TYPE_ENUM;
340     case UPB_DESCRIPTOR_TYPE_BOOL:
341       return UPB_TYPE_BOOL;
342     case UPB_DESCRIPTOR_TYPE_STRING:
343       return UPB_TYPE_STRING;
344     case UPB_DESCRIPTOR_TYPE_BYTES:
345       return UPB_TYPE_BYTES;
346     case UPB_DESCRIPTOR_TYPE_GROUP:
347     case UPB_DESCRIPTOR_TYPE_MESSAGE:
348       return UPB_TYPE_MESSAGE;
349   }
350   UPB_UNREACHABLE();
351 }
352 
upb_fielddef_descriptortype(const upb_fielddef * f)353 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
354   return f->type_;
355 }
356 
upb_fielddef_index(const upb_fielddef * f)357 uint32_t upb_fielddef_index(const upb_fielddef *f) {
358   return f->index_;
359 }
360 
upb_fielddef_label(const upb_fielddef * f)361 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
362   return f->label_;
363 }
364 
upb_fielddef_number(const upb_fielddef * f)365 uint32_t upb_fielddef_number(const upb_fielddef *f) {
366   return f->number_;
367 }
368 
upb_fielddef_isextension(const upb_fielddef * f)369 bool upb_fielddef_isextension(const upb_fielddef *f) {
370   return f->is_extension_;
371 }
372 
upb_fielddef_lazy(const upb_fielddef * f)373 bool upb_fielddef_lazy(const upb_fielddef *f) {
374   return f->lazy_;
375 }
376 
upb_fielddef_packed(const upb_fielddef * f)377 bool upb_fielddef_packed(const upb_fielddef *f) {
378   return f->packed_;
379 }
380 
upb_fielddef_name(const upb_fielddef * f)381 const char *upb_fielddef_name(const upb_fielddef *f) {
382   return shortdefname(f->full_name);
383 }
384 
upb_fielddef_jsonname(const upb_fielddef * f)385 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
386   return f->json_name;
387 }
388 
upb_fielddef_selectorbase(const upb_fielddef * f)389 uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
390   return f->selector_base;
391 }
392 
upb_fielddef_file(const upb_fielddef * f)393 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
394   return f->file;
395 }
396 
upb_fielddef_containingtype(const upb_fielddef * f)397 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
398   return f->msgdef;
399 }
400 
upb_fielddef_containingoneof(const upb_fielddef * f)401 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
402   return f->oneof;
403 }
404 
upb_fielddef_realcontainingoneof(const upb_fielddef * f)405 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
406   if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
407   return f->oneof;
408 }
409 
/* Called by the typed default-value accessors.  Currently performs no check:
 * both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(f);
  UPB_UNUSED(ctype);
}
414 
upb_fielddef_defaultint64(const upb_fielddef * f)415 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
416   chkdefaulttype(f, UPB_TYPE_INT64);
417   return f->defaultval.sint;
418 }
419 
upb_fielddef_defaultint32(const upb_fielddef * f)420 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
421   chkdefaulttype(f, UPB_TYPE_INT32);
422   return (int32_t)f->defaultval.sint;
423 }
424 
upb_fielddef_defaultuint64(const upb_fielddef * f)425 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
426   chkdefaulttype(f, UPB_TYPE_UINT64);
427   return f->defaultval.uint;
428 }
429 
upb_fielddef_defaultuint32(const upb_fielddef * f)430 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
431   chkdefaulttype(f, UPB_TYPE_UINT32);
432   return (uint32_t)f->defaultval.uint;
433 }
434 
upb_fielddef_defaultbool(const upb_fielddef * f)435 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
436   chkdefaulttype(f, UPB_TYPE_BOOL);
437   return f->defaultval.boolean;
438 }
439 
upb_fielddef_defaultfloat(const upb_fielddef * f)440 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
441   chkdefaulttype(f, UPB_TYPE_FLOAT);
442   return f->defaultval.flt;
443 }
444 
upb_fielddef_defaultdouble(const upb_fielddef * f)445 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
446   chkdefaulttype(f, UPB_TYPE_DOUBLE);
447   return f->defaultval.dbl;
448 }
449 
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)450 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
451   str_t *str = f->defaultval.str;
452   UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
453          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
454          upb_fielddef_type(f) == UPB_TYPE_ENUM);
455   if (str) {
456     if (len) *len = str->len;
457     return str->str;
458   } else {
459     if (len) *len = 0;
460     return NULL;
461   }
462 }
463 
upb_fielddef_msgsubdef(const upb_fielddef * f)464 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
465   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL;
466 }
467 
upb_fielddef_enumsubdef(const upb_fielddef * f)468 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
469   return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL;
470 }
471 
upb_fielddef_layout(const upb_fielddef * f)472 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
473   return &f->msgdef->layout->fields[f->layout_index];
474 }
475 
upb_fielddef_issubmsg(const upb_fielddef * f)476 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
477   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
478 }
479 
upb_fielddef_isstring(const upb_fielddef * f)480 bool upb_fielddef_isstring(const upb_fielddef *f) {
481   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
482          upb_fielddef_type(f) == UPB_TYPE_BYTES;
483 }
484 
upb_fielddef_isseq(const upb_fielddef * f)485 bool upb_fielddef_isseq(const upb_fielddef *f) {
486   return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
487 }
488 
/* Returns true if the field is neither a string/bytes field nor a
 * submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  if (upb_fielddef_isstring(f)) {
    return false;
  }
  return !upb_fielddef_issubmsg(f);
}
492 
/* Returns true if the field is a repeated submessage whose message type is
 * a map entry. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
497 
upb_fielddef_hassubdef(const upb_fielddef * f)498 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
499   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
500 }
501 
upb_fielddef_haspresence(const upb_fielddef * f)502 bool upb_fielddef_haspresence(const upb_fielddef *f) {
503   if (upb_fielddef_isseq(f)) return false;
504   return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
505          f->file->syntax == UPB_SYNTAX_PROTO2;
506 }
507 
/* Returns true when low <= x <= high. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
511 
/* Returns true if |label| lies in [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* Returns true if |type| lies in [1, 11]. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* Returns true if |fmt| lies in [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
515 
/* Returns true if |type| lies in [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t kLowest = 1;
  const int32_t kHighest = 18;
  return between(type, kLowest, kHighest);
}
519 
520 /* upb_msgdef *****************************************************************/
521 
upb_msgdef_fullname(const upb_msgdef * m)522 const char *upb_msgdef_fullname(const upb_msgdef *m) {
523   return m->full_name;
524 }
525 
upb_msgdef_file(const upb_msgdef * m)526 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
527   return m->file;
528 }
529 
upb_msgdef_name(const upb_msgdef * m)530 const char *upb_msgdef_name(const upb_msgdef *m) {
531   return shortdefname(m->full_name);
532 }
533 
upb_msgdef_syntax(const upb_msgdef * m)534 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
535   return m->file->syntax;
536 }
537 
upb_msgdef_selectorcount(const upb_msgdef * m)538 size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
539   return m->selector_count;
540 }
541 
upb_msgdef_submsgfieldcount(const upb_msgdef * m)542 uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
543   return m->submsg_field_count;
544 }
545 
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)546 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
547   upb_value val;
548   return upb_inttable_lookup32(&m->itof, i, &val) ?
549       upb_value_getconstptr(val) : NULL;
550 }
551 
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)552 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
553                                     size_t len) {
554   upb_value val;
555 
556   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
557     return NULL;
558   }
559 
560   return unpack_def(val, UPB_DEFTYPE_FIELD);
561 }
562 
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)563 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
564                                     size_t len) {
565   upb_value val;
566 
567   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
568     return NULL;
569   }
570 
571   return unpack_def(val, UPB_DEFTYPE_ONEOF);
572 }
573 
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)574 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
575                            const upb_fielddef **f, const upb_oneofdef **o) {
576   upb_value val;
577 
578   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
579     return false;
580   }
581 
582   *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
583   *f = unpack_def(val, UPB_DEFTYPE_FIELD);
584   return *o || *f;  /* False if this was a JSON name. */
585 }
586 
upb_msgdef_lookupjsonname(const upb_msgdef * m,const char * name,size_t len)587 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
588                                               const char *name, size_t len) {
589   upb_value val;
590   const upb_fielddef* f;
591 
592   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
593     return NULL;
594   }
595 
596   f = unpack_def(val, UPB_DEFTYPE_FIELD);
597   if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
598 
599   return f;
600 }
601 
upb_msgdef_numfields(const upb_msgdef * m)602 int upb_msgdef_numfields(const upb_msgdef *m) {
603   return m->field_count;
604 }
605 
upb_msgdef_numoneofs(const upb_msgdef * m)606 int upb_msgdef_numoneofs(const upb_msgdef *m) {
607   return m->oneof_count;
608 }
609 
upb_msgdef_numrealoneofs(const upb_msgdef * m)610 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
611   return m->real_oneof_count;
612 }
613 
upb_msgdef_fieldcount(const upb_msgdef * m)614 int upb_msgdef_fieldcount(const upb_msgdef *m) {
615   return m->field_count;
616 }
617 
upb_msgdef_oneofcount(const upb_msgdef * m)618 int upb_msgdef_oneofcount(const upb_msgdef *m) {
619   return m->oneof_count;
620 }
621 
upb_msgdef_realoneofcount(const upb_msgdef * m)622 int upb_msgdef_realoneofcount(const upb_msgdef *m) {
623   return m->real_oneof_count;
624 }
625 
upb_msgdef_layout(const upb_msgdef * m)626 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
627   return m->layout;
628 }
629 
upb_msgdef_field(const upb_msgdef * m,int i)630 const upb_fielddef *upb_msgdef_field(const upb_msgdef *m, int i) {
631   UPB_ASSERT(i >= 0 && i < m->field_count);
632   return &m->fields[i];
633 }
634 
upb_msgdef_oneof(const upb_msgdef * m,int i)635 const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i) {
636   UPB_ASSERT(i >= 0 && i < m->oneof_count);
637   return &m->oneofs[i];
638 }
639 
upb_msgdef_mapentry(const upb_msgdef * m)640 bool upb_msgdef_mapentry(const upb_msgdef *m) {
641   return m->map_entry;
642 }
643 
upb_msgdef_wellknowntype(const upb_msgdef * m)644 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
645   return m->well_known_type;
646 }
647 
upb_msgdef_isnumberwrapper(const upb_msgdef * m)648 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
649   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
650   return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
651          type <= UPB_WELLKNOWN_UINT32VALUE;
652 }
653 
upb_msgdef_iswrapper(const upb_msgdef * m)654 bool upb_msgdef_iswrapper(const upb_msgdef *m) {
655   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
656   return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
657          type <= UPB_WELLKNOWN_BOOLVALUE;
658 }
659 
/* Positions |iter| at the first entry of the message's number -> field
 * table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  const upb_inttable *by_number = &m->itof;
  upb_inttable_begin(iter, by_number);
}
663 
/* Advances |iter| to the next field. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
665 
/* Returns true once |iter| has moved past the last field. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
669 
upb_msg_iter_field(const upb_msg_field_iter * iter)670 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
671   return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
672 }
673 
/* Puts |iter| into the "done" state (delegates to the inttable iterator). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
  return;
}
677 
/* Returns whether the two field iterators compare equal, as decided by the
 * underlying inttable iterator comparison. */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
                                const upb_msg_field_iter * iter2) {
  const bool same = upb_inttable_iter_isequal(iter1, iter2);
  return same;
}
682 
/* Positions |iter| at the first oneof of |m|.  Oneofs live in the same name
 * table as fields, so entries not tagged as oneofs are skipped. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  /* We need to skip past any initial fields. */
  for (upb_strtable_begin(iter, &m->ntof);
       !upb_strtable_done(iter) &&
       !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
       upb_strtable_next(iter)) {
    /* Nothing to do: the loop header performs the skipping. */
  }
}
691 
/* Advances |iter| to the next oneof, or to the end of the table. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  /* Always step at least once, then keep stepping over entries that are not
   * oneofs so that only oneofs are returned. */
  upb_strtable_next(iter);
  while (!upb_strtable_done(iter) &&
         !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
    upb_strtable_next(iter);
  }
}
699 
/* Returns true once |iter| has moved past the last table entry. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  const bool finished = upb_strtable_done(iter);
  return finished;
}
703 
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)704 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
705   return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
706 }
707 
/* Puts |iter| into the "done" state (delegates to the strtable iterator). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
  return;
}
711 
/* Returns whether the two oneof iterators compare equal, as decided by the
 * underlying strtable iterator comparison. */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  const bool same = upb_strtable_iter_isequal(iter1, iter2);
  return same;
}
716 
717 /* upb_oneofdef ***************************************************************/
718 
upb_oneofdef_name(const upb_oneofdef * o)719 const char *upb_oneofdef_name(const upb_oneofdef *o) {
720   return shortdefname(o->full_name);
721 }
722 
upb_oneofdef_containingtype(const upb_oneofdef * o)723 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
724   return o->parent;
725 }
726 
upb_oneofdef_fieldcount(const upb_oneofdef * o)727 int upb_oneofdef_fieldcount(const upb_oneofdef *o) {
728   return o->field_count;
729 }
730 
upb_oneofdef_field(const upb_oneofdef * o,int i)731 const upb_fielddef *upb_oneofdef_field(const upb_oneofdef *o, int i) {
732   UPB_ASSERT(i < o->field_count);
733   return o->fields[i];
734 }
735 
upb_oneofdef_numfields(const upb_oneofdef * o)736 int upb_oneofdef_numfields(const upb_oneofdef *o) {
737   return o->field_count;
738 }
739 
upb_oneofdef_index(const upb_oneofdef * o)740 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
741   return o - o->parent->oneofs;
742 }
743 
upb_oneofdef_issynthetic(const upb_oneofdef * o)744 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
745   return o->synthetic;
746 }
747 
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)748 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
749                                       const char *name, size_t length) {
750   upb_value val;
751   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
752       upb_value_getptr(val) : NULL;
753 }
754 
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)755 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
756   upb_value val;
757   return upb_inttable_lookup32(&o->itof, num, &val) ?
758       upb_value_getptr(val) : NULL;
759 }
760 
/* Positions |iter| at the first entry of the oneof's number -> field
 * table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *by_number = &o->itof;
  upb_inttable_begin(iter, by_number);
}
764 
/* Advances |iter| to the next field of the oneof. */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
  return;
}
768 
/* Returns true once |iter| has moved past the last field of the oneof. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
772 
upb_oneof_iter_field(const upb_oneof_iter * iter)773 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
774   return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
775 }
776 
/* Puts |iter| into the "done" state (delegates to the inttable iterator). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
  return;
}
780 
781 /* upb_filedef ****************************************************************/
782 
upb_filedef_name(const upb_filedef * f)783 const char *upb_filedef_name(const upb_filedef *f) {
784   return f->name;
785 }
786 
upb_filedef_package(const upb_filedef * f)787 const char *upb_filedef_package(const upb_filedef *f) {
788   return f->package;
789 }
790 
upb_filedef_phpprefix(const upb_filedef * f)791 const char *upb_filedef_phpprefix(const upb_filedef *f) {
792   return f->phpprefix;
793 }
794 
upb_filedef_phpnamespace(const upb_filedef * f)795 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
796   return f->phpnamespace;
797 }
798 
upb_filedef_syntax(const upb_filedef * f)799 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
800   return f->syntax;
801 }
802 
upb_filedef_msgcount(const upb_filedef * f)803 int upb_filedef_msgcount(const upb_filedef *f) {
804   return f->msg_count;
805 }
806 
upb_filedef_depcount(const upb_filedef * f)807 int upb_filedef_depcount(const upb_filedef *f) {
808   return f->dep_count;
809 }
810 
upb_filedef_enumcount(const upb_filedef * f)811 int upb_filedef_enumcount(const upb_filedef *f) {
812   return f->enum_count;
813 }
814 
upb_filedef_dep(const upb_filedef * f,int i)815 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
816   return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
817 }
818 
upb_filedef_msg(const upb_filedef * f,int i)819 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
820   return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
821 }
822 
upb_filedef_enum(const upb_filedef * f,int i)823 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
824   return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
825 }
826 
upb_filedef_symtab(const upb_filedef * f)827 const upb_symtab *upb_filedef_symtab(const upb_filedef *f) {
828   return f->symtab;
829 }
830 
/* Frees a symtab created by upb_symtab_new(): releases its arena and then
 * the symtab struct itself.
 *
 * Passing NULL is a no-op, like free(NULL), so cleanup paths need not guard
 * the call. */
void upb_symtab_free(upb_symtab *s) {
  if (!s) {
    return;
  }
  upb_arena_free(s->arena);
  upb_gfree(s);
}
835 
upb_symtab_new(void)836 upb_symtab *upb_symtab_new(void) {
837   upb_symtab *s = upb_gmalloc(sizeof(*s));
838   upb_alloc *alloc;
839 
840   if (!s) {
841     return NULL;
842   }
843 
844   s->arena = upb_arena_new();
845   s->bytes_loaded = 0;
846   alloc = upb_arena_alloc(s->arena);
847 
848   if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) ||
849       !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) {
850     upb_arena_free(s->arena);
851     upb_gfree(s);
852     s = NULL;
853   }
854   return s;
855 }
856 
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)857 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
858   upb_value v;
859   return upb_strtable_lookup(&s->syms, sym, &v) ?
860       unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
861 }
862 
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)863 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
864                                         size_t len) {
865   upb_value v;
866   return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
867       unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
868 }
869 
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)870 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
871   upb_value v;
872   return upb_strtable_lookup(&s->syms, sym, &v) ?
873       unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
874 }
875 
upb_symtab_lookupfile(const upb_symtab * s,const char * name)876 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
877   upb_value v;
878   return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
879                                                   : NULL;
880 }
881 
upb_symtab_lookupfile2(const upb_symtab * s,const char * name,size_t len)882 const upb_filedef *upb_symtab_lookupfile2(
883     const upb_symtab *s, const char *name, size_t len) {
884   upb_value v;
885   return upb_strtable_lookup2(&s->files, name, len, &v) ?
886       upb_value_getconstptr(v) : NULL;
887 }
888 
upb_symtab_filecount(const upb_symtab * s)889 int upb_symtab_filecount(const upb_symtab *s) {
890   return (int)upb_strtable_count(&s->files);
891 }
892 
893 /* Code to build defs from descriptor protos. *********************************/
894 
895 /* There is a question of how much validation to do here.  It will be difficult
896  * to perfectly match the amount of validation performed by proto2.  But since
897  * this code is used to directly build defs from Ruby (for example) we do need
898  * to validate important constraints like uniqueness of names and numbers. */
899 
/* Reports out-of-memory through symtab_oomerr() when |x| evaluates to
 * false/NULL.  Requires a symtab_addctx pointer named |ctx| in the enclosing
 * scope.  Wrapped in do/while(0) so that "CHK_OOM(x);" is exactly one
 * statement and stays correct inside an unbraced if/else. */
#define CHK_OOM(x) do { if (!(x)) { symtab_oomerr(ctx); } } while (0)
901 
902 typedef struct {
903   upb_symtab *symtab;
904   upb_filedef *file;              /* File we are building. */
905   upb_arena *file_arena;          /* Allocate defs here. */
906   upb_alloc *alloc;               /* Alloc of file_arena, for tables. */
907   const upb_msglayout **layouts;  /* NULL if we should build layouts. */
908   upb_status *status;             /* Record errors here. */
909   jmp_buf err;                    /* longjmp() on error. */
910 } symtab_addctx;
911 
912 UPB_NORETURN UPB_NOINLINE
symtab_errf(symtab_addctx * ctx,const char * fmt,...)913 static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) {
914   va_list argp;
915   va_start(argp, fmt);
916   upb_status_vseterrf(ctx->status, fmt, argp);
917   va_end(argp);
918   UPB_LONGJMP(ctx->err, 1);
919 }
920 
921 UPB_NORETURN UPB_NOINLINE
symtab_oomerr(symtab_addctx * ctx)922 static void symtab_oomerr(symtab_addctx *ctx) {
923   upb_status_setoom(ctx->status);
924   UPB_LONGJMP(ctx->err, 1);
925 }
926 
/* Allocates |bytes| bytes from the file arena.  On failure this reports OOM
 * via symtab_oomerr() (which does not return), so callers never see NULL. */
void *symtab_alloc(symtab_addctx *ctx, size_t bytes) {
  void *mem = upb_arena_malloc(ctx->file_arena, bytes);

  if (mem == NULL) {
    symtab_oomerr(ctx);
  }
  return mem;
}
932 
/* Validates the identifier |name|, raising an error through symtab_errf()
 * (which does not return) if it is malformed.
 *
 * Rules enforced:
 *   - '.' is only allowed when |full| is true, and never at the start of a
 *     component (so no leading '.' and no "..").
 *   - the first character of each component must satisfy upb_isletter().
 *   - every other character must satisfy upb_isalphanum().
 *   - the name must not be empty or end with '.'. */
static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) {
  const char *data = name.data;
  size_t size = name.size;
  bool at_component_start = true;
  size_t pos;

  for (pos = 0; pos < size; pos++) {
    char ch = data[pos];

    if (ch == '.') {
      if (!full || at_component_start) {
        symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)size,
                    data);
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        symtab_errf(
            ctx,
            "invalid name: path components must start with a letter (%.*s)",
            (int)size, data);
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
                  (int)size, data);
    }
  }

  /* Still expecting the start of a component: empty name or trailing '.'. */
  if (at_component_start) {
    symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)size, data);
  }
}
964 
/* Returns ceil(n / d) for d > 0.
 *
 * Computed as quotient plus a remainder test rather than (n + d - 1) / d, so
 * the result is correct even when n + d - 1 would wrap around SIZE_MAX. */
static size_t div_round_up(size_t n, size_t d) {
  return n / d + (n % d != 0 ? 1 : 0);
}
968 
/* Returns the number of bytes used to store a single (non-repeated) value of
 * field type |type|.  Every enumerator is expected to be handled by the
 * switch; falling out of it is treated as unreachable. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
990 
upb_msg_fielddefsize(const upb_fielddef * f)991 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
992   if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
993     upb_map_entry ent;
994     UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
995     return sizeof(ent.k);
996   } else if (upb_fielddef_isseq(f)) {
997     return sizeof(void*);
998   } else {
999     return upb_msgval_sizeof(upb_fielddef_type(f));
1000   }
1001 }
1002 
/* Reserves |size| bytes at the end of layout |l|.  The current size is first
 * rounded up with UPB_ALIGN_UP(l->size, size), i.e. the slot is aligned to
 * its own size.  Returns the offset of the reserved slot. */
static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  uint32_t offset;

  l->size = UPB_ALIGN_UP(l->size, size);
  offset = l->size;
  l->size += size;

  return offset;
}
1011 
field_number_cmp(const void * p1,const void * p2)1012 static int field_number_cmp(const void *p1, const void *p2) {
1013   const upb_msglayout_field *f1 = p1;
1014   const upb_msglayout_field *f2 = p2;
1015   return f1->number - f2->number;
1016 }
1017 
/* For each of the first upb_msgdef_numfields(m) entries of |fields|, finds
 * the fielddef of |m| with the same field number and stores the entry's array
 * position in that fielddef's layout_index.  The fielddefs are written
 * through a const-discarding cast. */
static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) {
  int count = upb_msgdef_numfields(m);
  int pos = 0;

  while (pos < count) {
    upb_fielddef *def = (upb_fielddef*)upb_msgdef_itof(m, fields[pos].number);
    UPB_ASSERT(def);
    def->layout_index = pos;
    pos++;
  }
}
1027 
1028 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
1029  * It computes a dynamic layout for all of the fields in |m|. */
make_layout(symtab_addctx * ctx,const upb_msgdef * m)1030 static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
1031   upb_msglayout *l = (upb_msglayout*)m->layout;
1032   upb_msg_field_iter it;
1033   upb_msg_oneof_iter oit;
1034   size_t hasbit;
1035   size_t submsg_count = m->submsg_field_count;
1036   const upb_msglayout **submsgs;
1037   upb_msglayout_field *fields;
1038 
1039   memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry));
1040 
1041   fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields));
1042   submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs));
1043 
1044   l->field_count = upb_msgdef_numfields(m);
1045   l->fields = fields;
1046   l->submsgs = submsgs;
1047   l->table_mask = 0;
1048 
1049   /* TODO(haberman): initialize fast tables so that reflection-based parsing
1050    * can get the same speeds as linked-in types. */
1051   l->fasttable[0].field_parser = &fastdecode_generic;
1052   l->fasttable[0].field_data = 0;
1053 
1054   if (upb_msgdef_mapentry(m)) {
1055     /* TODO(haberman): refactor this method so this special case is more
1056      * elegant. */
1057     const upb_fielddef *key = upb_msgdef_itof(m, 1);
1058     const upb_fielddef *val = upb_msgdef_itof(m, 2);
1059     fields[0].number = 1;
1060     fields[1].number = 2;
1061     fields[0].label = UPB_LABEL_OPTIONAL;
1062     fields[1].label = UPB_LABEL_OPTIONAL;
1063     fields[0].presence = 0;
1064     fields[1].presence = 0;
1065     fields[0].descriptortype = upb_fielddef_descriptortype(key);
1066     fields[1].descriptortype = upb_fielddef_descriptortype(val);
1067     fields[0].offset = 0;
1068     fields[1].offset = sizeof(upb_strview);
1069     fields[1].submsg_index = 0;
1070 
1071     if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
1072       submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
1073     }
1074 
1075     l->field_count = 2;
1076     l->size = 2 * sizeof(upb_strview);
1077     l->size = UPB_ALIGN_UP(l->size, 8);
1078     return;
1079   }
1080 
1081   /* Allocate data offsets in three stages:
1082    *
1083    * 1. hasbits.
1084    * 2. regular fields.
1085    * 3. oneof fields.
1086    *
1087    * OPT: There is a lot of room for optimization here to minimize the size.
1088    */
1089 
1090   /* Allocate hasbits and set basic field attributes. */
1091   submsg_count = 0;
1092   for (upb_msg_field_begin(&it, m), hasbit = 0;
1093        !upb_msg_field_done(&it);
1094        upb_msg_field_next(&it)) {
1095     upb_fielddef* f = upb_msg_iter_field(&it);
1096     upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
1097 
1098     field->number = upb_fielddef_number(f);
1099     field->descriptortype = upb_fielddef_descriptortype(f);
1100     field->label = upb_fielddef_label(f);
1101 
1102     if (field->descriptortype == UPB_DTYPE_STRING &&
1103         f->file->syntax == UPB_SYNTAX_PROTO2) {
1104       /* See TableDescriptorType() in upbc/generator.cc for details and
1105        * rationale. */
1106       field->descriptortype = UPB_DTYPE_BYTES;
1107     }
1108 
1109     if (upb_fielddef_ismap(f)) {
1110       field->label = _UPB_LABEL_MAP;
1111     } else if (upb_fielddef_packed(f)) {
1112       field->label = _UPB_LABEL_PACKED;
1113     }
1114 
1115     if (upb_fielddef_issubmsg(f)) {
1116       const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
1117       field->submsg_index = submsg_count++;
1118       submsgs[field->submsg_index] = subm->layout;
1119     }
1120 
1121     if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
1122       /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
1123        * table. This wastes one hasbit, but we don't worry about it for now. */
1124       field->presence = ++hasbit;
1125     } else {
1126       field->presence = 0;
1127     }
1128   }
1129 
1130   /* Account for space used by hasbits. */
1131   l->size = div_round_up(hasbit, 8);
1132 
1133   /* Allocate non-oneof fields. */
1134   for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
1135        upb_msg_field_next(&it)) {
1136     const upb_fielddef* f = upb_msg_iter_field(&it);
1137     size_t field_size = upb_msg_fielddefsize(f);
1138     size_t index = upb_fielddef_index(f);
1139 
1140     if (upb_fielddef_realcontainingoneof(f)) {
1141       /* Oneofs are handled separately below. */
1142       continue;
1143     }
1144 
1145     fields[index].offset = upb_msglayout_place(l, field_size);
1146   }
1147 
1148   /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
1149    * and space for the actual data. */
1150   for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
1151        upb_msg_oneof_next(&oit)) {
1152     const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
1153     upb_oneof_iter fit;
1154 
1155     size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
1156     size_t field_size = 0;
1157     uint32_t case_offset;
1158     uint32_t data_offset;
1159 
1160     if (upb_oneofdef_issynthetic(o)) continue;
1161 
1162     /* Calculate field size: the max of all field sizes. */
1163     for (upb_oneof_begin(&fit, o);
1164          !upb_oneof_done(&fit);
1165          upb_oneof_next(&fit)) {
1166       const upb_fielddef* f = upb_oneof_iter_field(&fit);
1167       field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
1168     }
1169 
1170     /* Align and allocate case offset. */
1171     case_offset = upb_msglayout_place(l, case_size);
1172     data_offset = upb_msglayout_place(l, field_size);
1173 
1174     for (upb_oneof_begin(&fit, o);
1175          !upb_oneof_done(&fit);
1176          upb_oneof_next(&fit)) {
1177       const upb_fielddef* f = upb_oneof_iter_field(&fit);
1178       fields[upb_fielddef_index(f)].offset = data_offset;
1179       fields[upb_fielddef_index(f)].presence = ~case_offset;
1180     }
1181   }
1182 
1183   /* Size of the entire structure should be a multiple of its greatest
1184    * alignment.  TODO: track overall alignment for real? */
1185   l->size = UPB_ALIGN_UP(l->size, 8);
1186 
1187   /* Sort fields by number. */
1188   qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
1189   assign_layout_indices(m, fields);
1190 }
1191 
/* Assigns index_ and selector_base to every field of |m| and computes the
 * message's selector_count and submsg_field_count.
 *
 * A temporary array of field pointers is taken from the global allocator,
 * sorted with cmp_fields, and released before returning.  If that allocation
 * fails, OOM is reported through |ctx| (does not return). */
static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) {
  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
   * lowest indexes, but we do not publicly guarantee this. */
  upb_msg_field_iter j;
  int i;
  uint32_t selector;
  int n = upb_msgdef_numfields(m);
  upb_fielddef **fields;

  if (n == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return;
  }

  fields = upb_gmalloc(n * sizeof(*fields));
  if (!fields) {
    /* Nothing has been allocated yet, so there is nothing to release. */
    symtab_oomerr(ctx);
  }

  m->submsg_field_count = 0;
  for(i = 0, upb_msg_field_begin(&j, m);
      !upb_msg_field_done(&j);
      upb_msg_field_next(&j), i++) {
    upb_fielddef *f = upb_msg_iter_field(&j);
    UPB_ASSERT(f->msgdef == m);
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    fields[i] = f;
  }

  qsort(fields, n, sizeof(*fields), cmp_fields);

  /* Selectors start after the static ones plus one per submessage field. */
  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (i = 0; i < n; i++) {
    upb_fielddef *f = fields[i];
    f->index_ = i;
    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
    selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = selector;

  upb_gfree(fields);
}
1234 
/* Duplicates |view| via upb_strdup2() using ctx->alloc and returns the copy.
 * If the copy cannot be made, OOM is reported through |ctx| (does not
 * return), so callers never receive NULL. */
static char *strviewdup(symtab_addctx *ctx, upb_strview view) {
  char *copy = upb_strdup2(view.data, view.size, ctx->alloc);
  if (!copy) {
    symtab_oomerr(ctx);
  }
  return copy;
}
1238 
/* Returns true iff the |n| bytes starting at |a| are exactly the
 * NUL-terminated string |b| (same length, same bytes).  |a| does not need to
 * be NUL-terminated. */
static bool streql2(const char *a, size_t n, const char *b) {
  if (n != strlen(b)) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
1242 
/* Returns true iff the contents of |view| equal the NUL-terminated string
 * |b|; a thin wrapper over streql2(). */
static bool streql_view(upb_strview view, const char *b) {
  const char *data = view.data;
  size_t size = view.size;
  return streql2(data, size, b);
}
1246 
/* Builds a NUL-terminated full name for |name|.
 *
 * With a non-NULL |prefix| the result is "<prefix>.<name>", allocated via
 * symtab_alloc().  With a NULL |prefix| the result is a plain copy of |name|
 * made by strviewdup(). */
static const char *makefullname(symtab_addctx *ctx, const char *prefix,
                                upb_strview name) {
  size_t prefix_len;
  char *full;

  if (!prefix) {
    return strviewdup(ctx, name);
  }

  /* Room for prefix, the '.', the name, and the terminating NUL. */
  prefix_len = strlen(prefix);
  full = symtab_alloc(ctx, prefix_len + name.size + 2);

  memcpy(full, prefix, prefix_len);
  full[prefix_len] = '.';
  memcpy(full + prefix_len + 1, name.data, name.size);
  full[prefix_len + 1 + name.size] = '\0';

  return full;
}
1262 
/* Validates the oneofs of |m| and builds each oneof's array of member fields.
 *
 * Checks, each reported through symtab_errf() (does not return):
 *   - every oneof has at least one field;
 *   - a synthetic oneof has exactly one field;
 *   - all synthetic oneofs come after all non-synthetic ones.
 *
 * On entry each oneof's field_count holds the number of member fields counted
 * while the fields were created.  It is reset to 0 and then re-counted as the
 * fields array is filled.  Finally m->real_oneof_count is set to the number
 * of non-synthetic oneofs. */
static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) {
  int i;
  int synthetic_count = 0;
  upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs;

  for (i = 0; i < m->oneof_count; i++) {
    upb_oneofdef *o = &mutable_oneofs[i];

    /* An empty oneof would later be laid out with a zero-sized data slot;
     * reject it here instead. */
    if (o->field_count == 0) {
      symtab_errf(ctx, "Oneof must have at least one field (%s)",
                  o->full_name);
    }

    if (o->synthetic && o->field_count != 1) {
      symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
                  o->field_count, upb_oneofdef_name(o));
    }

    if (o->synthetic) {
      synthetic_count++;
    } else if (synthetic_count != 0) {
      symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
                  upb_oneofdef_name(o));
    }

    o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count);
    o->field_count = 0;
  }

  /* Second pass: append each field to the array of the oneof it belongs to. */
  for (i = 0; i < m->field_count; i++) {
    const upb_fielddef *f = &m->fields[i];
    upb_oneofdef *o = (upb_oneofdef*)f->oneof;
    if (o) {
      o->fields[o->field_count++] = f;
    }
  }

  m->real_oneof_count = m->oneof_count - synthetic_count;
}
1297 
/* Converts the field name |name| to its JSON (lowerCamelCase) form.
 *
 * At most |len| bytes are written to |buf|, and the output is always
 * NUL-terminated when |len| > 0 (it is truncated if it does not fit).  |buf|
 * may be NULL when |len| is 0.
 *
 * Returns the number of bytes needed for the complete result, including the
 * terminating NUL, so a caller can size a buffer by first calling with
 * (NULL, 0).  If |name| is NULL, an empty string is written (when there is
 * room) and 0 is returned. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte, counting it even when it no longer fits; the last byte
 * of the buffer is reserved for the terminating NUL. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) buf[dst - 1] = (byte); \
    else if (dst == len) buf[dst - 1] = '\0'; \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char; a plain
       * char with the high bit set may be negative. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
1335 
/* Returns the JSON form of |name| in freshly allocated storage.  Works in two
 * passes: the first getjsonname() call only measures the required size, the
 * second fills a buffer of that size obtained from symtab_alloc(). */
static char* makejsonname(symtab_addctx *ctx, const char* name) {
  size_t needed;
  char *out;

  needed = getjsonname(name, NULL, 0);
  out = symtab_alloc(ctx, needed);
  getjsonname(name, out, needed);

  return out;
}
1342 
/* Inserts |v| into the symtab's symbol table under the NUL-terminated key
 * |name|, allocating from the symtab's arena.  A name that is already present
 * is reported as a "duplicate symbol" error; an insertion failure is reported
 * as OOM.  Neither error path returns. */
static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) {
  upb_alloc *alloc;
  size_t len;

  if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
    symtab_errf(ctx, "duplicate symbol '%s'", name);
  }

  alloc = upb_arena_alloc(ctx->symtab->arena);
  len = strlen(name);
  CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc));
}
1351 
1352 /* Given a symbol and the base symbol inside which it is defined, find the
1353  * symbol's definition in t. */
symtab_resolve(symtab_addctx * ctx,const upb_fielddef * f,const char * base,upb_strview sym,upb_deftype_t type)1354 static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f,
1355                                   const char *base, upb_strview sym,
1356                                   upb_deftype_t type) {
1357   const upb_strtable *t = &ctx->symtab->syms;
1358   if(sym.size == 0) goto notfound;
1359   if(sym.data[0] == '.') {
1360     /* Symbols starting with '.' are absolute, so we do a single lookup.
1361      * Slice to omit the leading '.' */
1362     upb_value v;
1363     if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
1364       goto notfound;
1365     }
1366 
1367     const void *ret = unpack_def(v, type);
1368     if (!ret) {
1369       symtab_errf(ctx, "type mismatch when resolving field %s, name %s",
1370                   f->full_name, sym.data);
1371     }
1372     return ret;
1373   } else {
1374     /* Remove components from base until we find an entry or run out.
1375      * TODO: This branch is totally broken, but currently not used. */
1376     (void)base;
1377     UPB_ASSERT(false);
1378     goto notfound;
1379   }
1380 
1381 notfound:
1382   symtab_errf(ctx, "couldn't resolve name '%s'", sym.data);
1383 }
1384 
/* Initializes the next free oneof slot of |m| from |oneof_proto|.
 *
 * The oneof starts with no fields and is not synthetic.  It is registered
 * under its full name in the symtab and under its short name in the message's
 * name table, and its own number/name lookup tables are initialized.
 * Failures are reported through |ctx| and do not return. */
static void create_oneofdef(
    symtab_addctx *ctx, upb_msgdef *m,
    const google_protobuf_OneofDescriptorProto *oneof_proto) {
  upb_strview short_name =
      google_protobuf_OneofDescriptorProto_name(oneof_proto);
  upb_oneofdef *oneof = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  upb_value packed;

  oneof->parent = m;
  oneof->full_name = makefullname(ctx, m->full_name, short_name);
  oneof->field_count = 0;
  oneof->synthetic = false;

  packed = pack_def(oneof, UPB_DEFTYPE_ONEOF);
  symtab_add(ctx, oneof->full_name, packed);
  CHK_OOM(upb_strtable_insert3(&m->ntof, short_name.data, short_name.size,
                               packed, ctx->alloc));

  CHK_OOM(upb_inttable_init2(&oneof->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&oneof->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc));
}
1405 
newstr(symtab_addctx * ctx,const char * data,size_t len)1406 static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) {
1407   str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len);
1408   if (!ret) return NULL;
1409   ret->len = len;
1410   if (len) memcpy(ret->str, data, len);
1411   ret->str[len] = '\0';
1412   return ret;
1413 }
1414 
/* Parses the textual default value |str| (|len| bytes, not necessarily
 * NUL-terminated) according to the type of |f| and stores the result in
 * f->defaultval.
 *
 * Numeric defaults must parse completely and be in range for the field type;
 * bool defaults must be exactly "true" or "false"; enum defaults are looked
 * up by name in the field's enum; string/bytes defaults are copied verbatim.
 * Message fields may not have a default.  Any violation is reported through
 * symtab_errf(), which does not return. */
static void parse_default(symtab_addctx *ctx, const char *str, size_t len,
                          upb_fielddef *f) {
  char *end;
  char nullz[64];
  errno = 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      /* Standard C number parsing functions expect null-terminated strings. */
      if (len >= sizeof(nullz) - 1) {
        symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32: {
      long val = strtol(str, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_ENUM: {
      const upb_enumdef *e = f->sub.enumdef;
      int32_t val;
      if (!upb_enumdef_ntoi(e, str, len, &val)) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_INT64: {
      /* long may be only 32 bits wide, so parse with strtoll(); out-of-range
       * input is signalled through ERANGE. */
      long long val = strtoll(str, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(str, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_UINT64: {
      /* unsigned long may be only 32 bits wide, so parse with strtoull(). */
      unsigned long long val = strtoull(str, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(str, &end);
      if (errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.dbl = val;
      break;
    }
    case UPB_TYPE_FLOAT: {
      /* Parsed as a double and then narrowed to float. */
      float val = strtod(str, &end);
      if (errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.flt = val;
      break;
    }
    case UPB_TYPE_BOOL: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        /* Anything else is malformed; do not leave the default unset. */
        goto invalid;
      }
      break;
    }
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case UPB_TYPE_BYTES:
      /* XXX: need to interpret the C-escaped value. */
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case UPB_TYPE_MESSAGE:
      /* Should not have a default value. */
      symtab_errf(ctx, "Message should not have a default (%s)",
                  upb_fielddef_fullname(f));
  }

  return;

invalid:
  symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str,
              upb_fielddef_fullname(f));
}
1529 
/* Stores the implicit (zero/empty) default for |f| when the descriptor gave
 * none: 0 for numeric and enum fields, false for bools, and a newly allocated
 * empty string for string/bytes fields.  Message fields are left untouched.
 * Float fields are zeroed through the double member of the union. */
static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      f->defaultval.boolean = false;
      break;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;
    case UPB_TYPE_MESSAGE:
      break;
  }
}
1556 
/* Initializes one fielddef from |field_proto|.
 *
 * When |m| is non-NULL the field is a regular member of message |m|: it takes
 * the next slot in m->fields and is registered in the message's name and
 * number tables (under both its proto name and, if different, its JSON name).
 * When |m| is NULL the field is an extension: it takes the next slot in the
 * file's extension array and is registered in the symtab by full name.
 *
 * |prefix| is the scope used to build the field's full name.  The sub-def
 * (message/enum type) is not resolved here; the proto is stashed in
 * f->sub.unresolved for a later pass.
 *
 * All validation failures and OOM conditions are reported through |ctx| and
 * do not return. */
static void create_fielddef(
    symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
    const google_protobuf_FieldDescriptorProto *field_proto) {
  upb_alloc *alloc = ctx->alloc;
  upb_fielddef *f;
  const google_protobuf_FieldOptions *options;
  upb_strview name;
  const char *full_name;
  const char *json_name;
  const char *shortname;
  uint32_t field_number;

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m));
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  check_ident(ctx, name, false);
  full_name = makefullname(ctx, prefix, name);
  shortname = shortdefname(full_name);

  /* Use the explicit json_name if the proto has one, else derive it. */
  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
  } else {
    json_name = makejsonname(ctx, shortname);
  }

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
    symtab_errf(ctx, "invalid field number (%u)", field_number);
  }

  if (m) {
    /* direct message field. */
    upb_value v, field_v, json_v;
    size_t json_size;

    f = (upb_fielddef*)&m->fields[m->field_count++];
    f->msgdef = m;
    f->is_extension_ = false;

    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
      symtab_errf(ctx, "duplicate field name (%s)", shortname);
    }

    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
      symtab_errf(ctx, "duplicate json_name (%s)", json_name);
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      symtab_errf(ctx, "duplicate field number (%u)", field_number);
    }

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    CHK_OOM(
        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));

    if (strcmp(shortname, json_name) != 0) {
      /* Uniqueness was checked above, so a failed insert here means OOM. */
      CHK_OOM(
          upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc));
    }

    if (ctx->layouts) {
      /* A layout was supplied: locate this field in it by number. */
      const upb_msglayout_field *fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      int i;
      for (i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
    f->is_extension_ = true;
    symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD));
  }

  f->full_name = full_name;
  f->json_name = json_name;
  f->file = ctx->file;
  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->oneof = NULL;
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
    symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
  }

  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    int oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_oneofdef *oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
      symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
                  f->full_name);
    }

    if (!m) {
      symtab_errf(ctx, "oneof_index provided for extension field (%s)",
                  f->full_name);
    }

    /* The index is a signed value from the descriptor: reject negatives as
     * well as values past the end before indexing m->oneofs. */
    if (oneof_index < 0 || oneof_index >= m->oneof_count) {
      symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
    }

    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
    f->oneof = oneof;

    /* Counted here; the per-oneof field array is built later. */
    oneof->field_count++;
    if (f->proto3_optional_) {
      oneof->synthetic = true;
    }
    CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
    CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  } else {
    f->oneof = NULL;
    if (f->proto3_optional_) {
      symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
                  f->full_name);
    }
  }

  options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
    google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;

  if (options && google_protobuf_FieldOptions_has_packed(options)) {
    f->packed_ = google_protobuf_FieldOptions_packed(options);
  } else {
    /* Repeated fields default to packed for proto3 only. */
    f->packed_ = upb_fielddef_isprimitive(f) &&
        f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
  }

  if (options) {
    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
  } else {
    f->lazy_ = false;
  }
}
1719 
/* Builds the upb_enumdef for `enum_proto` in the next free slot of
 * ctx->file->enums and fills its name<->number tables.
 *
 *   ctx:        build context; supplies the file being built and the allocator.
 *   prefix:     scope (package or enclosing message name) passed to
 *               makefullname() to form the enum's full name.
 *   enum_proto: descriptor to convert.
 *
 * Failures are reported via symtab_errf()/CHK_OOM; these appear to unwind to
 * the UPB_SETJMP in _upb_symtab_addfile() rather than return (their
 * definitions are outside this view). */
static void create_enumdef(
    symtab_addctx *ctx, const char *prefix,
    const google_protobuf_EnumDescriptorProto *enum_proto) {
  upb_enumdef *e;
  const google_protobuf_EnumValueDescriptorProto *const *values;
  upb_strview name;
  size_t i, n;

  name = google_protobuf_EnumDescriptorProto_name(enum_proto);
  check_ident(ctx, name, false);

  /* Claim the slot, but only count it once its full name has been assigned
   * and registered.  The rollback in remove_filedef() reads full_name of the
   * first enum_count entries and removes those names from the symbol table,
   * so a counted entry must always carry a name that this file registered. */
  e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count];
  e->full_name = makefullname(ctx, prefix, name);
  symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));
  ctx->file->enum_count++;

  values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
  CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc));
  CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));

  e->file = ctx->file;
  e->defaultval = 0;

  if (n == 0) {
    symtab_errf(ctx, "enums must contain at least one value (%s)",
                e->full_name);
  }

  for (i = 0; i < n; i++) {
    const google_protobuf_EnumValueDescriptorProto *value = values[i];
    upb_strview value_name =
        google_protobuf_EnumValueDescriptorProto_name(value);
    char *name2 = strviewdup(ctx, value_name);
    int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
    upb_value num_val = upb_value_int32(num);

    /* Check the copy before its first use: it is looked up and formatted
     * with %s below, neither of which tolerates NULL. */
    CHK_OOM(name2);

    if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
      symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
                  e->full_name);
    }

    if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
      symtab_errf(ctx, "duplicate enum label '%s'", name2);
    }

    CHK_OOM(upb_strtable_insert3(&e->ntoi, name2, strlen(name2), num_val,
                                 ctx->alloc));

    /* Several labels may share a number (aliases); the first label seen for
     * a number is the one recorded in the number->name table. */
    if (!upb_inttable_lookup(&e->iton, num, NULL)) {
      upb_value name_val = upb_value_cstr(name2);
      CHK_OOM(upb_inttable_insert2(&e->iton, num, name_val, ctx->alloc));
    }
  }

  upb_inttable_compact2(&e->iton, ctx->alloc);
}
1775 
/* Builds the upb_msgdef for `msg_proto` in the next free slot of
 * ctx->file->msgs, then recursively builds its nested enums and messages.
 *
 *   ctx:       build context; supplies the file being built, the allocator
 *              and (optionally) a list of prebuilt layouts.
 *   prefix:    scope (package or enclosing message name) passed to
 *              makefullname() to form the message's full name.
 *   msg_proto: descriptor to convert.
 *
 * Sub-message/enum references of the fields are NOT resolved here; the
 * caller does that once every name of the file has been registered.
 * Failures are reported via symtab_errf()/CHK_OOM, which appear to unwind to
 * the UPB_SETJMP in _upb_symtab_addfile() (definitions outside this view). */
static void create_msgdef(symtab_addctx *ctx, const char *prefix,
                          const google_protobuf_DescriptorProto *msg_proto) {
  upb_msgdef *m;
  const google_protobuf_MessageOptions *options;
  const google_protobuf_OneofDescriptorProto *const *oneofs;
  const google_protobuf_FieldDescriptorProto *const *fields;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_DescriptorProto *const *msgs;
  size_t i, n_oneof, n_field, n;
  upb_strview name;

  name = google_protobuf_DescriptorProto_name(msg_proto);
  check_ident(ctx, name, false);

  /* Claim the slot, but only count it once its full name has been assigned
   * and registered.  The rollback in remove_filedef() reads full_name of the
   * first msg_count entries and removes those names from the symbol table,
   * so a counted entry must always carry a name that this file registered. */
  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count];
  m->full_name = makefullname(ctx, prefix, name);
  symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));
  ctx->file->msg_count++;

  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
  fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);

  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  /* The name table is sized for both fields and oneofs. */
  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field,
                             ctx->alloc));

  m->file = ctx->file;
  m->map_entry = false;

  options = google_protobuf_DescriptorProto_options(msg_proto);

  if (options) {
    m->map_entry = google_protobuf_MessageOptions_map_entry(options);
  }

  if (ctx->layouts) {
    /* Prebuilt layouts are consumed one per message, in creation order. */
    m->layout = *ctx->layouts;
    ctx->layouts++;
  } else {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout = symtab_alloc(
        ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry));
  }

  /* The counts start at zero and the arrays at full capacity; the counts are
   * presumably advanced by create_oneofdef()/create_fielddef() as they fill
   * the arrays (those functions are outside this view). */
  m->oneof_count = 0;
  m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
  for (i = 0; i < n_oneof; i++) {
    create_oneofdef(ctx, m, oneofs[i]);
  }

  m->field_count = 0;
  m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
  for (i = 0; i < n_field; i++) {
    create_fielddef(ctx, m->full_name, m, fields[i]);
  }

  assign_msg_indices(ctx, m);
  finalize_oneofs(ctx, m);
  assign_msg_wellknowntype(m);
  upb_inttable_compact2(&m->itof, ctx->alloc);

  /* This message is built.  Now build nested messages and enums. */

  enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    create_enumdef(ctx, m->full_name, enums[i]);
  }

  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    create_msgdef(ctx, m->full_name, msgs[i]);
  }
}
1848 
/* Adds `msg_proto` itself, its enums, its extensions and (recursively) all of
 * its nested messages to the running totals kept in file->msg_count,
 * file->enum_count and file->ext_count. */
static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
                               upb_filedef *file) {
  const google_protobuf_DescriptorProto *const *nested;
  size_t nested_n, enum_n, ext_n, idx;

  /* Only the element counts of the enum/extension arrays are needed. */
  nested = google_protobuf_DescriptorProto_nested_type(msg_proto, &nested_n);
  google_protobuf_DescriptorProto_enum_type(msg_proto, &enum_n);
  google_protobuf_DescriptorProto_extension(msg_proto, &ext_n);

  file->msg_count++;
  file->enum_count += enum_n;
  file->ext_count += ext_n;

  for (idx = 0; idx < nested_n; idx++) {
    count_types_in_msg(nested[idx], file);
  }
}
1867 
/* Adds every message (including nested ones), enum and extension declared in
 * `file_proto` to file->msg_count, file->enum_count and file->ext_count.
 * The counters are incremented, not reset, so the caller decides the
 * starting values. */
static void count_types_in_file(
    const google_protobuf_FileDescriptorProto *file_proto,
    upb_filedef *file) {
  const google_protobuf_DescriptorProto *const *top_msgs;
  size_t msg_n, enum_n, ext_n, idx;

  /* Only the element counts of the enum/extension arrays are needed. */
  top_msgs = google_protobuf_FileDescriptorProto_message_type(file_proto,
                                                              &msg_n);
  google_protobuf_FileDescriptorProto_enum_type(file_proto, &enum_n);
  google_protobuf_FileDescriptorProto_extension(file_proto, &ext_n);

  for (idx = 0; idx < msg_n; idx++) {
    count_types_in_msg(top_msgs[idx], file);
  }

  file->enum_count += enum_n;
  file->ext_count += ext_n;
}
1885 
/* Second-pass fixup for a field created earlier: resolves the extendee (for
 * extensions), the sub-message or enum type, and the default value.
 *
 *   ctx:    build context used for symbol resolution and error reporting.
 *   prefix: scope passed to symtab_resolve() for the name lookups.
 *   f:      field whose f->sub.unresolved still points at its descriptor. */
static void resolve_fielddef(symtab_addctx *ctx, const char *prefix,
                             upb_fielddef *f) {
  /* f->sub is a union: read the stashed descriptor before any of the other
   * members is written below. */
  const google_protobuf_FieldDescriptorProto *proto = f->sub.unresolved;
  upb_strview type_name;

  if (f->is_extension_) {
    upb_strview extendee;

    if (!google_protobuf_FieldDescriptorProto_has_extendee(proto)) {
      symtab_errf(ctx, "extension for field '%s' had no extendee",
                  f->full_name);
    }

    extendee = google_protobuf_FieldDescriptorProto_extendee(proto);
    f->msgdef = symtab_resolve(ctx, f, prefix, extendee, UPB_DEFTYPE_MSG);
  }

  /* Message and enum fields must name their type. */
  if (upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    if (!google_protobuf_FieldDescriptorProto_has_type_name(proto)) {
      symtab_errf(ctx, "field '%s' is missing type name", f->full_name);
    }
  }

  type_name = google_protobuf_FieldDescriptorProto_type_name(proto);

  if (upb_fielddef_issubmsg(f)) {
    f->sub.msgdef = symtab_resolve(ctx, f, prefix, type_name, UPB_DEFTYPE_MSG);
  } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    f->sub.enumdef =
        symtab_resolve(ctx, f, prefix, type_name, UPB_DEFTYPE_ENUM);
  }

  /* The default can only be handled now: enum defaults are written as a
   * label, which needs the enum resolved above. */
  if (!google_protobuf_FieldDescriptorProto_has_default_value(proto)) {
    set_default_default(ctx, f);
    return;
  }

  {
    upb_strview text = google_protobuf_FieldDescriptorProto_default_value(proto);

    if (f->file->syntax == UPB_SYNTAX_PROTO3) {
      symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
                  f->full_name);
    }

    if (upb_fielddef_issubmsg(f)) {
      symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
                  f->full_name);
    }

    parse_default(ctx, text.data, text.size, f);
  }
}
1935 
/* Populates `file` from `file_proto`: name, package, syntax, PHP options,
 * dependencies, and all messages, enums and top-level extensions; then
 * resolves cross-references and (unless prebuilt layouts were supplied in
 * ctx->layouts) builds the message layouts.
 *
 *   ctx:        build context (allocator, symbol table, status, layouts).
 *   file:       filedef to fill; its msg/enum/ext counters must be zero on
 *               entry, because count_types_in_file() adds to them.
 *   file_proto: descriptor to convert.
 *
 * Failures are reported via symtab_errf()/CHK_OOM, which appear to unwind to
 * the UPB_SETJMP in _upb_symtab_addfile() rather than return (their
 * definitions are outside this view).  That caller then runs
 * remove_filedef(), which walks the first msg_count/enum_count/ext_count
 * entries of the arrays allocated here. */
static void build_filedef(
    symtab_addctx *ctx, upb_filedef *file,
    const google_protobuf_FileDescriptorProto *file_proto) {
  const google_protobuf_FileOptions *file_options_proto;
  const google_protobuf_DescriptorProto *const *msgs;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_FieldDescriptorProto *const *exts;
  const upb_strview* strs;
  size_t i, n;
  size_t msg_total, enum_total, ext_total;

  count_types_in_file(file_proto, file);
  msg_total = (size_t)file->msg_count;
  enum_total = (size_t)file->enum_count;
  ext_total = (size_t)file->ext_count;

  /* From here on the counters mean "defs created so far" and are incremented
   * as defs are added.  Reset them before allocating: if an allocation below
   * unwinds, the rollback must not walk arrays that were never set up. */
  file->msg_count = 0;
  file->enum_count = 0;
  file->ext_count = 0;

  file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * msg_total);
  file->enums = symtab_alloc(ctx, sizeof(*file->enums) * enum_total);
  /* ext_total also includes extensions declared inside messages, so this
   * array is at least as large as the top-level extension list used below. */
  file->exts = symtab_alloc(ctx, sizeof(*file->exts) * ext_total);

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    symtab_errf(ctx, "File has no name");
  }

  /* strviewdup() results are checked like create_enumdef() does; the name in
   * particular is passed to strlen() by the caller. */
  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  CHK_OOM(file->name);
  file->phpprefix = NULL;
  file->phpnamespace = NULL;

  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
    upb_strview package =
        google_protobuf_FileDescriptorProto_package(file_proto);
    check_ident(ctx, package, true);
    file->package = strviewdup(ctx, package);
    CHK_OOM(file->package);
  } else {
    file->package = NULL;
  }

  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_strview syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = UPB_SYNTAX_PROTO2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = UPB_SYNTAX_PROTO3;
    } else {
      symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
                  UPB_STRVIEW_ARGS(syntax));
    }
  } else {
    /* A file without an explicit syntax is treated as proto2. */
    file->syntax = UPB_SYNTAX_PROTO2;
  }

  /* Read options. */
  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  if (file_options_proto) {
    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
      file->phpprefix = strviewdup(
          ctx,
          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
      CHK_OOM(file->phpprefix);
    }
    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
      file->phpnamespace = strviewdup(
          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
      CHK_OOM(file->phpnamespace);
    }
  }

  /* Verify dependencies: each one must already be loaded in the symtab. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);

  for (i = 0; i < n; i++) {
    upb_strview dep_name = strs[i];
    upb_value v;
    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
                              dep_name.size, &v)) {
      symtab_errf(ctx,
                  "Depends on file '" UPB_STRVIEW_FORMAT
                  "', but it has not been loaded",
                  UPB_STRVIEW_ARGS(dep_name));
    }
    file->deps[i] = upb_value_getconstptr(v);
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    create_msgdef(ctx, file->package, msgs[i]);
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    create_enumdef(ctx, file->package, enums[i]);
  }

  /* Create extensions.  file->exts was already allocated above with room for
   * all of them, so it is not allocated a second time here. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  for (i = 0; i < n; i++) {
    create_fielddef(ctx, file->package, NULL, exts[i]);
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->ext_count; i++) {
    resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]);
  }

  for (i = 0; i < (size_t)file->msg_count; i++) {
    const upb_msgdef *m = &file->msgs[i];
    int j;
    for (j = 0; j < m->field_count; j++) {
      resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]);
    }
  }

  if (!ctx->layouts) {
    for (i = 0; i < (size_t)file->msg_count; i++) {
      const upb_msgdef *m = &file->msgs[i];
      make_layout(ctx, m);
    }
  }
}
2061 
remove_filedef(upb_symtab * s,upb_filedef * file)2062 static void remove_filedef(upb_symtab *s, upb_filedef *file) {
2063   upb_alloc *alloc = upb_arena_alloc(s->arena);
2064   int i;
2065   for (i = 0; i < file->msg_count; i++) {
2066     const char *name = file->msgs[i].full_name;
2067     upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
2068   }
2069   for (i = 0; i < file->enum_count; i++) {
2070     const char *name = file->enums[i].full_name;
2071     upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
2072   }
2073   for (i = 0; i < file->ext_count; i++) {
2074     const char *name = file->exts[i].full_name;
2075     upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc);
2076   }
2077 }
2078 
_upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_msglayout ** layouts,upb_status * status)2079 static const upb_filedef *_upb_symtab_addfile(
2080     upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2081     const upb_msglayout **layouts, upb_status *status) {
2082   upb_arena *file_arena = upb_arena_new();
2083   upb_filedef *file;
2084   symtab_addctx ctx;
2085 
2086   if (!file_arena) return NULL;
2087 
2088   file = upb_arena_malloc(file_arena, sizeof(*file));
2089   if (!file) goto done;
2090 
2091   ctx.file = file;
2092   ctx.symtab = s;
2093   ctx.file_arena = file_arena;
2094   ctx.alloc = upb_arena_alloc(file_arena);
2095   ctx.layouts = layouts;
2096   ctx.status = status;
2097 
2098   file->msg_count = 0;
2099   file->enum_count = 0;
2100   file->ext_count = 0;
2101   file->symtab = s;
2102 
2103   if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
2104     UPB_ASSERT(!upb_ok(status));
2105     remove_filedef(s, file);
2106     file = NULL;
2107   } else {
2108     build_filedef(&ctx, file, file_proto);
2109     upb_strtable_insert3(&s->files, file->name, strlen(file->name),
2110                          upb_value_constptr(file), ctx.alloc);
2111     UPB_ASSERT(upb_ok(status));
2112     upb_arena_fuse(s->arena, file_arena);
2113   }
2114 
2115 done:
2116   upb_arena_free(file_arena);
2117   return file;
2118 }
2119 
upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,upb_status * status)2120 const upb_filedef *upb_symtab_addfile(
2121     upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2122     upb_status *status) {
2123   return _upb_symtab_addfile(s, file_proto, NULL, status);
2124 }
2125 
2126 /* Include here since we want most of this file to be stdio-free. */
2127 #include <stdio.h>
2128 
/* Loads the compiled-in descriptor `init` (and, recursively, all of its
 * dependencies) into `s`.  A file whose name is already present in s->files
 * is treated as loaded.
 *
 * Returns true on success.  On failure a diagnostic is written to stderr and
 * false is returned. */
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  upb_def_init **deps = init->deps;
  google_protobuf_FileDescriptorProto *file;
  upb_arena *arena = NULL;
  upb_status status;

  upb_status_clear(&status);

  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
    return true;
  }

  arena = upb_arena_new();
  if (!arena) {
    /* Without this check the parse below would run on a NULL arena. */
    upb_status_seterrf(&status, "out of memory");
    goto err;
  }

  /* `deps` is a NULL-terminated list. */
  for (; *deps; deps++) {
    if (!_upb_symtab_loaddefinit(s, *deps)) {
      /* The recursive call already printed its own diagnostic; record which
       * dependency failed so the message printed here is not empty. */
      upb_status_seterrf(&status, "failed to load dependency '%s' of '%s'",
                         (*deps)->filename, init->filename);
      goto err;
    }
  }

  file = google_protobuf_FileDescriptorProto_parse_ex(
      init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS);
  s->bytes_loaded += init->descriptor.size;

  if (!file) {
    upb_status_seterrf(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;

  /* The parsed descriptor is only needed while the filedef is being built. */
  upb_arena_free(arena);
  return true;

err:
  fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
          upb_status_errmsg(&status));
  if (arena) upb_arena_free(arena);
  return false;
}
2173 
_upb_symtab_bytesloaded(const upb_symtab * s)2174 size_t _upb_symtab_bytesloaded(const upb_symtab *s) {
2175   return s->bytes_loaded;
2176 }
2177 
2178 #undef CHK_OOM
2179