• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb/reflection/internal/field_def.h"
9 
10 #include <ctype.h>
11 #include <errno.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include "upb/base/descriptor_constants.h"
18 #include "upb/base/string_view.h"
19 #include "upb/base/upcast.h"
20 #include "upb/mem/arena.h"
21 #include "upb/message/accessors.h"
22 #include "upb/mini_descriptor/decode.h"
23 #include "upb/mini_descriptor/internal/encode.h"
24 #include "upb/mini_descriptor/internal/modifiers.h"
25 #include "upb/mini_table/enum.h"
26 #include "upb/mini_table/extension.h"
27 #include "upb/mini_table/field.h"
28 #include "upb/mini_table/message.h"
29 #include "upb/mini_table/sub.h"
30 #include "upb/reflection/def.h"
31 #include "upb/reflection/def_type.h"
32 #include "upb/reflection/internal/def_builder.h"
33 #include "upb/reflection/internal/def_pool.h"
34 #include "upb/reflection/internal/desc_state.h"
35 #include "upb/reflection/internal/enum_def.h"
36 #include "upb/reflection/internal/file_def.h"
37 #include "upb/reflection/internal/message_def.h"
38 #include "upb/reflection/internal/oneof_def.h"
39 #include "upb/reflection/internal/strdup2.h"
40 
41 // Must be last.
42 #include "upb/port/def.inc"
43 
// Placeholder stored in upb_FieldDef.type_ while the real field type is not
// yet known; it is assigned later, in resolve_subdef().
#define UPB_FIELD_TYPE_UNSPECIFIED 0
45 
// Length-prefixed byte string used to hold string/bytes default values.
// Instances are allocated with `sizeof(str_t) + len` bytes so that `str` can
// hold `len` payload bytes plus one extra byte.
typedef struct {
  size_t len;   // Number of payload bytes stored in `str`.
  char str[1];  // Null-terminated string data follows.
} str_t;
50 
// Reflection record describing one field or extension.
struct upb_FieldDef {
  // Field options.  upb_FieldDef_HasOptions() reports false when this is equal
  // to kUpbDefOptDefault.
  const UPB_DESC(FieldOptions*) opts;
  // Feature set produced by _upb_DefBuilder_DoResolveFeatures() at creation.
  const UPB_DESC(FeatureSet*) resolved_features;
  const upb_FileDef* file;       // File the builder was processing.
  const upb_MessageDef* msgdef;  // Exposed via upb_FieldDef_ContainingType().
  const char* full_name;
  // Either the explicit json_name from the proto or one derived from the
  // field name (see has_json_name).
  const char* json_name;
  // Default value.  Which member is meaningful depends on the field's C type;
  // see upb_FieldDef_Default().
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t* str;
    void* msg;  // Always NULL.
  } defaultval;
  // `extension_scope` is read when is_extension is true, `oneof` otherwise.
  union {
    const upb_OneofDef* oneof;
    const upb_MessageDef* extension_scope;
  } scope;
  // `unresolved` stashes the originating proto at creation time, until the
  // sub-def can be resolved.
  union {
    const upb_MessageDef* msgdef;
    const upb_EnumDef* enumdef;
    const UPB_DESC(FieldDescriptorProto) * unresolved;
  } sub;
  uint32_t number_;
  uint16_t index_;        // Position within the array this def was created in.
  uint16_t layout_index;  // Index into msgdef->layout->fields or file->exts
  bool has_default;
  bool has_json_name;  // True when the proto carried an explicit json_name.
  bool has_presence;
  bool is_extension;
  bool is_proto3_optional;
  upb_FieldType type_;
  upb_Label label_;
};
87 
_upb_FieldDef_At(const upb_FieldDef * f,int i)88 upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) {
89   return (upb_FieldDef*)&f[i];
90 }
91 
UPB_DESC(FieldOptions)92 const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) {
93   return f->opts;
94 }
95 
upb_FieldDef_HasOptions(const upb_FieldDef * f)96 bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
97   return f->opts != (void*)kUpbDefOptDefault;
98 }
99 
UPB_DESC(FeatureSet)100 const UPB_DESC(FeatureSet) *
101     upb_FieldDef_ResolvedFeatures(const upb_FieldDef* f) {
102   return f->resolved_features;
103 }
104 
upb_FieldDef_FullName(const upb_FieldDef * f)105 const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
106   return f->full_name;
107 }
108 
upb_FieldDef_CType(const upb_FieldDef * f)109 upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
110   return upb_FieldType_CType(f->type_);
111 }
112 
upb_FieldDef_Type(const upb_FieldDef * f)113 upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; }
114 
upb_FieldDef_Index(const upb_FieldDef * f)115 uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; }
116 
upb_FieldDef_LayoutIndex(const upb_FieldDef * f)117 uint32_t upb_FieldDef_LayoutIndex(const upb_FieldDef* f) {
118   return f->layout_index;
119 }
120 
upb_FieldDef_Label(const upb_FieldDef * f)121 upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; }
122 
upb_FieldDef_Number(const upb_FieldDef * f)123 uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; }
124 
upb_FieldDef_IsExtension(const upb_FieldDef * f)125 bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; }
126 
// A field is packable when it is repeated and primitive.
bool _upb_FieldDef_IsPackable(const upb_FieldDef* f) {
  if (!upb_FieldDef_IsRepeated(f)) return false;
  return upb_FieldDef_IsPrimitive(f);
}
130 
upb_FieldDef_IsPacked(const upb_FieldDef * f)131 bool upb_FieldDef_IsPacked(const upb_FieldDef* f) {
132   return _upb_FieldDef_IsPackable(f) &&
133          UPB_DESC(FeatureSet_repeated_field_encoding(f->resolved_features)) ==
134              UPB_DESC(FeatureSet_PACKED);
135 }
136 
upb_FieldDef_Name(const upb_FieldDef * f)137 const char* upb_FieldDef_Name(const upb_FieldDef* f) {
138   return _upb_DefBuilder_FullToShort(f->full_name);
139 }
140 
upb_FieldDef_JsonName(const upb_FieldDef * f)141 const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
142   return f->json_name;
143 }
144 
upb_FieldDef_HasJsonName(const upb_FieldDef * f)145 bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
146   return f->has_json_name;
147 }
148 
upb_FieldDef_File(const upb_FieldDef * f)149 const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; }
150 
upb_FieldDef_ContainingType(const upb_FieldDef * f)151 const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
152   return f->msgdef;
153 }
154 
upb_FieldDef_ExtensionScope(const upb_FieldDef * f)155 const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
156   return f->is_extension ? f->scope.extension_scope : NULL;
157 }
158 
upb_FieldDef_ContainingOneof(const upb_FieldDef * f)159 const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
160   return f->is_extension ? NULL : f->scope.oneof;
161 }
162 
upb_FieldDef_RealContainingOneof(const upb_FieldDef * f)163 const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
164   const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f);
165   if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL;
166   return oneof;
167 }
168 
upb_FieldDef_Default(const upb_FieldDef * f)169 upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
170   upb_MessageValue ret;
171 
172   if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) {
173     return (upb_MessageValue){.msg_val = NULL};
174   }
175 
176   switch (upb_FieldDef_CType(f)) {
177     case kUpb_CType_Bool:
178       return (upb_MessageValue){.bool_val = f->defaultval.boolean};
179     case kUpb_CType_Int64:
180       return (upb_MessageValue){.int64_val = f->defaultval.sint};
181     case kUpb_CType_UInt64:
182       return (upb_MessageValue){.uint64_val = f->defaultval.uint};
183     case kUpb_CType_Enum:
184     case kUpb_CType_Int32:
185       return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
186     case kUpb_CType_UInt32:
187       return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
188     case kUpb_CType_Float:
189       return (upb_MessageValue){.float_val = f->defaultval.flt};
190     case kUpb_CType_Double:
191       return (upb_MessageValue){.double_val = f->defaultval.dbl};
192     case kUpb_CType_String:
193     case kUpb_CType_Bytes: {
194       str_t* str = f->defaultval.str;
195       if (str) {
196         return (upb_MessageValue){
197             .str_val = (upb_StringView){.data = str->str, .size = str->len}};
198       } else {
199         return (upb_MessageValue){
200             .str_val = (upb_StringView){.data = NULL, .size = 0}};
201       }
202     }
203     default:
204       UPB_UNREACHABLE();
205   }
206 
207   return ret;
208 }
209 
upb_FieldDef_MessageSubDef(const upb_FieldDef * f)210 const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
211   return upb_FieldDef_IsSubMessage(f) ? f->sub.msgdef : NULL;
212 }
213 
upb_FieldDef_EnumSubDef(const upb_FieldDef * f)214 const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
215   return upb_FieldDef_IsEnum(f) ? f->sub.enumdef : NULL;
216 }
217 
upb_FieldDef_MiniTable(const upb_FieldDef * f)218 const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
219   if (upb_FieldDef_IsExtension(f)) {
220     const upb_FileDef* file = upb_FieldDef_File(f);
221     return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable(
222         file, f->layout_index);
223   } else {
224     const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef);
225     return &layout->UPB_PRIVATE(fields)[f->layout_index];
226   }
227 }
228 
upb_FieldDef_MiniTableExtension(const upb_FieldDef * f)229 const upb_MiniTableExtension* upb_FieldDef_MiniTableExtension(
230     const upb_FieldDef* f) {
231   UPB_ASSERT(upb_FieldDef_IsExtension(f));
232   const upb_FileDef* file = upb_FieldDef_File(f);
233   return _upb_FileDef_ExtensionMiniTable(file, f->layout_index);
234 }
235 
_upb_FieldDef_IsClosedEnum(const upb_FieldDef * f)236 bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) {
237   if (f->type_ != kUpb_FieldType_Enum) return false;
238   return upb_EnumDef_IsClosed(f->sub.enumdef);
239 }
240 
_upb_FieldDef_IsProto3Optional(const upb_FieldDef * f)241 bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
242   return f->is_proto3_optional;
243 }
244 
_upb_FieldDef_LayoutIndex(const upb_FieldDef * f)245 int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; }
246 
_upb_FieldDef_ValidateUtf8(const upb_FieldDef * f)247 bool _upb_FieldDef_ValidateUtf8(const upb_FieldDef* f) {
248   if (upb_FieldDef_Type(f) != kUpb_FieldType_String) return false;
249   return UPB_DESC(FeatureSet_utf8_validation(f->resolved_features)) ==
250          UPB_DESC(FeatureSet_VERIFY);
251 }
252 
_upb_FieldDef_IsGroupLike(const upb_FieldDef * f)253 bool _upb_FieldDef_IsGroupLike(const upb_FieldDef* f) {
254   // Groups are always tag-delimited.
255   if (f->type_ != kUpb_FieldType_Group) {
256     return false;
257   }
258 
259   const upb_MessageDef* msg = upb_FieldDef_MessageSubDef(f);
260 
261   // Group fields always are always the lowercase type name.
262   const char* mname = upb_MessageDef_Name(msg);
263   const char* fname = upb_FieldDef_Name(f);
264   size_t name_size = strlen(fname);
265   if (name_size != strlen(mname)) return false;
266   for (size_t i = 0; i < name_size; ++i) {
267     if ((mname[i] | 0x20) != fname[i]) {
268       // Case-insensitive ascii comparison.
269       return false;
270     }
271   }
272 
273   if (upb_MessageDef_File(msg) != upb_FieldDef_File(f)) {
274     return false;
275   }
276 
277   // Group messages are always defined in the same scope as the field.  File
278   // level extensions will compare NULL == NULL here, which is why the file
279   // comparison above is necessary to ensure both come from the same file.
280   return upb_FieldDef_IsExtension(f) ? upb_FieldDef_ExtensionScope(f) ==
281                                            upb_MessageDef_ContainingType(msg)
282                                      : upb_FieldDef_ContainingType(f) ==
283                                            upb_MessageDef_ContainingType(msg);
284 }
285 
_upb_FieldDef_Modifiers(const upb_FieldDef * f)286 uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) {
287   uint64_t out = upb_FieldDef_IsPacked(f) ? kUpb_FieldModifier_IsPacked : 0;
288 
289   if (upb_FieldDef_IsRepeated(f)) {
290     out |= kUpb_FieldModifier_IsRepeated;
291   } else if (upb_FieldDef_IsRequired(f)) {
292     out |= kUpb_FieldModifier_IsRequired;
293   } else if (!upb_FieldDef_HasPresence(f)) {
294     out |= kUpb_FieldModifier_IsProto3Singular;
295   }
296 
297   if (_upb_FieldDef_IsClosedEnum(f)) {
298     out |= kUpb_FieldModifier_IsClosedEnum;
299   }
300 
301   if (_upb_FieldDef_ValidateUtf8(f)) {
302     out |= kUpb_FieldModifier_ValidateUtf8;
303   }
304 
305   return out;
306 }
307 
upb_FieldDef_HasDefault(const upb_FieldDef * f)308 bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; }
upb_FieldDef_HasPresence(const upb_FieldDef * f)309 bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; }
310 
// Returns true for fields that refer to another def: sub-messages and enums.
bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
  if (upb_FieldDef_IsSubMessage(f)) return true;
  return upb_FieldDef_IsEnum(f);
}
314 
upb_FieldDef_IsEnum(const upb_FieldDef * f)315 bool upb_FieldDef_IsEnum(const upb_FieldDef* f) {
316   return upb_FieldDef_CType(f) == kUpb_CType_Enum;
317 }
318 
// A map field is a repeated sub-message field whose message type is a map
// entry.
bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
  if (!upb_FieldDef_IsRepeated(f)) return false;
  if (!upb_FieldDef_IsSubMessage(f)) return false;
  return upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
}
323 
upb_FieldDef_IsOptional(const upb_FieldDef * f)324 bool upb_FieldDef_IsOptional(const upb_FieldDef* f) {
325   return upb_FieldDef_Label(f) == kUpb_Label_Optional;
326 }
327 
// A primitive field is anything that is neither string/bytes nor a
// sub-message.
bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
  return !(upb_FieldDef_IsString(f) || upb_FieldDef_IsSubMessage(f));
}
331 
upb_FieldDef_IsRepeated(const upb_FieldDef * f)332 bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
333   return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
334 }
335 
upb_FieldDef_IsRequired(const upb_FieldDef * f)336 bool upb_FieldDef_IsRequired(const upb_FieldDef* f) {
337   return UPB_DESC(FeatureSet_field_presence)(f->resolved_features) ==
338          UPB_DESC(FeatureSet_LEGACY_REQUIRED);
339 }
340 
upb_FieldDef_IsString(const upb_FieldDef * f)341 bool upb_FieldDef_IsString(const upb_FieldDef* f) {
342   return upb_FieldDef_CType(f) == kUpb_CType_String ||
343          upb_FieldDef_CType(f) == kUpb_CType_Bytes;
344 }
345 
upb_FieldDef_IsSubMessage(const upb_FieldDef * f)346 bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
347   return upb_FieldDef_CType(f) == kUpb_CType_Message;
348 }
349 
// Returns true when `x` lies in the closed interval [low, high].
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}

// Range checks for raw integer values.  Each accepts the closed interval
// shown and rejects everything else.

// Valid labels: 1..3.
bool upb_FieldDef_checklabel(int32_t label) {
  return between(label, 1, 3);
}

// Valid types: 1..11.
bool upb_FieldDef_checktype(int32_t type) {
  return between(type, 1, 11);
}

// Valid integer formats: 1..3.
bool upb_FieldDef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}

// Valid descriptor types: 1..18.
bool upb_FieldDef_checkdescriptortype(int32_t type) {
  return between(type, 1, 18);
}
361 
// Compares the first `n` bytes of `a` (which need not be null-terminated)
// with the null-terminated string `b`.  True only when `b` is exactly `n`
// bytes long and the bytes match.
static bool streql2(const char* a, size_t n, const char* b) {
  if (strlen(b) != n) return false;
  return memcmp(a, b, n) == 0;
}
365 
366 // Implement the transformation as described in the spec:
367 //   1. upper case all letters after an underscore.
368 //   2. remove all underscores.
make_json_name(const char * name,size_t size,upb_Arena * a)369 static char* make_json_name(const char* name, size_t size, upb_Arena* a) {
370   char* out = upb_Arena_Malloc(a, size + 1);  // +1 is to add a trailing '\0'
371   if (out == NULL) return NULL;
372 
373   bool ucase_next = false;
374   char* des = out;
375   for (size_t i = 0; i < size; i++) {
376     if (name[i] == '_') {
377       ucase_next = true;
378     } else {
379       *des++ = ucase_next ? toupper(name[i]) : name[i];
380       ucase_next = false;
381     }
382   }
383   *des++ = '\0';
384   return out;
385 }
386 
newstr(upb_DefBuilder * ctx,const char * data,size_t len)387 static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) {
388   str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len);
389   if (!ret) _upb_DefBuilder_OomErr(ctx);
390   ret->len = len;
391   if (len) memcpy(ret->str, data, len);
392   ret->str[len] = '\0';
393   return ret;
394 }
395 
unescape(upb_DefBuilder * ctx,const upb_FieldDef * f,const char * data,size_t len)396 static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f,
397                        const char* data, size_t len) {
398   // Size here is an upper bound; escape sequences could ultimately shrink it.
399   str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len);
400   char* dst = &ret->str[0];
401   const char* src = data;
402   const char* end = data + len;
403 
404   while (src < end) {
405     if (*src == '\\') {
406       src++;
407       *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end);
408     } else {
409       *dst++ = *src++;
410     }
411   }
412 
413   ret->len = dst - &ret->str[0];
414   return ret;
415 }
416 
// Parses the textual default value `str` (`len` bytes, not necessarily
// null-terminated) according to the C type of `f` and stores the result in
// f->defaultval.  Any invalid input is reported through
// _upb_DefBuilder_Errf().
//
// Numeric text is first copied into a local null-terminated buffer because
// the standard C parsing functions require a terminator; text that does not
// fit in that buffer is rejected.  Enum defaults are looked up by value name
// in the field's enum def.  String defaults are copied verbatim; bytes
// defaults are unescaped.
static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len,
                          upb_FieldDef* f) {
  char* end;
  char nullz[64];
  errno = 0;

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
    case kUpb_CType_Double:
    case kUpb_CType_Float:
      // Standard C number parsing functions expect null-terminated strings.
      if (len >= sizeof(nullz) - 1) {
        _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str);
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Int32: {
      long val = strtol(str, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case kUpb_CType_Enum: {
      const upb_EnumDef* e = f->sub.enumdef;
      const upb_EnumValueDef* ev =
          upb_EnumDef_FindValueByNameWithSize(e, str, len);
      if (!ev) {
        goto invalid;
      }
      f->defaultval.sint = upb_EnumValueDef_Number(ev);
      break;
    }
    case kUpb_CType_Int64: {
      long long val = strtoll(str, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case kUpb_CType_UInt32: {
      // strtoul() accepts a leading '-' and returns the negated value as an
      // unsigned number without setting ERANGE, so a negative default would
      // otherwise be accepted wherever the wrapped value fits.  A '-' can
      // only appear as a sign in an integer literal, so reject it outright.
      if (memchr(str, '-', len) != NULL) {
        goto invalid;
      }
      unsigned long val = strtoul(str, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case kUpb_CType_UInt64: {
      // See the UInt32 case: strtoull() would turn "-1" into UINT64_MAX.
      if (memchr(str, '-', len) != NULL) {
        goto invalid;
      }
      unsigned long long val = strtoull(str, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case kUpb_CType_Double: {
      double val = strtod(str, &end);
      if (errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.dbl = val;
      break;
    }
    case kUpb_CType_Float: {
      float val = strtof(str, &end);
      if (errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.flt = val;
      break;
    }
    case kUpb_CType_Bool: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        goto invalid;
      }
      break;
    }
    case kUpb_CType_String:
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case kUpb_CType_Bytes:
      f->defaultval.str = unescape(ctx, f, str, len);
      break;
    case kUpb_CType_Message:
      /* Should not have a default value. */
      _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)",
                           upb_FieldDef_FullName(f));
  }

  return;

invalid:
  _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d",
                       (int)len, str, upb_FieldDef_FullName(f),
                       (int)upb_FieldDef_Type(f));
}
530 
// Stores the implicit default for a field that declares none: zero for
// numbers, false for bools, an empty string for string/bytes, and the number
// of the enum's value at index 0 for enums.  Message fields are left
// untouched.
static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      f->defaultval.boolean = false;
      break;

    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
      f->defaultval.sint = 0;
      break;

    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
      f->defaultval.uint = 0;
      break;

    case kUpb_CType_Float:
    case kUpb_CType_Double:
      // Both floating-point types are zeroed through the `dbl` member.
      f->defaultval.dbl = 0;
      break;

    case kUpb_CType_Enum:
      f->defaultval.sint =
          upb_EnumValueDef_Number(upb_EnumDef_Value(f->sub.enumdef, 0));
      break;

    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;

    case kUpb_CType_Message:
      break;
  }
}
561 
// Translates legacy field properties into feature settings on `features`:
//   - a `required` label     -> field_presence = LEGACY_REQUIRED
//   - a group type           -> message_encoding = DELIMITED
//   - an explicit `packed`   -> repeated_field_encoding = PACKED / EXPANDED
//
// A required field under proto3 syntax is reported as an error.  Returns true
// if at least one feature was set.
static bool _upb_FieldDef_InferLegacyFeatures(
    upb_DefBuilder* ctx, upb_FieldDef* f,
    const UPB_DESC(FieldDescriptorProto*) proto,
    const UPB_DESC(FieldOptions*) options, upb_Syntax syntax,
    UPB_DESC(FeatureSet*) features) {
  bool inferred = false;

  if (UPB_DESC(FieldDescriptorProto_label)(proto) == kUpb_Label_Required) {
    if (syntax == kUpb_Syntax_Proto3) {
      _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)",
                           f->full_name);
    }
    const int presence = UPB_DESC(FeatureSet_LEGACY_REQUIRED);
    UPB_DESC(FeatureSet_set_field_presence)(features, presence);
    inferred = true;
  }

  if (UPB_DESC(FieldDescriptorProto_type)(proto) == kUpb_FieldType_Group) {
    const int encoding = UPB_DESC(FeatureSet_DELIMITED);
    UPB_DESC(FeatureSet_set_message_encoding)(features, encoding);
    inferred = true;
  }

  if (UPB_DESC(FieldOptions_has_packed)(options)) {
    int encoding;
    if (UPB_DESC(FieldOptions_packed)(options)) {
      encoding = UPB_DESC(FeatureSet_PACKED);
    } else {
      encoding = UPB_DESC(FeatureSet_EXPANDED);
    }
    UPB_DESC(FeatureSet_set_repeated_field_encoding)(features, encoding);
    inferred = true;
  }

  return inferred;
}
595 
// Populates `f` from `field_proto`.  Shared by the extension and non-extension
// creation paths; the caller must have set f->is_extension beforehand because
// it is read when computing has_presence.
//
// Steps, in order:
//   1. Record file, full name (prefix + name), number, proto3_optional flag,
//      containing message `m`, and options.
//   2. For non-editions syntax, infer legacy features from the proto.
//   3. If the proto names a oneof, validate the index, attach the field to
//      that oneof, and inherit the oneof's resolved features as parent.
//   4. Resolve features, then derive label, JSON name and type, validating
//      each.
//   5. Stash `field_proto` in f->sub.unresolved; the sub-def cannot be
//      resolved yet.
//   6. Compute has_presence.
//
// All validation failures are reported through _upb_DefBuilder_Errf().
static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix,
                                 const UPB_DESC(FeatureSet*) parent_features,
                                 const UPB_DESC(FieldDescriptorProto*)
                                     field_proto,
                                 upb_MessageDef* m, upb_FieldDef* f) {
  // Must happen before _upb_DefBuilder_Add()
  f->file = _upb_DefBuilder_File(ctx);

  const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto);
  f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
  f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto);
  f->is_proto3_optional =
      UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto);
  f->msgdef = m;
  f->scope.oneof = NULL;

  UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);

  upb_Syntax syntax = upb_FileDef_Syntax(f->file);
  const UPB_DESC(FeatureSet*) unresolved_features =
      UPB_DESC(FieldOptions_features)(f->opts);
  bool implicit = false;

  if (syntax != kUpb_Syntax_Editions) {
    // The builder's scratch feature set is cleared and reused per field.
    upb_Message_Clear(UPB_UPCAST(ctx->legacy_features),
                      UPB_DESC_MINITABLE(FeatureSet));
    if (_upb_FieldDef_InferLegacyFeatures(ctx, f, field_proto, f->opts, syntax,
                                          ctx->legacy_features)) {
      implicit = true;
      unresolved_features = ctx->legacy_features;
    }
  }

  if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
    int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto);

    if (!m) {
      _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg",
                           f->full_name);
    }

    if (oneof_index < 0 || oneof_index >= upb_MessageDef_OneofCount(m)) {
      _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name);
    }

    upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index);
    f->scope.oneof = oneof;
    // Fields inside a oneof inherit features from the oneof rather than from
    // the caller-supplied parent.
    parent_features = upb_OneofDef_ResolvedFeatures(oneof);

    _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size);
  }

  f->resolved_features = _upb_DefBuilder_DoResolveFeatures(
      ctx, parent_features, unresolved_features, implicit);

  f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto);
  if (f->label_ == kUpb_Label_Optional &&
      // TODO: remove once we can deprecate kUpb_Label_Required.
      UPB_DESC(FeatureSet_field_presence)(f->resolved_features) ==
          UPB_DESC(FeatureSet_LEGACY_REQUIRED)) {
    f->label_ = kUpb_Label_Required;
  }

  if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) {
    _upb_DefBuilder_Errf(ctx, "field has no name");
  }

  // Use the explicit JSON name if present, otherwise derive one.
  f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto);
  if (f->has_json_name) {
    const upb_StringView sv =
        UPB_DESC(FieldDescriptorProto_json_name)(field_proto);
    f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena);
  } else {
    f->json_name = make_json_name(name.data, name.size, ctx->arena);
  }
  if (!f->json_name) _upb_DefBuilder_OomErr(ctx);

  const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto);
  const bool has_type_name =
      UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto);

  f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto);

  // When a type is given, it must agree with the presence of a type name.
  if (has_type) {
    switch (f->type_) {
      case kUpb_FieldType_Message:
      case kUpb_FieldType_Group:
      case kUpb_FieldType_Enum:
        if (!has_type_name) {
          _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)",
                               (int)f->type_, f->full_name);
        }
        break;
      default:
        if (has_type_name) {
          _upb_DefBuilder_Errf(
              ctx, "invalid type for field with type_name set (%s, %d)",
              f->full_name, (int)f->type_);
        }
    }
  }

  if ((!has_type && has_type_name) || f->type_ == kUpb_FieldType_Message) {
    f->type_ =
        UPB_FIELD_TYPE_UNSPECIFIED;  // We'll assign this in resolve_subdef()
  } else {
    if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) {
      // Cast to int: the enum's underlying type is not guaranteed to match
      // the %d conversion.
      _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name,
                           (int)f->type_);
    }
  }

  if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
    _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name,
                         (int)f->label_);
  }

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
    if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
      _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
                           f->full_name);
    }
  }

  // Repeated fields never have presence.  Otherwise presence is granted to
  // extensions, message/group fields, oneof members, and any field whose
  // resolved field_presence feature is not IMPLICIT.
  f->has_presence =
      (!upb_FieldDef_IsRepeated(f)) &&
      (f->is_extension ||
       (f->type_ == kUpb_FieldType_Message ||
        f->type_ == kUpb_FieldType_Group || upb_FieldDef_ContainingOneof(f) ||
        UPB_DESC(FeatureSet_field_presence)(f->resolved_features) !=
            UPB_DESC(FeatureSet_IMPLICIT)));
}
733 
// Initializes `f` as an extension.  After the shared creation step it rejects
// extensions that specify a oneof_index, records `m` as the extension scope,
// registers the full name with the builder as an extension def, and assigns
// the next extension slot as the layout index.
static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix,
                                    const UPB_DESC(FeatureSet*) parent_features,
                                    const UPB_DESC(FieldDescriptorProto*)
                                        field_proto,
                                    upb_MessageDef* m, upb_FieldDef* f) {
  // Must be set first: the shared creation step reads it.
  f->is_extension = true;
  _upb_FieldDef_Create(ctx, prefix, parent_features, field_proto, m, f);

  const bool in_oneof =
      UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto);
  if (in_oneof) {
    _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)",
                         f->full_name);
  }

  f->scope.extension_scope = m;
  _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT));

  f->layout_index = ctx->ext_count;
  ctx->ext_count++;

  // When a layout was supplied, the extension at this slot must carry the
  // same field number.
  if (ctx->layout) {
    UPB_ASSERT(upb_MiniTableExtension_Number(
                   upb_FieldDef_MiniTableExtension(f)) == f->number_);
  }
}
756 
// Initializes `f` as a regular (non-extension) field of message `m`.  After
// the shared creation step it rejects proto3_optional fields that are not in
// a oneof, then inserts the field into `m`.
static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix,
                                       const UPB_DESC(FeatureSet*)
                                           parent_features,
                                       const UPB_DESC(FieldDescriptorProto*)
                                           field_proto,
                                       upb_MessageDef* m, upb_FieldDef* f) {
  // Must be set first: the shared creation step reads it.
  f->is_extension = false;
  _upb_FieldDef_Create(ctx, prefix, parent_features, field_proto, m, f);

  if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto) &&
      f->is_proto3_optional) {
    _upb_DefBuilder_Errf(
        ctx,
        "non-extension field (%s) with proto3_optional was not in a oneof",
        f->full_name);
  }

  _upb_MessageDef_InsertField(ctx, m, f);
}
777 
_upb_Extensions_New(upb_DefBuilder * ctx,int n,const UPB_DESC (FieldDescriptorProto *)const * protos,const UPB_DESC (FeatureSet *)parent_features,const char * prefix,upb_MessageDef * m)778 upb_FieldDef* _upb_Extensions_New(upb_DefBuilder* ctx, int n,
779                                   const UPB_DESC(FieldDescriptorProto*)
780                                       const* protos,
781                                   const UPB_DESC(FeatureSet*) parent_features,
782                                   const char* prefix, upb_MessageDef* m) {
783   _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
784   upb_FieldDef* defs =
785       (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
786 
787   for (int i = 0; i < n; i++) {
788     upb_FieldDef* f = &defs[i];
789 
790     _upb_FieldDef_CreateExt(ctx, prefix, parent_features, protos[i], m, f);
791     f->index_ = i;
792   }
793 
794   return defs;
795 }
796 
_upb_FieldDefs_New(upb_DefBuilder * ctx,int n,const UPB_DESC (FieldDescriptorProto *)const * protos,const UPB_DESC (FeatureSet *)parent_features,const char * prefix,upb_MessageDef * m,bool * is_sorted)797 upb_FieldDef* _upb_FieldDefs_New(upb_DefBuilder* ctx, int n,
798                                  const UPB_DESC(FieldDescriptorProto*)
799                                      const* protos,
800                                  const UPB_DESC(FeatureSet*) parent_features,
801                                  const char* prefix, upb_MessageDef* m,
802                                  bool* is_sorted) {
803   _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
804   upb_FieldDef* defs =
805       (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
806 
807   uint32_t previous = 0;
808   for (int i = 0; i < n; i++) {
809     upb_FieldDef* f = &defs[i];
810 
811     _upb_FieldDef_CreateNotExt(ctx, prefix, parent_features, protos[i], m, f);
812     f->index_ = i;
813     if (!ctx->layout) {
814       // Speculate that the def fields are sorted.  We will always sort the
815       // MiniTable fields, so if defs are sorted then indices will match.
816       //
817       // If this is incorrect, we will overwrite later.
818       f->layout_index = i;
819     }
820 
821     const uint32_t current = f->number_;
822     if (previous > current) *is_sorted = false;
823     previous = current;
824   }
825 
826   return defs;
827 }
828 
// Resolves the message or enum def that `f` refers to by type name and stores
// it in f->sub.
//
// If the field type was left unspecified, it is inferred from the kind of def
// the type name resolves to.  Fields whose type is neither message, group,
// enum, nor unspecified are left untouched.
static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix,
                           upb_FieldDef* f) {
  const UPB_DESC(FieldDescriptorProto)* proto = f->sub.unresolved;
  const upb_StringView type_name =
      UPB_DESC(FieldDescriptorProto_type_name)(proto);
  const bool has_type_name =
      UPB_DESC(FieldDescriptorProto_has_type_name)(proto);
  const int declared_type = (int)f->type_;

  if (declared_type == kUpb_FieldType_Message ||
      declared_type == kUpb_FieldType_Group) {
    UPB_ASSERT(has_type_name);
    f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix,
                                            type_name, UPB_DEFTYPE_MSG);
    return;
  }

  if (declared_type == kUpb_FieldType_Enum) {
    UPB_ASSERT(has_type_name);
    f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix,
                                             type_name, UPB_DEFTYPE_ENUM);
    return;
  }

  // Any other explicitly declared type needs no resolution.
  if (declared_type != UPB_FIELD_TYPE_UNSPECIFIED) return;

  // The type was not specified, so infer it from what the name resolves to.
  UPB_ASSERT(has_type_name);
  upb_deftype_t resolved_kind;
  const void* def = _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix,
                                               type_name, &resolved_kind);

  if (resolved_kind == UPB_DEFTYPE_ENUM) {
    f->sub.enumdef = def;
    f->type_ = kUpb_FieldType_Enum;
  } else if (resolved_kind == UPB_DEFTYPE_MSG) {
    f->sub.msgdef = def;
    f->type_ = kUpb_FieldType_Message;

    // TODO: remove once we can deprecate kUpb_FieldType_Group.
    const bool delimited =
        UPB_DESC(FeatureSet_message_encoding)(f->resolved_features) ==
        UPB_DESC(FeatureSet_DELIMITED);
    if (delimited && !upb_MessageDef_IsMapEntry(def) &&
        (!f->msgdef || !upb_MessageDef_IsMapEntry(f->msgdef))) {
      f->type_ = kUpb_FieldType_Group;
    }

    f->has_presence = !upb_FieldDef_IsRepeated(f);
  } else {
    _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s",
                         f->full_name);
  }
}
881 
_upb_FieldDef_Compare(const void * p1,const void * p2)882 static int _upb_FieldDef_Compare(const void* p1, const void* p2) {
883   const uint32_t v1 = (*(upb_FieldDef**)p1)->number_;
884   const uint32_t v2 = (*(upb_FieldDef**)p2)->number_;
885   return (v1 < v2) ? -1 : (v1 > v2);
886 }
887 
888 // _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one
889 // critical side effect that we depend on: it sets layout_index appropriately
890 // for non-sorted lists of fields.
_upb_FieldDefs_Sorted(const upb_FieldDef * f,int n,upb_Arena * a)891 const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n,
892                                            upb_Arena* a) {
893   // TODO: Replace this arena alloc with a persistent scratch buffer.
894   upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*));
895   if (!out) return NULL;
896 
897   for (int i = 0; i < n; i++) {
898     out[i] = (upb_FieldDef*)&f[i];
899   }
900   qsort(out, n, sizeof(void*), _upb_FieldDef_Compare);
901 
902   for (int i = 0; i < n; i++) {
903     out[i]->layout_index = i;
904   }
905   return (const upb_FieldDef**)out;
906 }
907 
// Encodes the extension field `f` as a mini descriptor.
//
// On success, `*out` is pointed at the encoded bytes and true is returned; a
// '\0' is written just past the encoded data but is not counted in out->size.
// Returns false if the encoding buffer could not be grown from arena `a`.
// `f` must be an extension.
bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a,
                                       upb_StringView* out) {
  UPB_ASSERT(f->is_extension);

  upb_DescState state;
  _upb_DescState_Init(&state);

  const int field_number = upb_FieldDef_Number(f);
  const uint64_t field_modifiers = _upb_FieldDef_Modifiers(f);

  if (!_upb_DescState_Grow(&state, a)) {
    return false;
  }

  state.ptr = upb_MtDataEncoder_EncodeExtension(
      &state.e, state.ptr, f->type_, field_number, field_modifiers);
  *state.ptr = '\0';

  out->data = state.buf;
  out->size = state.ptr - state.buf;
  return true;
}
927 
// Resolves the message that extension `f` extends and records it in
// f->msgdef.
//
// Errors are reported through the builder when the descriptor names no
// extendee, or when the field number is not a valid extension number for the
// resolved message.
static void resolve_extension(upb_DefBuilder* ctx, const char* prefix,
                              upb_FieldDef* f,
                              const UPB_DESC(FieldDescriptorProto) *
                                  field_proto) {
  const bool has_extendee =
      UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto);
  if (!has_extendee) {
    _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee",
                         f->full_name);
  }

  const upb_StringView extendee_name =
      UPB_DESC(FieldDescriptorProto_extendee)(field_proto);
  const upb_MessageDef* extendee = _upb_DefBuilder_Resolve(
      ctx, f->full_name, prefix, extendee_name, UPB_DEFTYPE_MSG);
  f->msgdef = extendee;

  if (_upb_MessageDef_IsValidExtensionNumber(extendee, f->number_)) return;

  _upb_DefBuilder_Errf(
      ctx,
      "field number %u in extension %s has no extension range in message %s",
      (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(extendee));
}
949 
// Makes the MiniTable extension for `f` ready for use and registers it in the
// def pool.
//
// When the builder has no prebuilt layout, the extension is initialized in
// place from a freshly encoded mini descriptor.  When a layout is present,
// only the field number is checked against it.
void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx,
                                           const upb_FieldDef* f) {
  const upb_MiniTableExtension* ext = upb_FieldDef_MiniTableExtension(f);

  if (!ctx->layout) {
    upb_StringView encoded;
    if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &encoded)) {
      _upb_DefBuilder_OomErr(ctx);
    }

    // Only sub-message and closed-enum fields carry a sub-table.
    upb_MiniTableSub sub = {NULL};
    if (upb_FieldDef_IsSubMessage(f)) {
      sub = upb_MiniTableSub_FromMessage(
          upb_MessageDef_MiniTable(f->sub.msgdef));
    } else if (_upb_FieldDef_IsClosedEnum(f)) {
      sub = upb_MiniTableSub_FromEnum(_upb_EnumDef_MiniTable(f->sub.enumdef));
    }

    // The const is cast away so the extension can be initialized in place.
    const upb_MiniTable* extendee = upb_MessageDef_MiniTable(f->msgdef);
    if (!_upb_MiniTableExtension_Init(encoded.data, encoded.size,
                                      (upb_MiniTableExtension*)ext, extendee,
                                      sub, ctx->platform, ctx->status)) {
      _upb_DefBuilder_Errf(ctx, "Could not build extension mini table");
    }
  } else {
    UPB_ASSERT(upb_FieldDef_Number(f) == upb_MiniTableExtension_Number(ext));
  }

  if (!_upb_DefPool_InsertExt(ctx->symtab, ext, f)) {
    _upb_DefBuilder_OomErr(ctx);
  }
}
980 
// Establishes the default value of `f` and records in f->has_default whether
// the descriptor gave one explicitly.
//
// This step is deferred until now because an enum default is specified with a
// label.  Explicit defaults are reported as errors for fields in proto3 files
// and for sub-message fields.
static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f,
                            const UPB_DESC(FieldDescriptorProto) *
                                field_proto) {
  if (!UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) {
    set_default_default(ctx, f);
    f->has_default = false;
    return;
  }

  const upb_StringView text =
      UPB_DESC(FieldDescriptorProto_default_value)(field_proto);

  if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) {
    _upb_DefBuilder_Errf(ctx,
                         "proto3 fields cannot have explicit defaults (%s)",
                         f->full_name);
  }

  if (upb_FieldDef_IsSubMessage(f)) {
    _upb_DefBuilder_Errf(ctx,
                         "message fields cannot have explicit defaults (%s)",
                         f->full_name);
  }

  parse_default(ctx, text.data, text.size, f);
  f->has_default = true;
}
1009 
// Runs the resolution steps for `f`: its sub-definition, its default value,
// and, for extensions only, the message it extends.
void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix,
                           upb_FieldDef* f) {
  // Saved up front because resolve_subdef() may overwrite f->sub.
  const UPB_DESC(FieldDescriptorProto)* saved_proto = f->sub.unresolved;

  resolve_subdef(ctx, prefix, f);
  resolve_default(ctx, f, saved_proto);

  if (!f->is_extension) return;
  resolve_extension(ctx, prefix, f, saved_proto);
}
1022