1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/reflection/internal/field_def.h"
9
10 #include <ctype.h>
11 #include <errno.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include "upb/base/descriptor_constants.h"
18 #include "upb/base/string_view.h"
19 #include "upb/base/upcast.h"
20 #include "upb/mem/arena.h"
21 #include "upb/message/accessors.h"
22 #include "upb/mini_descriptor/decode.h"
23 #include "upb/mini_descriptor/internal/encode.h"
24 #include "upb/mini_descriptor/internal/modifiers.h"
25 #include "upb/mini_table/enum.h"
26 #include "upb/mini_table/extension.h"
27 #include "upb/mini_table/field.h"
28 #include "upb/mini_table/message.h"
29 #include "upb/mini_table/sub.h"
30 #include "upb/reflection/def.h"
31 #include "upb/reflection/def_type.h"
32 #include "upb/reflection/internal/def_builder.h"
33 #include "upb/reflection/internal/def_pool.h"
34 #include "upb/reflection/internal/desc_state.h"
35 #include "upb/reflection/internal/enum_def.h"
36 #include "upb/reflection/internal/file_def.h"
37 #include "upb/reflection/internal/message_def.h"
38 #include "upb/reflection/internal/oneof_def.h"
39 #include "upb/reflection/internal/strdup2.h"
40
41 // Must be last.
42 #include "upb/port/def.inc"
43
// Placeholder stored in upb_FieldDef.type_ by _upb_FieldDef_Create() when the
// real type is not known yet; resolve_subdef() replaces it once the field's
// type name has been resolved.
#define UPB_FIELD_TYPE_UNSPECIFIED 0
45
// Length-prefixed buffer used to hold string/bytes default values.
//
// Instances are allocated as sizeof(str_t) + len bytes (see newstr()), so the
// one-element array below leaves room for a terminator after the payload.
typedef struct {
  size_t len;   // Number of payload bytes stored in |str|.
  char str[1];  // Null-terminated string data follows.
} str_t;
50
// Reflection data describing a single field or extension.
struct upb_FieldDef {
  // Options for this field; upb_FieldDef_HasOptions() compares this pointer
  // against kUpbDefOptDefault.
  const UPB_DESC(FieldOptions*) opts;
  // Result of _upb_DefBuilder_DoResolveFeatures() for this field.
  const UPB_DESC(FeatureSet*) resolved_features;
  const upb_FileDef* file;       // Returned by upb_FieldDef_File().
  const upb_MessageDef* msgdef;  // Returned by upb_FieldDef_ContainingType().
  const char* full_name;
  const char* json_name;
  // Default value; upb_FieldDef_Default() picks the member to read based on
  // the field's C type.
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t* str;
    void* msg;  // Always NULL.
  } defaultval;
  // Which member is meaningful depends on is_extension:
  // |extension_scope| for extensions, |oneof| otherwise.
  union {
    const upb_OneofDef* oneof;
    const upb_MessageDef* extension_scope;
  } scope;
  // Holds |unresolved| (the originating descriptor proto) after
  // _upb_FieldDef_Create(); resolve_subdef() later overwrites it with
  // |msgdef| or |enumdef| for message/group/enum fields.
  union {
    const upb_MessageDef* msgdef;
    const upb_EnumDef* enumdef;
    const UPB_DESC(FieldDescriptorProto) * unresolved;
  } sub;
  uint32_t number_;       // Field number taken from the descriptor proto.
  uint16_t index_;        // Position of this def within its array of defs.
  uint16_t layout_index;  // Index into msgdef->layout->fields or file->exts
  bool has_default;       // Returned by upb_FieldDef_HasDefault().
  bool has_json_name;     // True when the descriptor carried a json_name.
  bool has_presence;      // Returned by upb_FieldDef_HasPresence().
  bool is_extension;      // Set before _upb_FieldDef_Create() by its callers.
  bool is_proto3_optional;
  upb_FieldType type_;
  upb_Label label_;
};
87
_upb_FieldDef_At(const upb_FieldDef * f,int i)88 upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) {
89 return (upb_FieldDef*)&f[i];
90 }
91
UPB_DESC(FieldOptions)92 const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) {
93 return f->opts;
94 }
95
upb_FieldDef_HasOptions(const upb_FieldDef * f)96 bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
97 return f->opts != (void*)kUpbDefOptDefault;
98 }
99
UPB_DESC(FeatureSet)100 const UPB_DESC(FeatureSet) *
101 upb_FieldDef_ResolvedFeatures(const upb_FieldDef* f) {
102 return f->resolved_features;
103 }
104
upb_FieldDef_FullName(const upb_FieldDef * f)105 const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
106 return f->full_name;
107 }
108
upb_FieldDef_CType(const upb_FieldDef * f)109 upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
110 return upb_FieldType_CType(f->type_);
111 }
112
upb_FieldDef_Type(const upb_FieldDef * f)113 upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; }
114
upb_FieldDef_Index(const upb_FieldDef * f)115 uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; }
116
upb_FieldDef_LayoutIndex(const upb_FieldDef * f)117 uint32_t upb_FieldDef_LayoutIndex(const upb_FieldDef* f) {
118 return f->layout_index;
119 }
120
upb_FieldDef_Label(const upb_FieldDef * f)121 upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; }
122
upb_FieldDef_Number(const upb_FieldDef * f)123 uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; }
124
upb_FieldDef_IsExtension(const upb_FieldDef * f)125 bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; }
126
// A field is packable when it is both repeated and primitive.
bool _upb_FieldDef_IsPackable(const upb_FieldDef* f) {
  if (!upb_FieldDef_IsRepeated(f)) return false;
  return upb_FieldDef_IsPrimitive(f);
}
130
upb_FieldDef_IsPacked(const upb_FieldDef * f)131 bool upb_FieldDef_IsPacked(const upb_FieldDef* f) {
132 return _upb_FieldDef_IsPackable(f) &&
133 UPB_DESC(FeatureSet_repeated_field_encoding(f->resolved_features)) ==
134 UPB_DESC(FeatureSet_PACKED);
135 }
136
upb_FieldDef_Name(const upb_FieldDef * f)137 const char* upb_FieldDef_Name(const upb_FieldDef* f) {
138 return _upb_DefBuilder_FullToShort(f->full_name);
139 }
140
upb_FieldDef_JsonName(const upb_FieldDef * f)141 const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
142 return f->json_name;
143 }
144
upb_FieldDef_HasJsonName(const upb_FieldDef * f)145 bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
146 return f->has_json_name;
147 }
148
upb_FieldDef_File(const upb_FieldDef * f)149 const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; }
150
upb_FieldDef_ContainingType(const upb_FieldDef * f)151 const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
152 return f->msgdef;
153 }
154
upb_FieldDef_ExtensionScope(const upb_FieldDef * f)155 const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
156 return f->is_extension ? f->scope.extension_scope : NULL;
157 }
158
upb_FieldDef_ContainingOneof(const upb_FieldDef * f)159 const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
160 return f->is_extension ? NULL : f->scope.oneof;
161 }
162
upb_FieldDef_RealContainingOneof(const upb_FieldDef * f)163 const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
164 const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f);
165 if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL;
166 return oneof;
167 }
168
upb_FieldDef_Default(const upb_FieldDef * f)169 upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
170 upb_MessageValue ret;
171
172 if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) {
173 return (upb_MessageValue){.msg_val = NULL};
174 }
175
176 switch (upb_FieldDef_CType(f)) {
177 case kUpb_CType_Bool:
178 return (upb_MessageValue){.bool_val = f->defaultval.boolean};
179 case kUpb_CType_Int64:
180 return (upb_MessageValue){.int64_val = f->defaultval.sint};
181 case kUpb_CType_UInt64:
182 return (upb_MessageValue){.uint64_val = f->defaultval.uint};
183 case kUpb_CType_Enum:
184 case kUpb_CType_Int32:
185 return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
186 case kUpb_CType_UInt32:
187 return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
188 case kUpb_CType_Float:
189 return (upb_MessageValue){.float_val = f->defaultval.flt};
190 case kUpb_CType_Double:
191 return (upb_MessageValue){.double_val = f->defaultval.dbl};
192 case kUpb_CType_String:
193 case kUpb_CType_Bytes: {
194 str_t* str = f->defaultval.str;
195 if (str) {
196 return (upb_MessageValue){
197 .str_val = (upb_StringView){.data = str->str, .size = str->len}};
198 } else {
199 return (upb_MessageValue){
200 .str_val = (upb_StringView){.data = NULL, .size = 0}};
201 }
202 }
203 default:
204 UPB_UNREACHABLE();
205 }
206
207 return ret;
208 }
209
upb_FieldDef_MessageSubDef(const upb_FieldDef * f)210 const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
211 return upb_FieldDef_IsSubMessage(f) ? f->sub.msgdef : NULL;
212 }
213
upb_FieldDef_EnumSubDef(const upb_FieldDef * f)214 const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
215 return upb_FieldDef_IsEnum(f) ? f->sub.enumdef : NULL;
216 }
217
upb_FieldDef_MiniTable(const upb_FieldDef * f)218 const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
219 if (upb_FieldDef_IsExtension(f)) {
220 const upb_FileDef* file = upb_FieldDef_File(f);
221 return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable(
222 file, f->layout_index);
223 } else {
224 const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef);
225 return &layout->UPB_PRIVATE(fields)[f->layout_index];
226 }
227 }
228
upb_FieldDef_MiniTableExtension(const upb_FieldDef * f)229 const upb_MiniTableExtension* upb_FieldDef_MiniTableExtension(
230 const upb_FieldDef* f) {
231 UPB_ASSERT(upb_FieldDef_IsExtension(f));
232 const upb_FileDef* file = upb_FieldDef_File(f);
233 return _upb_FileDef_ExtensionMiniTable(file, f->layout_index);
234 }
235
_upb_FieldDef_IsClosedEnum(const upb_FieldDef * f)236 bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) {
237 if (f->type_ != kUpb_FieldType_Enum) return false;
238 return upb_EnumDef_IsClosed(f->sub.enumdef);
239 }
240
_upb_FieldDef_IsProto3Optional(const upb_FieldDef * f)241 bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
242 return f->is_proto3_optional;
243 }
244
_upb_FieldDef_LayoutIndex(const upb_FieldDef * f)245 int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; }
246
_upb_FieldDef_ValidateUtf8(const upb_FieldDef * f)247 bool _upb_FieldDef_ValidateUtf8(const upb_FieldDef* f) {
248 if (upb_FieldDef_Type(f) != kUpb_FieldType_String) return false;
249 return UPB_DESC(FeatureSet_utf8_validation(f->resolved_features)) ==
250 UPB_DESC(FeatureSet_VERIFY);
251 }
252
_upb_FieldDef_IsGroupLike(const upb_FieldDef * f)253 bool _upb_FieldDef_IsGroupLike(const upb_FieldDef* f) {
254 // Groups are always tag-delimited.
255 if (f->type_ != kUpb_FieldType_Group) {
256 return false;
257 }
258
259 const upb_MessageDef* msg = upb_FieldDef_MessageSubDef(f);
260
261 // Group fields always are always the lowercase type name.
262 const char* mname = upb_MessageDef_Name(msg);
263 const char* fname = upb_FieldDef_Name(f);
264 size_t name_size = strlen(fname);
265 if (name_size != strlen(mname)) return false;
266 for (size_t i = 0; i < name_size; ++i) {
267 if ((mname[i] | 0x20) != fname[i]) {
268 // Case-insensitive ascii comparison.
269 return false;
270 }
271 }
272
273 if (upb_MessageDef_File(msg) != upb_FieldDef_File(f)) {
274 return false;
275 }
276
277 // Group messages are always defined in the same scope as the field. File
278 // level extensions will compare NULL == NULL here, which is why the file
279 // comparison above is necessary to ensure both come from the same file.
280 return upb_FieldDef_IsExtension(f) ? upb_FieldDef_ExtensionScope(f) ==
281 upb_MessageDef_ContainingType(msg)
282 : upb_FieldDef_ContainingType(f) ==
283 upb_MessageDef_ContainingType(msg);
284 }
285
_upb_FieldDef_Modifiers(const upb_FieldDef * f)286 uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) {
287 uint64_t out = upb_FieldDef_IsPacked(f) ? kUpb_FieldModifier_IsPacked : 0;
288
289 if (upb_FieldDef_IsRepeated(f)) {
290 out |= kUpb_FieldModifier_IsRepeated;
291 } else if (upb_FieldDef_IsRequired(f)) {
292 out |= kUpb_FieldModifier_IsRequired;
293 } else if (!upb_FieldDef_HasPresence(f)) {
294 out |= kUpb_FieldModifier_IsProto3Singular;
295 }
296
297 if (_upb_FieldDef_IsClosedEnum(f)) {
298 out |= kUpb_FieldModifier_IsClosedEnum;
299 }
300
301 if (_upb_FieldDef_ValidateUtf8(f)) {
302 out |= kUpb_FieldModifier_ValidateUtf8;
303 }
304
305 return out;
306 }
307
upb_FieldDef_HasDefault(const upb_FieldDef * f)308 bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; }
upb_FieldDef_HasPresence(const upb_FieldDef * f)309 bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; }
310
// Returns true for sub-message fields and enum fields.
bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
  if (upb_FieldDef_IsSubMessage(f)) return true;
  return upb_FieldDef_IsEnum(f);
}
314
upb_FieldDef_IsEnum(const upb_FieldDef * f)315 bool upb_FieldDef_IsEnum(const upb_FieldDef* f) {
316 return upb_FieldDef_CType(f) == kUpb_CType_Enum;
317 }
318
// Returns true for repeated sub-message fields whose message type is a map
// entry.
bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
  if (!upb_FieldDef_IsRepeated(f)) return false;
  if (!upb_FieldDef_IsSubMessage(f)) return false;
  return upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
}
323
upb_FieldDef_IsOptional(const upb_FieldDef * f)324 bool upb_FieldDef_IsOptional(const upb_FieldDef* f) {
325 return upb_FieldDef_Label(f) == kUpb_Label_Optional;
326 }
327
// A field is primitive when it is neither string-like nor a sub-message.
bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
  if (upb_FieldDef_IsString(f)) return false;
  return !upb_FieldDef_IsSubMessage(f);
}
331
upb_FieldDef_IsRepeated(const upb_FieldDef * f)332 bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
333 return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
334 }
335
upb_FieldDef_IsRequired(const upb_FieldDef * f)336 bool upb_FieldDef_IsRequired(const upb_FieldDef* f) {
337 return UPB_DESC(FeatureSet_field_presence)(f->resolved_features) ==
338 UPB_DESC(FeatureSet_LEGACY_REQUIRED);
339 }
340
upb_FieldDef_IsString(const upb_FieldDef * f)341 bool upb_FieldDef_IsString(const upb_FieldDef* f) {
342 return upb_FieldDef_CType(f) == kUpb_CType_String ||
343 upb_FieldDef_CType(f) == kUpb_CType_Bytes;
344 }
345
upb_FieldDef_IsSubMessage(const upb_FieldDef * f)346 bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
347 return upb_FieldDef_CType(f) == kUpb_CType_Message;
348 }
349
// Returns true when |x| lies in the closed interval [low, high].
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
353
// Returns true when |label| is in the range 1..3 inclusive.
bool upb_FieldDef_checklabel(int32_t label) {
  const int32_t lowest = 1;
  const int32_t highest = 3;
  return between(label, lowest, highest);
}
// Returns true when |type| is in the range 1..11 inclusive.
bool upb_FieldDef_checktype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 11;
  return between(type, lowest, highest);
}
// Returns true when |fmt| is in the range 1..3 inclusive.
bool upb_FieldDef_checkintfmt(int32_t fmt) {
  const int32_t lowest = 1;
  const int32_t highest = 3;
  return between(fmt, lowest, highest);
}
357
// Returns true when |type| is in the range 1..18 inclusive.
bool upb_FieldDef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
361
// Compares the first |n| bytes of |a| (which need not be NUL-terminated)
// against the NUL-terminated string |b|; true only if |b| is exactly |n|
// bytes long and the bytes match.
static bool streql2(const char* a, size_t n, const char* b) {
  if (strlen(b) != n) return false;
  return memcmp(a, b, n) == 0;
}
365
366 // Implement the transformation as described in the spec:
367 // 1. upper case all letters after an underscore.
368 // 2. remove all underscores.
make_json_name(const char * name,size_t size,upb_Arena * a)369 static char* make_json_name(const char* name, size_t size, upb_Arena* a) {
370 char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0'
371 if (out == NULL) return NULL;
372
373 bool ucase_next = false;
374 char* des = out;
375 for (size_t i = 0; i < size; i++) {
376 if (name[i] == '_') {
377 ucase_next = true;
378 } else {
379 *des++ = ucase_next ? toupper(name[i]) : name[i];
380 ucase_next = false;
381 }
382 }
383 *des++ = '\0';
384 return out;
385 }
386
newstr(upb_DefBuilder * ctx,const char * data,size_t len)387 static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) {
388 str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len);
389 if (!ret) _upb_DefBuilder_OomErr(ctx);
390 ret->len = len;
391 if (len) memcpy(ret->str, data, len);
392 ret->str[len] = '\0';
393 return ret;
394 }
395
unescape(upb_DefBuilder * ctx,const upb_FieldDef * f,const char * data,size_t len)396 static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f,
397 const char* data, size_t len) {
398 // Size here is an upper bound; escape sequences could ultimately shrink it.
399 str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len);
400 char* dst = &ret->str[0];
401 const char* src = data;
402 const char* end = data + len;
403
404 while (src < end) {
405 if (*src == '\\') {
406 src++;
407 *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end);
408 } else {
409 *dst++ = *src++;
410 }
411 }
412
413 ret->len = dst - &ret->str[0];
414 return ret;
415 }
416
// Parses the textual default value |str| (|len| bytes, not necessarily
// NUL-terminated) according to the C type of |f| and stores the result in
// f->defaultval.
//
// Numeric text is first copied into a NUL-terminated stack buffer so the
// strto*() family can be used. Text that is too long, out of range, has
// trailing characters, or contains no number at all is reported via
// _upb_DefBuilder_Errf().
//
// NOTE(review): the code after the "Default too long" check relies on
// _upb_DefBuilder_Errf() not returning -- confirm against its declaration.
static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len,
                          upb_FieldDef* f) {
  char* end;
  char nullz[64];
  errno = 0;

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
    case kUpb_CType_Double:
    case kUpb_CType_Float:
      // Standard C number parsing functions expect null-terminated strings.
      if (len >= sizeof(nullz) - 1) {
        _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str);
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  // In every numeric case below, `end == str` means no characters were
  // converted. Without that check an empty default would satisfy `*end == 0`
  // and be silently accepted as zero.
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Int32: {
      long val = strtol(str, &end, 0);
      if (end == str || val > INT32_MAX || val < INT32_MIN ||
          errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case kUpb_CType_Enum: {
      const upb_EnumDef* e = f->sub.enumdef;
      const upb_EnumValueDef* ev =
          upb_EnumDef_FindValueByNameWithSize(e, str, len);
      if (!ev) {
        goto invalid;
      }
      f->defaultval.sint = upb_EnumValueDef_Number(ev);
      break;
    }
    case kUpb_CType_Int64: {
      long long val = strtoll(str, &end, 0);
      if (end == str || val > INT64_MAX || val < INT64_MIN ||
          errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case kUpb_CType_UInt32: {
      unsigned long val = strtoul(str, &end, 0);
      if (end == str || val > UINT32_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case kUpb_CType_UInt64: {
      unsigned long long val = strtoull(str, &end, 0);
      if (end == str || val > UINT64_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case kUpb_CType_Double: {
      double val = strtod(str, &end);
      if (end == str || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.dbl = val;
      break;
    }
    case kUpb_CType_Float: {
      float val = strtof(str, &end);
      if (end == str || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.flt = val;
      break;
    }
    case kUpb_CType_Bool: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        goto invalid;
      }
      break;
    }
    case kUpb_CType_String:
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case kUpb_CType_Bytes:
      f->defaultval.str = unescape(ctx, f, str, len);
      break;
    case kUpb_CType_Message:
      /* Should not have a default value. */
      _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)",
                           upb_FieldDef_FullName(f));
  }

  return;

invalid:
  _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d",
                       (int)len, str, upb_FieldDef_FullName(f),
                       (int)upb_FieldDef_Type(f));
}
530
// Stores the implicit default for |f| when no explicit default is supplied:
// zero for numeric types, false for bool, an empty string for string/bytes,
// and the number of the enum's value at index 0 for enums. Message fields are
// left untouched.
static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) {
  const upb_CType ctype = upb_FieldDef_CType(f);

  switch (ctype) {
    case kUpb_CType_Bool:
      f->defaultval.boolean = false;
      break;
    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
      f->defaultval.sint = 0;
      break;
    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
      f->defaultval.uint = 0;
      break;
    case kUpb_CType_Float:
    case kUpb_CType_Double:
      // Both floating-point types are zeroed through the double member.
      f->defaultval.dbl = 0;
      break;
    case kUpb_CType_Enum: {
      const upb_EnumValueDef* first = upb_EnumDef_Value(f->sub.enumdef, 0);
      f->defaultval.sint = upb_EnumValueDef_Number(first);
      break;
    }
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;
    case kUpb_CType_Message:
      break;
  }
}
561
// Translates legacy descriptor settings into |features|:
//   - a `required` label sets field_presence to LEGACY_REQUIRED (and is
//     reported as an error under proto3 syntax),
//   - the group type sets message_encoding to DELIMITED,
//   - an explicit `packed` option sets repeated_field_encoding to PACKED or
//     EXPANDED.
// Returns true if at least one feature was written.
static bool _upb_FieldDef_InferLegacyFeatures(
    upb_DefBuilder* ctx, upb_FieldDef* f,
    const UPB_DESC(FieldDescriptorProto*) proto,
    const UPB_DESC(FieldOptions*) options, upb_Syntax syntax,
    UPB_DESC(FeatureSet*) features) {
  bool inferred = false;

  const bool is_required =
      UPB_DESC(FieldDescriptorProto_label)(proto) == kUpb_Label_Required;
  if (is_required) {
    if (syntax == kUpb_Syntax_Proto3) {
      _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)",
                           f->full_name);
    }
    const int presence = UPB_DESC(FeatureSet_LEGACY_REQUIRED);
    UPB_DESC(FeatureSet_set_field_presence)(features, presence);
    inferred = true;
  }

  const bool is_group =
      UPB_DESC(FieldDescriptorProto_type)(proto) == kUpb_FieldType_Group;
  if (is_group) {
    const int encoding = UPB_DESC(FeatureSet_DELIMITED);
    UPB_DESC(FeatureSet_set_message_encoding)(features, encoding);
    inferred = true;
  }

  if (UPB_DESC(FieldOptions_has_packed)(options)) {
    int encoding;
    if (UPB_DESC(FieldOptions_packed)(options)) {
      encoding = UPB_DESC(FeatureSet_PACKED);
    } else {
      encoding = UPB_DESC(FeatureSet_EXPANDED);
    }
    UPB_DESC(FeatureSet_set_repeated_field_encoding)(features, encoding);
    inferred = true;
  }

  return inferred;
}
595
// Populates |f| from |field_proto|. Shared by the extension and non-extension
// creation paths; both set f->is_extension before calling this function,
// because the has_presence computation at the end reads it.
//
//   ctx              Builder used for allocation, feature resolution and
//                    error reporting.
//   prefix           Scope prepended to the field name to form full_name.
//   parent_features  Features to inherit; replaced by the oneof's features
//                    when the field has a oneof_index.
//   field_proto      Descriptor being converted; also stashed in
//                    f->sub.unresolved for later sub-def resolution.
//   m                Stored as f->msgdef; must be non-NULL when the field has
//                    a oneof_index.
//   f                Output def.
static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix,
                                 const UPB_DESC(FeatureSet*) parent_features,
                                 const UPB_DESC(FieldDescriptorProto*)
                                     field_proto,
                                 upb_MessageDef* m, upb_FieldDef* f) {
  // Must happen before _upb_DefBuilder_Add()
  f->file = _upb_DefBuilder_File(ctx);

  const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto);
  f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
  f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto);
  f->is_proto3_optional =
      UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto);
  f->msgdef = m;
  f->scope.oneof = NULL;

  UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);

  upb_Syntax syntax = upb_FileDef_Syntax(f->file);
  const UPB_DESC(FeatureSet*) unresolved_features =
      UPB_DESC(FieldOptions_features)(f->opts);
  bool implicit = false;

  // For non-editions files, features are inferred from legacy descriptor
  // settings into the builder's scratch FeatureSet, which is cleared first.
  // If anything was inferred, it replaces the features taken from the options.
  if (syntax != kUpb_Syntax_Editions) {
    upb_Message_Clear(UPB_UPCAST(ctx->legacy_features),
                      UPB_DESC_MINITABLE(FeatureSet));
    if (_upb_FieldDef_InferLegacyFeatures(ctx, f, field_proto, f->opts, syntax,
                                          ctx->legacy_features)) {
      implicit = true;
      unresolved_features = ctx->legacy_features;
    }
  }

  if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
    int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto);

    if (!m) {
      _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg",
                           f->full_name);
    }

    if (oneof_index < 0 || oneof_index >= upb_MessageDef_OneofCount(m)) {
      _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name);
    }

    // Fields in a oneof inherit from the oneof's resolved features instead of
    // the caller-supplied parent features.
    upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index);
    f->scope.oneof = oneof;
    parent_features = upb_OneofDef_ResolvedFeatures(oneof);

    _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size);
  }

  f->resolved_features = _upb_DefBuilder_DoResolveFeatures(
      ctx, parent_features, unresolved_features, implicit);

  // An optional label combined with a LEGACY_REQUIRED presence feature is
  // reported as a required label.
  f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto);
  if (f->label_ == kUpb_Label_Optional &&
      // TODO: remove once we can deprecate kUpb_Label_Required.
      UPB_DESC(FeatureSet_field_presence)(f->resolved_features) ==
          UPB_DESC(FeatureSet_LEGACY_REQUIRED)) {
    f->label_ = kUpb_Label_Required;
  }

  if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) {
    _upb_DefBuilder_Errf(ctx, "field has no name");
  }

  // Use the explicit json_name when present; otherwise derive one from the
  // field name.
  f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto);
  if (f->has_json_name) {
    const upb_StringView sv =
        UPB_DESC(FieldDescriptorProto_json_name)(field_proto);
    f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena);
  } else {
    f->json_name = make_json_name(name.data, name.size, ctx->arena);
  }
  if (!f->json_name) _upb_DefBuilder_OomErr(ctx);

  const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto);
  const bool has_type_name =
      UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto);

  f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto);

  // When a type is given, message/group/enum types must come with a type
  // name, and every other type must not.
  if (has_type) {
    switch (f->type_) {
      case kUpb_FieldType_Message:
      case kUpb_FieldType_Group:
      case kUpb_FieldType_Enum:
        if (!has_type_name) {
          _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)",
                               (int)f->type_, f->full_name);
        }
        break;
      default:
        if (has_type_name) {
          _upb_DefBuilder_Errf(
              ctx, "invalid type for field with type_name set (%s, %d)",
              f->full_name, (int)f->type_);
        }
    }
  }

  // Message-typed fields, and fields with only a type name, are marked
  // unspecified here; every other type must fall within the valid range.
  if ((!has_type && has_type_name) || f->type_ == kUpb_FieldType_Message) {
    f->type_ =
        UPB_FIELD_TYPE_UNSPECIFIED;  // We'll assign this in resolve_subdef()
  } else {
    if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) {
      _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name,
                           f->type_);
    }
  }

  if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
    _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name,
                         f->label_);
  }

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
    if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
      _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
                           f->full_name);
    }
  }

  // Repeated fields never have presence. Otherwise presence holds for
  // extensions, message/group fields, oneof members, and any field whose
  // resolved field_presence feature is not IMPLICIT. Fields whose type was
  // reset to unspecified above get has_presence reassigned in
  // resolve_subdef() if they resolve to a message.
  f->has_presence =
      (!upb_FieldDef_IsRepeated(f)) &&
      (f->is_extension ||
       (f->type_ == kUpb_FieldType_Message ||
        f->type_ == kUpb_FieldType_Group || upb_FieldDef_ContainingOneof(f) ||
        UPB_DESC(FeatureSet_field_presence)(f->resolved_features) !=
            UPB_DESC(FeatureSet_IMPLICIT)));
}
733
// Creates an extension def in |f|: runs the shared creation logic, rejects a
// oneof_index, records |m| as the extension scope, registers the full name
// with the builder and assigns the next extension layout index.
static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix,
                                    const UPB_DESC(FeatureSet*) parent_features,
                                    const UPB_DESC(FieldDescriptorProto*)
                                        field_proto,
                                    upb_MessageDef* m, upb_FieldDef* f) {
  // Must be set first: _upb_FieldDef_Create() reads it.
  f->is_extension = true;
  _upb_FieldDef_Create(ctx, prefix, parent_features, field_proto, m, f);

  const bool in_oneof =
      UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto);
  if (in_oneof) {
    _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)",
                         f->full_name);
  }

  f->scope.extension_scope = m;
  _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT));

  f->layout_index = ctx->ext_count;
  ctx->ext_count++;

  if (ctx->layout) {
    // With a prebuilt layout, the extension at this index must carry the same
    // field number.
    UPB_ASSERT(upb_MiniTableExtension_Number(
                   upb_FieldDef_MiniTableExtension(f)) == f->number_);
  }
}
756
// Creates a regular (non-extension) field def in |f|: runs the shared
// creation logic, rejects proto3_optional fields that are not in a oneof, and
// inserts the field into |m|.
static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix,
                                       const UPB_DESC(FeatureSet*)
                                           parent_features,
                                       const UPB_DESC(FieldDescriptorProto*)
                                           field_proto,
                                       upb_MessageDef* m, upb_FieldDef* f) {
  // Must be set first: _upb_FieldDef_Create() reads it.
  f->is_extension = false;
  _upb_FieldDef_Create(ctx, prefix, parent_features, field_proto, m, f);

  const bool in_oneof =
      UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto);
  if (!in_oneof && f->is_proto3_optional) {
    _upb_DefBuilder_Errf(
        ctx,
        "non-extension field (%s) with proto3_optional was not in a oneof",
        f->full_name);
  }

  _upb_MessageDef_InsertField(ctx, m, f);
}
777
_upb_Extensions_New(upb_DefBuilder * ctx,int n,const UPB_DESC (FieldDescriptorProto *)const * protos,const UPB_DESC (FeatureSet *)parent_features,const char * prefix,upb_MessageDef * m)778 upb_FieldDef* _upb_Extensions_New(upb_DefBuilder* ctx, int n,
779 const UPB_DESC(FieldDescriptorProto*)
780 const* protos,
781 const UPB_DESC(FeatureSet*) parent_features,
782 const char* prefix, upb_MessageDef* m) {
783 _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
784 upb_FieldDef* defs =
785 (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
786
787 for (int i = 0; i < n; i++) {
788 upb_FieldDef* f = &defs[i];
789
790 _upb_FieldDef_CreateExt(ctx, prefix, parent_features, protos[i], m, f);
791 f->index_ = i;
792 }
793
794 return defs;
795 }
796
_upb_FieldDefs_New(upb_DefBuilder * ctx,int n,const UPB_DESC (FieldDescriptorProto *)const * protos,const UPB_DESC (FeatureSet *)parent_features,const char * prefix,upb_MessageDef * m,bool * is_sorted)797 upb_FieldDef* _upb_FieldDefs_New(upb_DefBuilder* ctx, int n,
798 const UPB_DESC(FieldDescriptorProto*)
799 const* protos,
800 const UPB_DESC(FeatureSet*) parent_features,
801 const char* prefix, upb_MessageDef* m,
802 bool* is_sorted) {
803 _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
804 upb_FieldDef* defs =
805 (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
806
807 uint32_t previous = 0;
808 for (int i = 0; i < n; i++) {
809 upb_FieldDef* f = &defs[i];
810
811 _upb_FieldDef_CreateNotExt(ctx, prefix, parent_features, protos[i], m, f);
812 f->index_ = i;
813 if (!ctx->layout) {
814 // Speculate that the def fields are sorted. We will always sort the
815 // MiniTable fields, so if defs are sorted then indices will match.
816 //
817 // If this is incorrect, we will overwrite later.
818 f->layout_index = i;
819 }
820
821 const uint32_t current = f->number_;
822 if (previous > current) *is_sorted = false;
823 previous = current;
824 }
825
826 return defs;
827 }
828
// Resolves the message or enum def referenced by |f|, using the descriptor
// proto stashed in f->sub.unresolved.
//
// If the type was left unspecified at creation time, it is inferred from the
// kind of def the type name resolves to. A message-typed field becomes a
// group when its resolved message_encoding is DELIMITED and neither the
// target nor the containing message is a map entry.
static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix,
                           upb_FieldDef* f) {
  const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved;
  upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto);
  bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto);

  switch ((int)f->type_) {
    case UPB_FIELD_TYPE_UNSPECIFIED: {
      // Type was not specified and must be inferred.
      UPB_ASSERT(has_name);
      upb_deftype_t type;
      const void* def =
          _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type);

      if (type == UPB_DEFTYPE_ENUM) {
        f->sub.enumdef = def;
        f->type_ = kUpb_FieldType_Enum;
      } else if (type == UPB_DEFTYPE_MSG) {
        f->sub.msgdef = def;
        f->type_ = kUpb_FieldType_Message;
        // TODO: remove once we can deprecate
        // kUpb_FieldType_Group.
        const bool delimited =
            UPB_DESC(FeatureSet_message_encoding)(f->resolved_features) ==
            UPB_DESC(FeatureSet_DELIMITED);
        if (delimited && !upb_MessageDef_IsMapEntry(def) &&
            (!f->msgdef || !upb_MessageDef_IsMapEntry(f->msgdef))) {
          f->type_ = kUpb_FieldType_Group;
        }
        f->has_presence = !upb_FieldDef_IsRepeated(f);
      } else {
        _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s",
                             f->full_name);
      }
      break;
    }

    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      UPB_ASSERT(has_name);
      f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name,
                                              UPB_DEFTYPE_MSG);
      break;

    case kUpb_FieldType_Enum:
      UPB_ASSERT(has_name);
      f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name,
                                               UPB_DEFTYPE_ENUM);
      break;

    default:
      // No resolution necessary.
      break;
  }
}
881
_upb_FieldDef_Compare(const void * p1,const void * p2)882 static int _upb_FieldDef_Compare(const void* p1, const void* p2) {
883 const uint32_t v1 = (*(upb_FieldDef**)p1)->number_;
884 const uint32_t v2 = (*(upb_FieldDef**)p2)->number_;
885 return (v1 < v2) ? -1 : (v1 > v2);
886 }
887
888 // _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one
889 // critical side effect that we depend on: it sets layout_index appropriately
890 // for non-sorted lists of fields.
const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n,
                                           upb_Arena* a) {
  // TODO: Replace this arena alloc with a persistent scratch buffer.
  upb_FieldDef** sorted =
      (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*));
  if (sorted == NULL) return NULL;

  // Start with one pointer per def, in the order the defs were given.
  int idx = 0;
  while (idx < n) {
    sorted[idx] = (upb_FieldDef*)(f + idx);
    idx++;
  }

  // Reorder the pointers by field number.
  qsort(sorted, n, sizeof(void*), _upb_FieldDef_Compare);

  // Record each def's position in the sorted order on the def itself.
  for (idx = 0; idx < n; idx++) {
    upb_FieldDef* def = sorted[idx];
    def->layout_index = idx;
  }

  return (const upb_FieldDef**)sorted;
}
907
// Encodes the mini descriptor for extension field `f`.
//
// The encoding is written into a buffer grown from arena `a`, and `*out` is
// set to view it. A '\0' byte is stored just past the encoded data but is not
// counted in out->size. Returns false if the buffer could not be grown.
//
// `f` must be an extension.
bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a,
                                       upb_StringView* out) {
  UPB_ASSERT(f->is_extension);

  upb_DescState state;
  _upb_DescState_Init(&state);

  const int field_number = upb_FieldDef_Number(f);
  const uint64_t field_modifiers = _upb_FieldDef_Modifiers(f);

  if (!_upb_DescState_Grow(&state, a)) {
    return false;
  }

  state.ptr = upb_MtDataEncoder_EncodeExtension(
      &state.e, state.ptr, f->type_, field_number, field_modifiers);
  *state.ptr = '\0';

  out->size = state.ptr - state.buf;
  out->data = state.buf;
  return true;
}
927
// Resolves the extendee of extension field `f` and stores it in f->msgdef.
//
// Reports an error through the builder when the descriptor names no extendee,
// or when the field number is not a valid extension number for the extendee.
static void resolve_extension(upb_DefBuilder* ctx, const char* prefix,
                              upb_FieldDef* f,
                              const UPB_DESC(FieldDescriptorProto) *
                                  field_proto) {
  const bool has_extendee =
      UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto);
  if (!has_extendee) {
    _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee",
                         f->full_name);
  }

  const upb_StringView extendee_name =
      UPB_DESC(FieldDescriptorProto_extendee)(field_proto);
  const upb_MessageDef* extendee = _upb_DefBuilder_Resolve(
      ctx, f->full_name, prefix, extendee_name, UPB_DEFTYPE_MSG);
  f->msgdef = extendee;

  if (_upb_MessageDef_IsValidExtensionNumber(extendee, f->number_)) {
    return;
  }

  _upb_DefBuilder_Errf(
      ctx,
      "field number %u in extension %s has no extension range in message %s",
      (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(extendee));
}
949
// Prepares the mini table extension for extension field `f` and registers it
// with the def pool.
//
// When ctx->layout is set, the extension is not rebuilt; only its field number
// is checked against the def. Otherwise the extension is initialized from a
// freshly encoded mini descriptor.
void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx,
                                           const upb_FieldDef* f) {
  const upb_MiniTableExtension* ext = upb_FieldDef_MiniTableExtension(f);

  if (!ctx->layout) {
    upb_StringView encoded;
    if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &encoded)) {
      _upb_DefBuilder_OomErr(ctx);
    }

    // Only message fields and closed-enum fields supply a sub-table.
    upb_MiniTableSub sub = {NULL};
    if (upb_FieldDef_IsSubMessage(f)) {
      sub = upb_MiniTableSub_FromMessage(
          upb_MessageDef_MiniTable(f->sub.msgdef));
    } else if (_upb_FieldDef_IsClosedEnum(f)) {
      sub = upb_MiniTableSub_FromEnum(_upb_EnumDef_MiniTable(f->sub.enumdef));
    }

    // The extension is initialized in place through a non-const alias.
    upb_MiniTableExtension* writable_ext = (upb_MiniTableExtension*)ext;
    const bool initialized = _upb_MiniTableExtension_Init(
        encoded.data, encoded.size, writable_ext,
        upb_MessageDef_MiniTable(f->msgdef), sub, ctx->platform, ctx->status);
    if (!initialized) {
      _upb_DefBuilder_Errf(ctx, "Could not build extension mini table");
    }
  } else {
    UPB_ASSERT(upb_FieldDef_Number(f) == upb_MiniTableExtension_Number(ext));
  }

  const bool inserted = _upb_DefPool_InsertExt(ctx->symtab, ext, f);
  if (!inserted) {
    _upb_DefBuilder_OomErr(ctx);
  }
}
980
// Establishes the default value of field `f` and sets f->has_default.
//
// An explicit default from the descriptor is parsed and stored; otherwise the
// implicit default is installed via set_default_default(). Explicit defaults
// are reported as errors for proto3 files and for message-typed fields.
static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f,
                            const UPB_DESC(FieldDescriptorProto) *
                                field_proto) {
  // Have to delay resolving of the default value until now because of the enum
  // case, since enum defaults are specified with a label.
  if (!UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) {
    set_default_default(ctx, f);
    f->has_default = false;
    return;
  }

  const upb_StringView text =
      UPB_DESC(FieldDescriptorProto_default_value)(field_proto);

  const bool is_proto3 = upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3;
  if (is_proto3) {
    _upb_DefBuilder_Errf(ctx,
                         "proto3 fields cannot have explicit defaults (%s)",
                         f->full_name);
  }

  if (upb_FieldDef_IsSubMessage(f)) {
    _upb_DefBuilder_Errf(ctx,
                         "message fields cannot have explicit defaults (%s)",
                         f->full_name);
  }

  parse_default(ctx, text.data, text.size, f);
  f->has_default = true;
}
1009
// Runs the resolution steps for field `f`: its sub-def, then its default
// value, and finally (for extensions only) its extendee.
void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix,
                           upb_FieldDef* f) {
  // Capture the descriptor proto up front: it is stored in the f->sub union,
  // which resolve_subdef() may overwrite.
  const UPB_DESC(FieldDescriptorProto)* saved_proto = f->sub.unresolved;

  resolve_subdef(ctx, prefix, f);
  resolve_default(ctx, f, saved_proto);

  if (!f->is_extension) return;
  resolve_extension(ctx, prefix, f, saved_proto);
}
1022