1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/mini_descriptor/decode.h"
9
10 #include <inttypes.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14
15 #include "upb/base/descriptor_constants.h"
16 #include "upb/base/internal/log2.h"
17 #include "upb/base/status.h"
18 #include "upb/base/string_view.h"
19 #include "upb/mem/arena.h"
20 #include "upb/message/internal/map_entry.h"
21 #include "upb/message/internal/types.h"
22 #include "upb/mini_descriptor/internal/base92.h"
23 #include "upb/mini_descriptor/internal/decoder.h"
24 #include "upb/mini_descriptor/internal/modifiers.h"
25 #include "upb/mini_descriptor/internal/wire_constants.h"
26 #include "upb/mini_table/extension.h"
27 #include "upb/mini_table/field.h"
28 #include "upb/mini_table/internal/field.h"
29 #include "upb/mini_table/internal/message.h"
30 #include "upb/mini_table/internal/sub.h"
31 #include "upb/mini_table/message.h"
32 #include "upb/mini_table/sub.h"
33
34 // Must be last.
35 #include "upb/port/def.inc"
36
// We reserve unused hasbits to make room for upb_Message fields: hasbit
// indexes that would fall inside the first sizeof(struct upb_Message) bytes of
// a message are never handed out to fields.
#define kUpb_Reserved_Hasbytes sizeof(struct upb_Message)

// Index of the first hasbit that fields may use (for example 64 when
// sizeof(struct upb_Message) is 8).
#define kUpb_Reserved_Hasbits (kUpb_Reserved_Hasbytes * 8)
42
// Kind of storage described by a upb_LayoutItem.
//
// Note: we sort by this number when calculating layout order (it is part of
// the key built in upb_MtDecoder_CompareFields), so the declaration order
// below is significant.
typedef enum {
  kUpb_LayoutItemType_OneofCase,   // Oneof case.
  kUpb_LayoutItemType_OneofField,  // Oneof field data.
  kUpb_LayoutItemType_Field,       // Non-oneof field data.

  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
51
// "No field" marker: the initial field_index of a oneof that has no members
// yet, and therefore also the end-of-list link stored in the offset of the
// first member added to a oneof.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t) - 1)
53
// One entry in the layout vector: a piece of message storage that needs an
// offset (a regular field, a oneof's data slot, or a oneof's case slot).
typedef struct {
  // Index of the corresponding field. When this is a oneof field, the field's
  // offset will be the index of the next field in a linked list.
  uint16_t field_index;
  // Offset assigned to this item by upb_MtDecoder_AssignOffsets().
  uint16_t offset;
  // Storage representation; determines the item's size and alignment.
  upb_FieldRep rep;
  // Which kind of storage this item describes.
  upb_LayoutItemType type;
} upb_LayoutItem;
62
// Growable array of layout items. The storage is managed with realloc() (see
// upb_MtDecoder_PushItem) and may be supplied by and returned to the caller
// of upb_MiniTable_BuildWithBuf().
typedef struct {
  upb_LayoutItem* data;  // Item storage (may be NULL when capacity is 0).
  size_t size;           // Number of items currently in use.
  size_t capacity;       // Number of items `data` has room for.
} upb_LayoutItemVector;
68
// State for decoding a single MiniTable or MiniTableExtension.
typedef struct {
  // Common decoder state (input end pointer, status, error jump buffer).
  upb_MdDecoder base;
  // Table being built; NULL while decoding an extension.
  upb_MiniTable* table;
  // Field array of `table`, allocated in upb_MtDecoder_ParseMessage().
  upb_MiniTableField* fields;
  // Target platform; selects 32-bit vs. 64-bit sizes and alignments.
  upb_MiniTablePlatform platform;
  // Layout items collected for sorting and offset assignment.
  upb_LayoutItemVector vec;
  // Arena used for the table's allocations; NULL while decoding an extension.
  upb_Arena* arena;
} upb_MtDecoder;
77
// In each field's offset, we temporarily store a presence classifier. The
// real offsets are written later, once hasbits and layout have been assigned.
enum PresenceClass {
  // No hasbit: repeated fields and fields marked proto3-singular.
  kNoPresence = 0,
  // Default for scalar fields: gets an ordinary hasbit.
  kHasbitPresence = 1,
  // Field marked required: gets one of the lowest hasbits.
  kRequiredPresence = 2,
  kOneofBase = 3,
  // Negative values refer to a specific oneof with that number.  Positive
  // values >= kOneofBase indicate that this field is in a oneof, and specify
  // the next field in this oneof's linked list.
};
88
// Returns true when `field` is an array field whose descriptor type satisfies
// upb_FieldType_IsPackable().
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) {
  if (!(field->UPB_PRIVATE(mode) & kUpb_FieldMode_Array)) return false;
  return upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype));
}
93
// Running totals of fields that need a sub-table slot, accumulated while the
// fields are parsed (see upb_MiniTable_SetTypeAndSub).
typedef struct {
  uint16_t submsg_count;   // Message and group fields seen so far.
  uint16_t subenum_count;  // Fields whose final type is enum seen so far.
} upb_SubCounts;
98
// Stores the final descriptor type of `field` and reserves its sub-table
// index.
//
// Two substitutions are applied first, each flagged with
// kUpb_LabelFlags_IsAlternate: an open (proto3) enum is stored as int32, and
// a string is stored as bytes unless the message validates UTF-8. Packable
// array fields are marked packed when the message defaults to packed.
// Message/group fields and enum fields take the next index from their
// respective counter in `sub_counts`; all other fields get kUpb_NoSub.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field,
                                        upb_FieldType type,
                                        upb_SubCounts* sub_counts,
                                        uint64_t msg_modifiers,
                                        bool is_proto3_enum) {
  const bool validates_utf8 =
      (msg_modifiers & kUpb_MessageModifier_ValidateUtf8) != 0;
  const bool packed_by_default =
      (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;

  if (is_proto3_enum) {
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    type = kUpb_FieldType_Int32;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  } else if (type == kUpb_FieldType_String && !validates_utf8) {
    type = kUpb_FieldType_Bytes;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  }

  field->UPB_PRIVATE(descriptortype) = type;

  if (upb_MtDecoder_FieldIsPackable(field) && packed_by_default) {
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsPacked;
  }

  switch (type) {
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++;
      break;
    case kUpb_FieldType_Enum:
      // We will need to update this later once we know the total number of
      // submsg fields.
      field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++;
      break;
    default:
      field->UPB_PRIVATE(submsg_index) = kUpb_NoSub;
      break;
  }
}
131
// Maps an encoded type (kUpb_EncodedType_*) to the upb_FieldType stored in
// the field. Both enum encodings map to kUpb_FieldType_Enum; the open-enum
// distinction is passed separately to upb_MiniTable_SetTypeAndSub().
static const char kUpb_EncodedToType[] = {
    [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
    [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
    [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
    [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
    [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
    [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
    [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
    [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
    [kUpb_EncodedType_String] = kUpb_FieldType_String,
    [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
    [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
    [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
    [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
    [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
    [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
    [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
    [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
    [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
    [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
};
153
// Initializes `field` from the encoded type character `ch`.
//
// Sets the field mode (scalar or array, plus the storage representation),
// stores a provisional presence class in the field's offset, and then hands
// the decoded type to upb_MiniTable_SetTypeAndSub(). A type value outside the
// lookup tables is reported through upb_MdDecoder_ErrorJmp().
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTableField* field,
                                   uint64_t msg_modifiers,
                                   upb_SubCounts* sub_counts) {
  // Storage representation of each encoded type that is stored by value.
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,
  };

  // Arrays, messages and groups use a representation whose width depends on
  // the target platform.
  const bool target_is_32bit = d->platform == kUpb_MiniTablePlatform_32Bit;
  const char pointer_rep =
      target_is_32bit ? kUpb_FieldRep_4Byte : kUpb_FieldRep_8Byte;

  int8_t encoded = _upb_FromBase92(ch);
  const bool is_repeated = ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase);

  if (is_repeated) {
    // Repeated types are encoded as the element type plus RepeatedBase.
    encoded -= kUpb_EncodedType_RepeatedBase;
    field->UPB_PRIVATE(mode) =
        kUpb_FieldMode_Array | (pointer_rep << kUpb_FieldRep_Shift);
    field->UPB_PRIVATE(offset) = kNoPresence;
  } else {
    const bool is_submsg = encoded == kUpb_EncodedType_Group ||
                           encoded == kUpb_EncodedType_Message;
    field->UPB_PRIVATE(mode) = kUpb_FieldMode_Scalar;
    field->UPB_PRIVATE(offset) = kHasbitPresence;
    if (is_submsg) {
      field->UPB_PRIVATE(mode) |= pointer_rep << kUpb_FieldRep_Shift;
    } else if ((unsigned long)encoded >= sizeof(kUpb_EncodedToFieldRep)) {
      // The unsigned cast also rejects negative (undecodable) values.
      upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)encoded);
    } else {
      field->UPB_PRIVATE(mode) |= kUpb_EncodedToFieldRep[encoded]
                                  << kUpb_FieldRep_Shift;
    }
  }

  if ((unsigned long)encoded >= sizeof(kUpb_EncodedToType)) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)encoded);
  }

  const bool is_open_enum = encoded == kUpb_EncodedType_OpenEnum;
  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[encoded], sub_counts,
                              msg_modifiers, is_open_enum);
}
206
// Applies the encoded `field_modifiers` to `field`.
//
// Handles, in order: flipping the packed flag (packable fields only),
// turning a bytes-as-alternate field back into a UTF-8 validated string, and
// reclassifying the field's provisional presence as singular (no presence) or
// required. Any invalid combination is reported through
// upb_MdDecoder_ErrorJmp(). `message_modifiers` is currently not consulted.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
                                      uint32_t message_modifiers,
                                      uint32_t field_modifiers,
                                      upb_MiniTableField* field) {
  (void)message_modifiers;

  const bool flip_packed =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) != 0;
  const bool flip_utf8 =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipValidateUtf8) != 0;
  const bool singular =
      (field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular) != 0;
  const bool required =
      (field_modifiers & kUpb_EncodedFieldModifier_IsRequired) != 0;

  if (flip_packed) {
    if (!upb_MtDecoder_FieldIsPackable(field)) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip packed on unpackable field %" PRIu32,
                             upb_MiniTableField_Number(field));
    }
    field->UPB_PRIVATE(mode) ^= kUpb_LabelFlags_IsPacked;
  }

  if (flip_utf8) {
    // Only a string that was previously downgraded to bytes can be flipped.
    const bool is_downgraded_string =
        field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Bytes &&
        (field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsAlternate);
    if (!is_downgraded_string) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip ValidateUtf8 on field %" PRIu32
                             ", type=%d, mode=%d",
                             upb_MiniTableField_Number(field),
                             (int)field->UPB_PRIVATE(descriptortype),
                             (int)field->UPB_PRIVATE(mode));
    }
    field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_String;
    field->UPB_PRIVATE(mode) &= ~kUpb_LabelFlags_IsAlternate;
  }

  // Validate.
  const bool has_hasbit_presence =
      field->UPB_PRIVATE(offset) == kHasbitPresence;
  if ((singular || required) && !has_hasbit_presence) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Invalid modifier(s) for repeated field %" PRIu32,
                           upb_MiniTableField_Number(field));
  }
  if (singular && required) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Field %" PRIu32 " cannot be both singular and required",
        upb_MiniTableField_Number(field));
  }
  if (singular && upb_MiniTableField_IsSubMessage(field)) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Field %" PRIu32 " cannot be a singular submessage",
                           upb_MiniTableField_Number(field));
  }

  if (singular) {
    field->UPB_PRIVATE(offset) = kNoPresence;
  }
  if (required) {
    field->UPB_PRIVATE(offset) = kRequiredPresence;
  }
}
260
// Appends `item` to the decoder's layout vector, growing the storage when it
// is full (capacity doubles, with a minimum of 8 items).
//
// An allocation failure is reported through upb_MdDecoder_CheckOutOfMemory().
// In that case the vector still owns its previous, untouched buffer, so the
// error path in upb_MtDecoder_BuildMiniTableWithBuf() can return it to the
// caller to be freed.
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    // Do not assign realloc()'s result straight to d->vec.data: on failure
    // that would overwrite the only pointer to the old buffer and leak it.
    upb_LayoutItem* new_data =
        realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
    upb_MdDecoder_CheckOutOfMemory(&d->base, new_data);
    d->vec.data = new_data;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
270
// Emits the two layout items for a finished oneof: one for the shared data
// slot (using the representation accumulated in `item`) and one 4-byte slot
// for the oneof case. A oneof with no members is reported as an error.
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof");
  }

  // While decoding, field_index carries a kOneofBase bias; remove it.
  upb_LayoutItem data_item = item;
  data_item.field_index -= kOneofBase;
  data_item.type = kUpb_LayoutItemType_OneofField;
  upb_MtDecoder_PushItem(d, data_item);

  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_item.type = kUpb_LayoutItemType_OneofCase;
  upb_MtDecoder_PushItem(d, case_item);
}
286
// Returns the size in bytes of storage representation `rep` on the target
// `platform`. Only the string-view representation differs between 32-bit and
// 64-bit targets.
static size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
                                      upb_MiniTablePlatform platform) {
  static const uint8_t kSizeOn32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kSizeOn64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 16,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // Sanity check: the table for the host platform must agree with the
  // compiler's view of upb_StringView.
  UPB_ASSERT(sizeof(upb_StringView) ==
             UPB_SIZE(kSizeOn32, kSizeOn64)[kUpb_FieldRep_StringView]);

  if (platform == kUpb_MiniTablePlatform_32Bit) {
    return kSizeOn32[rep];
  }
  return kSizeOn64[rep];
}
306
// Returns the alignment in bytes required by storage representation `rep` on
// the target `platform`. Only the string-view representation differs between
// 32-bit and 64-bit targets.
static size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
                                       upb_MiniTablePlatform platform) {
  static const uint8_t kAlignOn32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 4,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kAlignOn64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // Sanity check: the table for the host platform must agree with the
  // compiler's view of upb_StringView.
  UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
             UPB_SIZE(kAlignOn32, kAlignOn64)[kUpb_FieldRep_StringView]);

  if (platform == kUpb_MiniTablePlatform_32Bit) {
    return kAlignOn32[rep];
  }
  return kAlignOn64[rep];
}
326
// Decodes one oneof member (a field number) and adds that field to the oneof
// being accumulated in `item`.
//
// The field must already exist in the table and still carry plain hasbit
// presence; otherwise an error is reported. The oneof's representation is
// widened if this member needs more space, and the field is prepended to the
// oneof's linked list, which is threaded through the fields' offsets.
// Returns the input position after the decoded number.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
                                                  const char* ptr,
                                                  char first_ch,
                                                  upb_LayoutItem* item) {
  uint32_t number;
  ptr = upb_MdDecoder_DecodeBase92Varint(
      &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
      kUpb_EncodedValue_MaxOneofField, &number);

  upb_MiniTableField* member =
      (void*)upb_MiniTable_FindFieldByNumber(d->table, number);
  if (!member) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Couldn't add field number %" PRIu32
                           " to oneof, no such field number.",
                           number);
  }
  if (member->UPB_PRIVATE(offset) != kHasbitPresence) {
    upb_MdDecoder_ErrorJmp(
        &d->base,
        "Cannot add repeated, required, or singular field %" PRIu32
        " to oneof.",
        number);
  }

  // Oneof storage must be large enough to accommodate the largest member.
  int member_rep = member->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift;
  const size_t member_size = upb_MtDecoder_SizeOfRep(member_rep, d->platform);
  const size_t oneof_size = upb_MtDecoder_SizeOfRep(item->rep, d->platform);
  if (member_size > oneof_size) {
    item->rep = member_rep;
  }

  // Prepend this field to the linked list: the member links to the previous
  // head, and the item now points (biased by kOneofBase) at the member.
  member->UPB_PRIVATE(offset) = item->field_index;
  item->field_index = (member - d->fields) + kOneofBase;
  return ptr;
}
363
// Decodes the oneof section, which runs from `ptr` to the end of the input.
//
// Members are accumulated into a single pending item; each oneof separator
// (and the end of the input) pushes the pending oneof onto the layout vector.
// Returns the input position after the section.
//
// NOTE(review): the pending item's `rep` is not reset between oneofs, so a
// later oneof's data slot is at least as large as the largest earlier one --
// confirm whether this is intentional before changing it, since it affects
// message layout.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem pending = {.rep = 0,
                            .field_index = kUpb_LayoutItem_IndexSentinel};

  while (ptr < d->base.end) {
    const char ch = *ptr++;

    // Field separator, no action needed.
    if (ch == kUpb_EncodedValue_FieldSeparator) continue;

    if (ch != kUpb_EncodedValue_OneofSeparator) {
      ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &pending);
      continue;
    }

    // End of oneof.
    upb_MtDecoder_PushOneof(d, pending);
    pending.field_index = kUpb_LayoutItem_IndexSentinel;  // Move to next oneof.
  }

  // Push final oneof.
  upb_MtDecoder_PushOneof(d, pending);
  return ptr;
}
385
// Decodes a modifier value and applies it.
//
// When a field has already been parsed, the modifier applies to that field
// (`last_field`). Otherwise it is a message modifier and is stored in
// `*msg_modifiers`; this is an error when decoding an extension, which has no
// table. Returns the input position after the modifier.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTableField* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t modifier;
  ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier,
                                         &modifier);

  if (last_field) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, modifier, last_field);
    return ptr;
  }

  if (!d->table) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Extensions cannot have message modifiers");
  }
  *msg_modifiers = modifier;
  return ptr;
}
406
// Allocates the table's sub-table array from the arena and initializes it.
//
// Message slots come first and each points at its own pointer cell, which
// initially refers to the shared empty MiniTable. Enum slots follow and start
// out NULL; because enum fields were numbered from zero during parsing, their
// submsg_index is shifted here by the number of message slots.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d,
                                       upb_SubCounts sub_counts) {
  const uint32_t msg_count = sub_counts.submsg_count;
  const uint32_t enum_count = sub_counts.subenum_count;
  const uint32_t slot_count = msg_count + enum_count;

  const size_t slots_bytes = sizeof(*d->table->UPB_PRIVATE(subs)) * slot_count;
  const size_t cells_bytes = sizeof(upb_MiniTable*) * msg_count;
  upb_MiniTableSubInternal* slots = upb_Arena_Malloc(d->arena, slots_bytes);
  const upb_MiniTable** cells = upb_Arena_Malloc(d->arena, cells_bytes);
  upb_MdDecoder_CheckOutOfMemory(&d->base, slots);
  upb_MdDecoder_CheckOutOfMemory(&d->base, cells);

  for (uint32_t m = 0; m < msg_count; m++) {
    cells[m] = UPB_PRIVATE(_upb_MiniTable_Empty)();
    slots[m].UPB_PRIVATE(submsg) = &cells[m];
  }

  if (enum_count) {
    const size_t field_count = d->table->UPB_PRIVATE(field_count);
    for (size_t k = 0; k < field_count; k++) {
      upb_MiniTableField* f = &d->fields[k];
      if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) {
        f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count;
      }
    }
    for (uint32_t e = msg_count; e < slot_count; e++) {
      slots[e].UPB_PRIVATE(subenum) = NULL;
    }
  }

  d->table->UPB_PRIVATE(subs) = slots;
}
435
// Decodes the field portion of a MiniDescriptor.
//
// Walks `len` bytes starting at `ptr`. For every encoded field, the next
// record in `fields` is filled in (consecutive records are `field_size` bytes
// apart) and `*field_count` is incremented. Modifier, skip and oneof records
// are handled as they are encountered; any other character is an error.
//
// When decoding an extension (d->table == NULL) only one field is consumed:
// on reaching a second field the function returns a pointer to that field's
// character. Otherwise the position where parsing stopped is returned.
static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
                                       size_t len, void* fields,
                                       size_t field_size, uint16_t* field_count,
                                       upb_SubCounts* sub_counts) {
  const bool is_message = d->table != NULL;
  uint64_t msg_modifiers = 0;
  uint32_t last_field_number = 0;
  upb_MiniTableField* last_field = NULL;
  // dense_below is fixed at the first skip record (or at the end of the
  // input if no skip occurs); it only applies to messages.
  bool need_dense_below = is_message;

  d->base.end = UPB_PTRADD(ptr, len);

  while (ptr < d->base.end) {
    char ch = *ptr++;

    if (ch <= kUpb_EncodedValue_MaxField) {
      if (!is_message && last_field) {
        // For extensions, consume only a single field and then return.
        return ptr - 1;
      }
      last_field = fields;
      fields = (char*)fields + field_size;
      *field_count += 1;
      last_field->UPB_PRIVATE(number) = ++last_field_number;
      upb_MiniTable_SetField(d, ch, last_field, msg_modifiers, sub_counts);
      continue;
    }

    if (kUpb_EncodedValue_MinModifier <= ch &&
        ch <= kUpb_EncodedValue_MaxModifier) {
      ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers);
      if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
        d->table->UPB_PRIVATE(ext) |= kUpb_ExtMode_Extendable;
      }
      continue;
    }

    if (ch == kUpb_EncodedValue_End) {
      if (!is_message) {
        upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs.");
      }
      ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
      continue;
    }

    if (kUpb_EncodedValue_MinSkip <= ch && ch <= kUpb_EncodedValue_MaxSkip) {
      if (need_dense_below) {
        d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
        need_dense_below = false;
      }
      uint32_t skip;
      ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch,
                                             kUpb_EncodedValue_MinSkip,
                                             kUpb_EncodedValue_MaxSkip, &skip);
      // Next field seen will increment, hence the adjustment by one.
      last_field_number += skip;
      last_field_number -= 1;
      continue;
    }

    upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch);
  }

  if (need_dense_below) {
    d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
  }

  return ptr;
}
494
// Parses a message MiniDescriptor body into d->table: allocates the field
// array, decodes all fields into it, trims the array to the number of fields
// actually found, and allocates the sub-table array.
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // Buffer length is an upper bound on the number of fields, so reserve that
  // many up front. The unused tail is given back to the arena below.
  const size_t reserved_bytes = sizeof(*d->fields) * len;
  d->fields = upb_Arena_Malloc(d->arena, reserved_bytes);
  upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields);

  upb_MiniTable* table = d->table;
  table->UPB_PRIVATE(field_count) = 0;
  table->UPB_PRIVATE(fields) = d->fields;

  upb_SubCounts sub_counts = {0, 0};
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &table->UPB_PRIVATE(field_count), &sub_counts);

  const size_t used_bytes =
      sizeof(*d->fields) * table->UPB_PRIVATE(field_count);
  upb_Arena_ShrinkLast(d->arena, d->fields, reserved_bytes, used_bytes);
  table->UPB_PRIVATE(fields) = d->fields;

  upb_MtDecoder_AllocateSubs(d, sub_counts);
}
513
upb_MtDecoder_CompareFields(const void * _a,const void * _b)514 static int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
515 const upb_LayoutItem* a = _a;
516 const upb_LayoutItem* b = _b;
517 // Currently we just sort by:
518 // 1. rep (smallest fields first)
519 // 2. type (oneof cases first)
520 // 2. field_index (smallest numbers first)
521 // The main goal of this is to reduce space lost to padding.
522 // Later we may have more subtle reasons to prefer a different ordering.
523 const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max);
524 const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max);
525 const int idx_bits = (sizeof(a->field_index) * 8);
526 UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
527 #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
528 uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
529 uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
530 UPB_ASSERT(a_packed != b_packed);
531 #undef UPB_COMBINE
532 return a_packed < b_packed ? -1 : 1;
533 }
534
// Completes the layout vector and sorts it into layout order.
//
// Oneofs were already added while parsing; this adds one item for every field
// that is not a oneof member and then sorts all items with
// upb_MtDecoder_CompareFields(). Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  const int field_count = d->table->UPB_PRIVATE(field_count);

  for (int idx = 0; idx < field_count; idx++) {
    const upb_MiniTableField* f = &d->fields[idx];
    // Presence classes below kOneofBase mark fields outside any oneof.
    if (f->UPB_PRIVATE(offset) < kOneofBase) {
      upb_LayoutItem item = {
          .type = kUpb_LayoutItemType_Field,
          .rep = f->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift,
          .field_index = idx,
      };
      upb_MtDecoder_PushItem(d, item);
    }
  }

  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }

  return true;
}
554
// Returns n / d rounded up to the next whole number. `d` must be non-zero.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  const size_t biased = n + (d - 1);
  return biased / d;
}
558
// Assigns hasbit indexes to the table's fields and sets the initial message
// size to cover the hasbit area.
//
// Required fields are numbered first so that they occupy the lowest hasbits
// (at most 64 of them are allowed); fields with no presence get presence 0;
// the remaining hasbit fields are numbered afterwards. Oneof members are left
// untouched here.
static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) {
  upb_MiniTable* table = d->table;
  const int field_count = table->UPB_PRIVATE(field_count);
  // Last hasbit handed out so far; the first usable one is
  // kUpb_Reserved_Hasbits.
  size_t hasbit = kUpb_Reserved_Hasbits - 1;

  // First assign required fields, which must have the lowest hasbits.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* f =
        (upb_MiniTableField*)&table->UPB_PRIVATE(fields)[idx];
    switch (f->UPB_PRIVATE(offset)) {
      case kRequiredPresence:
        f->presence = ++hasbit;
        break;
      case kNoPresence:
        f->presence = 0;
        break;
      default:
        break;
    }
  }
  if (hasbit > kUpb_Reserved_Hasbits + 63) {
    upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields");
  }

  table->UPB_PRIVATE(required_count) = hasbit - (kUpb_Reserved_Hasbits - 1);

  // Next assign non-required hasbit fields.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* f =
        (upb_MiniTableField*)&table->UPB_PRIVATE(fields)[idx];
    if (f->UPB_PRIVATE(offset) != kHasbitPresence) continue;
    f->presence = ++hasbit;
  }

  if (hasbit) {
    table->UPB_PRIVATE(size) = upb_MiniTable_DivideRoundUp(hasbit + 1, 8);
  } else {
    table->UPB_PRIVATE(size) = 0;
  }
}
592
// Reserves space at the end of the message for one value of representation
// `rep` and returns its offset.
//
// The current message size is first rounded up to the representation's
// alignment. The message size is then advanced past the new value; exceeding
// UINT16_MAX bytes is reported as an error.
static size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  static const size_t kMaxMessageSize = UINT16_MAX;

  const size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), align);
  const size_t end = offset + size;

  if (end > kMaxMessageSize) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Message size exceeded maximum size of %zu bytes",
        kMaxMessageSize);
  }

  d->table->UPB_PRIVATE(size) = end;
  return offset;
}
606
// Turns the sorted layout vector into final field offsets.
//
// Three passes over the vector:
//   1. Place every item, in order, to obtain its offset.
//   2. For each oneof case item, walk the oneof's linked list and store the
//      bitwise complement of the case offset in every member's presence.
//   3. Store data offsets in the fields. For a oneof this walks the same
//      list, overwriting each link with the shared data offset.
// Pass 2 has to finish before pass 3 starts, because pass 3 destroys the
// links that pass 2 follows. Finally the message size is rounded up to a
// multiple of 8.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  const size_t item_count = d->vec.size;

  // Compute offsets.
  for (size_t i = 0; i < item_count; i++) {
    upb_LayoutItem* item = &d->vec.data[i];
    item->offset = upb_MtDecoder_Place(d, item->rep);
  }

  // Assign oneof case offsets.  We must do these first, since assigning
  // actual offsets will overwrite the links of the linked list.
  for (size_t i = 0; i < item_count; i++) {
    const upb_LayoutItem* item = &d->vec.data[i];
    if (item->type != kUpb_LayoutItemType_OneofCase) continue;

    upb_MiniTableField* f = &d->fields[item->field_index];
    for (;;) {
      f->presence = ~item->offset;
      const uint16_t link = f->UPB_PRIVATE(offset);
      if (link == kUpb_LayoutItem_IndexSentinel) break;
      UPB_ASSERT(link - kOneofBase < d->table->UPB_PRIVATE(field_count));
      f = &d->fields[link - kOneofBase];
    }
  }

  // Assign offsets.
  for (size_t i = 0; i < item_count; i++) {
    const upb_LayoutItem* item = &d->vec.data[i];
    upb_MiniTableField* f = &d->fields[item->field_index];

    if (item->type == kUpb_LayoutItemType_OneofField) {
      for (;;) {
        // Read the link before it is replaced by the real offset.
        const uint16_t link = f->UPB_PRIVATE(offset);
        f->UPB_PRIVATE(offset) = item->offset;
        if (link == kUpb_LayoutItem_IndexSentinel) break;
        f = &d->fields[link - kOneofBase];
      }
    } else if (item->type == kUpb_LayoutItemType_Field) {
      f->UPB_PRIVATE(offset) = item->offset;
    }
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->UPB_PRIVATE(size) = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), 8);
}
656
// Validates one field of a map entry.
//
// `expected_num` is 1 for the key and 2 for the value. The field must carry
// exactly that number, must be scalar, and must not use a type that is
// forbidden in its position: keys may not be float, double, message, group,
// bytes or enum; values may not be groups. Violations are reported through
// upb_MdDecoder_ErrorJmp().
static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d,
                                             const upb_MiniTableField* f,
                                             uint32_t expected_num) {
  const char* name = expected_num == 1 ? "key" : "val";
  const uint32_t f_number = upb_MiniTableField_Number(f);
  if (f_number != expected_num) {
    // Both numbers are uint32_t, so format them with PRIu32 (as the rest of
    // this file does) rather than %d.
    upb_MdDecoder_ErrorJmp(&d->base,
                           "map %s did not have expected number (%" PRIu32
                           " vs %" PRIu32 ")",
                           name, expected_num, f_number);
  }

  if (!upb_MiniTableField_IsScalar(f)) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "map %s cannot be repeated or map, or be in oneof", name);
  }

  // Bit set of field types that are not allowed in this position.
  uint32_t not_ok_types;
  if (expected_num == 1) {
    not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) |
                   (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) |
                   (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum);
  } else {
    not_ok_types = 1 << kUpb_FieldType_Group;
  }

  if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) {
    upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name,
                           (int)f->UPB_PRIVATE(descriptortype));
  }
}
687
// Builds the MiniTable for a map entry.
//
// The body is parsed like a message, then checked: it must contain exactly
// two fields, no oneof, and the fields must be a valid key (number 1) and
// value (number 2). Offsets and size are taken from upb_MapEntry rather than
// being computed, and the table is flagged as a map entry.
static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data,
                                   size_t len) {
  upb_MtDecoder_ParseMessage(d, data, len);
  upb_MtDecoder_AssignHasbits(d);

  upb_MiniTable* table = d->table;

  if (UPB_UNLIKELY(table->UPB_PRIVATE(field_count) != 2)) {
    upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map",
                           table->UPB_PRIVATE(field_count));
    UPB_UNREACHABLE();
  }

  for (size_t i = 0; i < d->vec.size; i++) {
    if (d->vec.data[i].type == kUpb_LayoutItemType_OneofCase) {
      upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof");
    }
  }

  upb_MtDecoder_ValidateEntryField(d, &table->UPB_PRIVATE(fields)[0], 1);
  upb_MtDecoder_ValidateEntryField(d, &table->UPB_PRIVATE(fields)[1], 2);

  upb_MiniTableField* key = &d->fields[0];
  upb_MiniTableField* val = &d->fields[1];
  key->UPB_PRIVATE(offset) = offsetof(upb_MapEntry, k);
  val->UPB_PRIVATE(offset) = offsetof(upb_MapEntry, v);
  table->UPB_PRIVATE(size) = sizeof(upb_MapEntry);

  // Map entries have a special bit set to signal it's a map entry, used in
  // upb_MiniTable_SetSubMessage() below.
  table->UPB_PRIVATE(ext) |= kUpb_ExtMode_IsMapEntry;
}
717
// Builds the MiniTable for a MessageSet. A MessageSet descriptor has no body,
// so any remaining input is an error. The resulting table has no fields and
// is flagged as a message set.
static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data,
                                          size_t len) {
  (void)data;

  if (len != 0) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu",
                           len);
  }

  upb_MiniTable* table = d->table;
  table->UPB_PRIVATE(ext) = kUpb_ExtMode_IsMessageSet;
  table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
  table->UPB_PRIVATE(field_count) = 0;
  table->UPB_PRIVATE(required_count) = 0;
  table->UPB_PRIVATE(dense_below) = 0;
  table->UPB_PRIVATE(table_mask) = -1;
}
733
upb_MtDecoder_DoBuildMiniTableWithBuf(upb_MtDecoder * decoder,const char * data,size_t len,void ** buf,size_t * buf_size)734 static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf(
735 upb_MtDecoder* decoder, const char* data, size_t len, void** buf,
736 size_t* buf_size) {
737 upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table);
738
739 decoder->table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
740 decoder->table->UPB_PRIVATE(field_count) = 0;
741 decoder->table->UPB_PRIVATE(ext) = kUpb_ExtMode_NonExtendable;
742 decoder->table->UPB_PRIVATE(dense_below) = 0;
743 decoder->table->UPB_PRIVATE(table_mask) = -1;
744 decoder->table->UPB_PRIVATE(required_count) = 0;
745 #if UPB_TRACING_ENABLED
746 // MiniTables built from MiniDescriptors will not be able to vend the message
747 // name unless it is explicitly set with upb_MiniTable_SetFullName().
748 decoder->table->UPB_PRIVATE(full_name) = 0;
749 #endif
750
751 // Strip off and verify the version tag.
752 if (!len--) goto done;
753 const char vers = *data++;
754
755 switch (vers) {
756 case kUpb_EncodedVersion_MapV1:
757 upb_MtDecoder_ParseMap(decoder, data, len);
758 break;
759
760 case kUpb_EncodedVersion_MessageV1:
761 upb_MtDecoder_ParseMessage(decoder, data, len);
762 upb_MtDecoder_AssignHasbits(decoder);
763 upb_MtDecoder_SortLayoutItems(decoder);
764 upb_MtDecoder_AssignOffsets(decoder);
765 break;
766
767 case kUpb_EncodedVersion_MessageSetV1:
768 upb_MtDecoder_ParseMessageSet(decoder, data, len);
769 break;
770
771 default:
772 upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c",
773 vers);
774 }
775
776 done:
777 *buf = decoder->vec.data;
778 *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
779 return decoder->table;
780 }
781
upb_MtDecoder_BuildMiniTableWithBuf(upb_MtDecoder * const decoder,const char * const data,const size_t len,void ** const buf,size_t * const buf_size)782 static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf(
783 upb_MtDecoder* const decoder, const char* const data, const size_t len,
784 void** const buf, size_t* const buf_size) {
785 if (UPB_SETJMP(decoder->base.err) != 0) {
786 *buf = decoder->vec.data;
787 *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
788 return NULL;
789 }
790
791 return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf,
792 buf_size);
793 }
794
upb_MiniTable_BuildWithBuf(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,void ** buf,size_t * buf_size,upb_Status * status)795 upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
796 upb_MiniTablePlatform platform,
797 upb_Arena* arena, void** buf,
798 size_t* buf_size,
799 upb_Status* status) {
800 upb_MtDecoder decoder = {
801 .base = {.status = status},
802 .platform = platform,
803 .vec =
804 {
805 .data = *buf,
806 .capacity = *buf_size / sizeof(*decoder.vec.data),
807 .size = 0,
808 },
809 .arena = arena,
810 .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
811 };
812
813 return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf,
814 buf_size);
815 }
816
// Decodes an extension MiniDescriptor into `ext`.
//
// Non-empty input must start with the extension version tag and must encode
// exactly one field. The field is marked as an extension with no offset and
// no presence. If `extendee` is a MessageSet, the extension must be a
// non-repeated message. On success `ext` is bound to `extendee` and `sub`,
// and the input position where parsing stopped is returned.
//
// Every failure is reported through upb_MdDecoder_ErrorJmp(), so the decoder
// status describes what went wrong instead of the caller only seeing NULL.
static const char* upb_MtDecoder_DoBuildMiniTableExtension(
    upb_MtDecoder* decoder, const char* data, size_t len,
    upb_MiniTableExtension* ext, const upb_MiniTable* extendee,
    upb_MiniTableSub sub) {
  // If the string is non-empty then it must begin with a version tag.
  if (len) {
    if (*data != kUpb_EncodedVersion_ExtensionV1) {
      upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data);
    }
    data++;
    len--;
  }

  uint16_t count = 0;
  upb_SubCounts sub_counts = {0, 0};
  const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext),
                                        &count, &sub_counts);
  if (!ret || count != 1) {
    upb_MdDecoder_ErrorJmp(&decoder->base,
                           "Extension must contain exactly one field");
  }

  upb_MiniTableField* f = &ext->UPB_PRIVATE(field);

  f->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsExtension;
  f->UPB_PRIVATE(offset) = 0;
  f->presence = 0;

  if (extendee->UPB_PRIVATE(ext) & kUpb_ExtMode_IsMessageSet) {
    // Extensions of MessageSet must be messages.
    if (!upb_MiniTableField_IsSubMessage(f)) {
      upb_MdDecoder_ErrorJmp(&decoder->base,
                             "Extensions of MessageSet must be messages");
    }

    // Extensions of MessageSet must be non-repeating.
    if (upb_MiniTableField_IsArray(f)) {
      upb_MdDecoder_ErrorJmp(&decoder->base,
                             "Extensions of MessageSet must be non-repeating");
    }
  }

  ext->UPB_PRIVATE(extendee) = extendee;
  ext->UPB_PRIVATE(sub) = sub;

  return ret;
}
855
// Establishes the decoder's error-recovery point and runs the extension
// build. Returns NULL if the build jumps back here with an error, otherwise
// the result of upb_MtDecoder_DoBuildMiniTableExtension().
static const char* upb_MtDecoder_BuildMiniTableExtension(
    upb_MtDecoder* const decoder, const char* const data, const size_t len,
    upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee,
    const upb_MiniTableSub sub) {
  if (UPB_SETJMP(decoder->base.err) == 0) {
    return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext,
                                                   extendee, sub);
  }
  // Error path: reached via the jump buffer.
  return NULL;
}
864
// Decodes the extension MiniDescriptor `data` (`len` bytes) into the
// caller-provided `ext`, binding it to `extendee` and `sub`. The decoder runs
// without a table or arena. Errors are reported in `status`. Returns the
// input position where parsing stopped, or NULL on failure.
const char* _upb_MiniTableExtension_Init(const char* data, size_t len,
                                         upb_MiniTableExtension* ext,
                                         const upb_MiniTable* extendee,
                                         upb_MiniTableSub sub,
                                         upb_MiniTablePlatform platform,
                                         upb_Status* status) {
  // Members not named here (fields, vec) are zero-initialized.
  upb_MtDecoder decoder = {
      .base = {.status = status},
      .table = NULL,
      .platform = platform,
      .arena = NULL,
  };

  const char* end = upb_MtDecoder_BuildMiniTableExtension(
      &decoder, data, len, ext, extendee, sub);
  return end;
}
881
_upb_MiniTableExtension_Build(const char * data,size_t len,const upb_MiniTable * extendee,upb_MiniTableSub sub,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)882 upb_MiniTableExtension* _upb_MiniTableExtension_Build(
883 const char* data, size_t len, const upb_MiniTable* extendee,
884 upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena,
885 upb_Status* status) {
886 upb_MiniTableExtension* ext =
887 upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension));
888 if (UPB_UNLIKELY(!ext)) return NULL;
889
890 const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub,
891 platform, status);
892 if (UPB_UNLIKELY(!ptr)) return NULL;
893
894 return ext;
895 }
896
_upb_MiniTable_Build(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)897 upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len,
898 upb_MiniTablePlatform platform,
899 upb_Arena* arena, upb_Status* status) {
900 void* buf = NULL;
901 size_t size = 0;
902 upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
903 &buf, &size, status);
904 free(buf);
905 return ret;
906 }
907