• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb/mini_descriptor/decode.h"
9 
10 #include <inttypes.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include "upb/base/descriptor_constants.h"
16 #include "upb/base/internal/log2.h"
17 #include "upb/base/status.h"
18 #include "upb/base/string_view.h"
19 #include "upb/mem/arena.h"
20 #include "upb/message/internal/map_entry.h"
21 #include "upb/message/internal/types.h"
22 #include "upb/mini_descriptor/internal/base92.h"
23 #include "upb/mini_descriptor/internal/decoder.h"
24 #include "upb/mini_descriptor/internal/modifiers.h"
25 #include "upb/mini_descriptor/internal/wire_constants.h"
26 #include "upb/mini_table/extension.h"
27 #include "upb/mini_table/field.h"
28 #include "upb/mini_table/internal/field.h"
29 #include "upb/mini_table/internal/message.h"
30 #include "upb/mini_table/internal/sub.h"
31 #include "upb/mini_table/message.h"
32 #include "upb/mini_table/sub.h"
33 
34 // Must be last.
35 #include "upb/port/def.inc"
36 
// We reserve unused hasbits to make room for upb_Message fields: the first
// sizeof(struct upb_Message) bytes of the hasbit area are never handed out.
#define kUpb_Reserved_Hasbytes (sizeof(struct upb_Message))

// Index of the first hasbit that may be assigned to a field (this is 64 when
// sizeof(struct upb_Message) is 8).
#define kUpb_Reserved_Hasbits (kUpb_Reserved_Hasbytes * 8)
42 
// Kinds of storage placed by the layout pass.
//
// Note: we sort by this number when calculating layout order, so the order of
// the enumerators below is significant.
typedef enum {
  kUpb_LayoutItemType_OneofCase,   // Oneof case.
  kUpb_LayoutItemType_OneofField,  // Oneof field data.
  kUpb_LayoutItemType_Field,       // Non-oneof field data.

  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
51 
// "No field" marker for 16-bit field indexes: it denotes an empty oneof and
// terminates a oneof's linked list of member fields.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)
53 
54 typedef struct {
55   // Index of the corresponding field.  When this is a oneof field, the field's
56   // offset will be the index of the next field in a linked list.
57   uint16_t field_index;
58   uint16_t offset;
59   upb_FieldRep rep;
60   upb_LayoutItemType type;
61 } upb_LayoutItem;
62 
63 typedef struct {
64   upb_LayoutItem* data;
65   size_t size;
66   size_t capacity;
67 } upb_LayoutItemVector;
68 
69 typedef struct {
70   upb_MdDecoder base;
71   upb_MiniTable* table;
72   upb_MiniTableField* fields;
73   upb_MiniTablePlatform platform;
74   upb_LayoutItemVector vec;
75   upb_Arena* arena;
76 } upb_MtDecoder;
77 
// In each field's offset, we temporarily store a presence classifier until the
// real byte offsets are assigned:
enum PresenceClass {
  kNoPresence = 0,        // No hasbit (repeated or proto3-singular fields).
  kHasbitPresence = 1,    // Gets an ordinary hasbit.
  kRequiredPresence = 2,  // Required; gets one of the lowest hasbits.
  kOneofBase = 3,
  // Values >= kOneofBase indicate that this field is in a oneof.  The value is
  // either (index of the next field in this oneof's linked list + kOneofBase)
  // or kUpb_LayoutItem_IndexSentinel for the last field in the list.
  //
  // NOTE(review): an earlier revision of this comment said that negative
  // values refer to a specific oneof number; nothing in the decoder code
  // reviewed here stores a negative value, so that looks stale -- confirm.
};
88 
// Returns true when `field` is an array whose element type may use the packed
// wire encoding.
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) {
  if (!(field->UPB_PRIVATE(mode) & kUpb_FieldMode_Array)) return false;
  return upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype));
}
93 
// Running totals of the sub-table slots handed out while parsing fields.
typedef struct {
  uint16_t submsg_count;   // Message and group fields seen so far.
  uint16_t subenum_count;  // Closed-enum fields seen so far.
} upb_SubCounts;
98 
// Stores `field`'s final descriptor type and hands out its sub-table index.
//
// Two kinds of field are stored under an alternate type and flagged with
// kUpb_LabelFlags_IsAlternate:
//   - enums with `is_proto3_enum` set are stored as int32;
//   - strings are stored as bytes unless the message modifiers request UTF-8
//     validation.
// Packable fields are marked packed when the message defaults to packed.
// Message/group fields take the next submsg index, enum fields the next
// subenum index, and everything else gets kUpb_NoSub.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field,
                                        upb_FieldType type,
                                        upb_SubCounts* sub_counts,
                                        uint64_t msg_modifiers,
                                        bool is_proto3_enum) {
  const bool validate_utf8 =
      (msg_modifiers & kUpb_MessageModifier_ValidateUtf8) != 0;
  const bool packed_by_default =
      (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;

  if (is_proto3_enum) {
    UPB_ASSERT(type == kUpb_FieldType_Enum);
    type = kUpb_FieldType_Int32;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  } else if (type == kUpb_FieldType_String && !validate_utf8) {
    type = kUpb_FieldType_Bytes;
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsAlternate;
  }

  field->UPB_PRIVATE(descriptortype) = type;

  if (packed_by_default && upb_MtDecoder_FieldIsPackable(field)) {
    field->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsPacked;
  }

  switch (type) {
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++;
      break;
    case kUpb_FieldType_Enum:
      // We will need to update this later once we know the total number of
      // submsg fields.
      field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++;
      break;
    default:
      field->UPB_PRIVATE(submsg_index) = kUpb_NoSub;
      break;
  }
}
131 
132 static const char kUpb_EncodedToType[] = {
133     [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
134     [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
135     [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
136     [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
137     [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
138     [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
139     [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
140     [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
141     [kUpb_EncodedType_String] = kUpb_FieldType_String,
142     [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
143     [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
144     [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
145     [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
146     [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
147     [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
148     [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
149     [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
150     [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
151     [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
152 };
153 
// Initializes `field` from the encoded type character `ch`.
//
// Characters at or above the repeated base describe arrays: they are stored
// with a pointer-sized representation and no presence.  Other characters
// describe scalars, which start out with hasbit presence and use a
// pointer-sized representation for messages and groups, or the per-type
// representation from the table below.  An out-of-range type is reported
// through upb_MdDecoder_ErrorJmp().
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTableField* field,
                                   uint64_t msg_modifiers,
                                   upb_SubCounts* sub_counts) {
  // Storage representation of each scalar type.  Group and Message have no
  // entry; they are handled separately below.
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,
  };

  // Pointers are 4 bytes on the 32-bit platform and 8 bytes otherwise.
  char pointer_rep = kUpb_FieldRep_8Byte;
  if (d->platform == kUpb_MiniTablePlatform_32Bit) {
    pointer_rep = kUpb_FieldRep_4Byte;
  }

  int8_t type = _upb_FromBase92(ch);
  const bool is_repeated = ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase);

  if (is_repeated) {
    type -= kUpb_EncodedType_RepeatedBase;
    field->UPB_PRIVATE(mode) =
        kUpb_FieldMode_Array | (pointer_rep << kUpb_FieldRep_Shift);
    field->UPB_PRIVATE(offset) = kNoPresence;
  } else {
    field->UPB_PRIVATE(mode) = kUpb_FieldMode_Scalar;
    field->UPB_PRIVATE(offset) = kHasbitPresence;
    if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) {
      field->UPB_PRIVATE(mode) |= pointer_rep << kUpb_FieldRep_Shift;
    } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) {
      // A negative `type` also lands here: the cast makes it a huge value.
      upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
    } else {
      field->UPB_PRIVATE(mode) |= kUpb_EncodedToFieldRep[type]
                                  << kUpb_FieldRep_Shift;
    }
  }

  // Covers the repeated case, which has not been range-checked yet.
  if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
  }

  const bool is_open_enum = type == kUpb_EncodedType_OpenEnum;
  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts,
                              msg_modifiers, is_open_enum);
}
206 
// Applies the encoded `field_modifiers` to `field`.
//
// Handled modifiers: FlipPacked (toggles the packed flag of a packable field),
// FlipValidateUtf8 (turns an "alternate" bytes field back into a string),
// IsProto3Singular (drops presence) and IsRequired (required presence).
// Invalid combinations are reported through upb_MdDecoder_ErrorJmp().
// `message_modifiers` is accepted but not consulted here.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
                                      uint32_t message_modifiers,
                                      uint32_t field_modifiers,
                                      upb_MiniTableField* field) {
  const bool flip_packed =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) != 0;
  const bool flip_utf8 =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipValidateUtf8) != 0;
  const bool singular =
      (field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular) != 0;
  const bool required =
      (field_modifiers & kUpb_EncodedFieldModifier_IsRequired) != 0;

  if (flip_packed) {
    if (!upb_MtDecoder_FieldIsPackable(field)) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip packed on unpackable field %" PRIu32,
                             upb_MiniTableField_Number(field));
    }
    field->UPB_PRIVATE(mode) ^= kUpb_LabelFlags_IsPacked;
  }

  if (flip_utf8) {
    // Only a string that was previously downgraded to "alternate" bytes can
    // be flipped back.
    const bool is_alternate_bytes =
        field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Bytes &&
        (field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsAlternate);
    if (!is_alternate_bytes) {
      upb_MdDecoder_ErrorJmp(&d->base,
                             "Cannot flip ValidateUtf8 on field %" PRIu32
                             ", type=%d, mode=%d",
                             upb_MiniTableField_Number(field),
                             (int)field->UPB_PRIVATE(descriptortype),
                             (int)field->UPB_PRIVATE(mode));
    }
    field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_String;
    field->UPB_PRIVATE(mode) &= ~kUpb_LabelFlags_IsAlternate;
  }

  // Validate.
  if ((singular || required) && field->UPB_PRIVATE(offset) != kHasbitPresence) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Invalid modifier(s) for repeated field %" PRIu32,
                           upb_MiniTableField_Number(field));
  }
  if (singular && required) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Field %" PRIu32 " cannot be both singular and required",
        upb_MiniTableField_Number(field));
  }
  if (singular && upb_MiniTableField_IsSubMessage(field)) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Field %" PRIu32 " cannot be a singular submessage",
                           upb_MiniTableField_Number(field));
  }

  // Record the new presence class in the (temporarily repurposed) offset.
  if (required) {
    field->UPB_PRIVATE(offset) = kRequiredPresence;
  } else if (singular) {
    field->UPB_PRIVATE(offset) = kNoPresence;
  }
}
260 
// Appends `item` to the decoder's layout vector, doubling the capacity
// (minimum 8 items) when the vector is full.  Allocation failure is reported
// through upb_MdDecoder_CheckOutOfMemory().
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    // Keep the old pointer until realloc() has succeeded: on failure realloc()
    // leaves the original block allocated, and overwriting d->vec.data with
    // NULL would make it impossible for the owner of `d` to release it.
    // NOTE(review): this relies on the decoder's owner freeing d->vec.data on
    // the error path -- confirm at the setjmp site.
    upb_LayoutItem* grown =
        realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
    upb_MdDecoder_CheckOutOfMemory(&d->base, grown);
    d->vec.data = grown;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
270 
// Emits the two layout items for a finished oneof: one for the shared data
// storage and one for the 4-byte case that records the active field number.
// `item.field_index` arrives biased by kOneofBase; an index equal to the
// sentinel means the oneof has no members, which is an error.
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof");
  }

  // Oneof data: sized for the largest member, indexed by the list head.
  upb_LayoutItem data_item = item;
  data_item.field_index -= kOneofBase;
  data_item.type = kUpb_LayoutItemType_OneofField;
  upb_MtDecoder_PushItem(d, data_item);

  // Oneof case: same head index, always 4 bytes.
  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_item.type = kUpb_LayoutItemType_OneofCase;
  upb_MtDecoder_PushItem(d, case_item);
}
286 
// Returns the number of bytes that `rep` occupies on `platform`.  Only the
// string-view representation differs between 32-bit and 64-bit targets.
static size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
                                      upb_MiniTablePlatform platform) {
  static const uint8_t kRepToSize32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToSize64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 16,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // The table for the host platform must agree with the real struct.
  UPB_ASSERT(sizeof(upb_StringView) ==
             UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);

  if (platform == kUpb_MiniTablePlatform_32Bit) {
    return kRepToSize32[rep];
  }
  return kRepToSize64[rep];
}
306 
// Returns the alignment in bytes that `rep` requires on `platform`.  Only the
// string-view representation differs between 32-bit and 64-bit targets.
static size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
                                       upb_MiniTablePlatform platform) {
  static const uint8_t kRepToAlign32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 4,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToAlign64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // The table for the host platform must agree with the real struct.
  UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
             UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);

  if (platform == kUpb_MiniTablePlatform_32Bit) {
    return kRepToAlign32[rep];
  }
  return kRepToAlign64[rep];
}
326 
// Decodes one oneof member (a field number) starting at `first_ch` and links
// that field into the oneof described by `item`.
//
// The field must exist and must still have plain hasbit presence.  On return
// `item->rep` is large enough for the new member, and `item->field_index`
// holds the member's index biased by kOneofBase.  Returns the advanced input
// pointer.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
                                                  const char* ptr,
                                                  char first_ch,
                                                  upb_LayoutItem* item) {
  uint32_t field_num;
  ptr = upb_MdDecoder_DecodeBase92Varint(
      &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
      kUpb_EncodedValue_MaxOneofField, &field_num);

  upb_MiniTableField* member =
      (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
  if (member == NULL) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Couldn't add field number %" PRIu32
                           " to oneof, no such field number.",
                           field_num);
  }
  if (member->UPB_PRIVATE(offset) != kHasbitPresence) {
    upb_MdDecoder_ErrorJmp(
        &d->base,
        "Cannot add repeated, required, or singular field %" PRIu32
        " to oneof.",
        field_num);
  }

  // Oneof storage must be large enough to accommodate the largest member.
  int member_rep = member->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift;
  const size_t member_size = upb_MtDecoder_SizeOfRep(member_rep, d->platform);
  const size_t oneof_size = upb_MtDecoder_SizeOfRep(item->rep, d->platform);
  if (member_size > oneof_size) {
    item->rep = member_rep;
  }

  // Prepend this field to the linked list: the member's offset takes over the
  // previous head, and the member becomes the new head.
  member->UPB_PRIVATE(offset) = item->field_index;
  item->field_index = (member - d->fields) + kOneofBase;
  return ptr;
}
363 
// Decodes the oneof section, which runs to the end of the input.  Members are
// accumulated into a working item; each oneof separator, and the end of the
// input, pushes the oneof collected so far.  Returns the advanced input
// pointer.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem current = {.rep = 0,
                            .field_index = kUpb_LayoutItem_IndexSentinel};

  while (ptr < d->base.end) {
    const char ch = *ptr++;

    if (ch == kUpb_EncodedValue_FieldSeparator) {
      // Field separator, no action needed.
      continue;
    }

    if (ch == kUpb_EncodedValue_OneofSeparator) {
      // End of oneof.
      upb_MtDecoder_PushOneof(d, current);
      // Move to next oneof.  Only the index is reset; `rep` carries over from
      // the previous oneof.
      current.field_index = kUpb_LayoutItem_IndexSentinel;
      continue;
    }

    ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &current);
  }

  // Push final oneof.
  upb_MtDecoder_PushOneof(d, current);
  return ptr;
}
385 
// Decodes one modifier value starting at `first_ch`.
//
// If a field has already been parsed, the value modifies `last_field`.
// Otherwise it is a message-level modifier and is stored in `*msg_modifiers`,
// which is an error when decoding an extension (no table).  Returns the
// advanced input pointer.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTableField* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t mod;
  ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier, &mod);

  if (last_field != NULL) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field);
    return ptr;
  }

  if (d->table == NULL) {
    upb_MdDecoder_ErrorJmp(&d->base,
                           "Extensions cannot have message modifiers");
  }
  *msg_modifiers = mod;
  return ptr;
}
406 
// Allocates the table's sub array: one slot per message/group field followed
// by one slot per enum field.
//
// Every sub-message slot points at its own pointer cell, which initially
// refers to the empty mini table.  Enum slots start out NULL.  Enum fields
// were numbered from zero during parsing, so their indexes are shifted past
// the sub-message slots here.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d,
                                       upb_SubCounts sub_counts) {
  const uint32_t msg_count = sub_counts.submsg_count;
  const uint32_t total_count = msg_count + sub_counts.subenum_count;

  size_t subs_bytes = sizeof(*d->table->UPB_PRIVATE(subs)) * total_count;
  size_t ptrs_bytes = sizeof(upb_MiniTable*) * msg_count;
  upb_MiniTableSubInternal* subs = upb_Arena_Malloc(d->arena, subs_bytes);
  const upb_MiniTable** subs_ptrs = upb_Arena_Malloc(d->arena, ptrs_bytes);
  upb_MdDecoder_CheckOutOfMemory(&d->base, subs);
  upb_MdDecoder_CheckOutOfMemory(&d->base, subs_ptrs);

  for (uint32_t i = 0; i < msg_count; i++) {
    subs_ptrs[i] = UPB_PRIVATE(_upb_MiniTable_Empty)();
    subs[i].UPB_PRIVATE(submsg) = &subs_ptrs[i];
  }

  if (sub_counts.subenum_count) {
    upb_MiniTableField* const fields_end =
        d->fields + d->table->UPB_PRIVATE(field_count);
    for (upb_MiniTableField* f = d->fields; f < fields_end; f++) {
      if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) {
        f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count;
      }
    }
    for (uint32_t i = msg_count; i < total_count; i++) {
      subs[i].UPB_PRIVATE(subenum) = NULL;
    }
  }

  d->table->UPB_PRIVATE(subs) = subs;
}
435 
// Parses the encoded descriptor in [ptr, ptr + len) and fills in `fields`.
//
// Each input character is one of:
//   - a field type: appends a field (numbered one past the previous number)
//     at the next `field_size`-byte slot of `fields` and bumps `*field_count`;
//   - a modifier: applied to the last field, or to the message when no field
//     has been seen yet;
//   - the end marker: everything after it is the oneof section;
//   - a skip: advances the running field number, which also fixes the table's
//     dense_below at the number of fields seen so far.
// Any other character is an error.
//
// When decoding an extension (d->table == NULL) only a single field is
// consumed and the pointer to the next field character is returned.
// Otherwise the returned pointer is the end of the input.
//
// The caller must provide room in `fields` for one entry per input byte.
static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
                                       size_t len, void* fields,
                                       size_t field_size, uint16_t* field_count,
                                       upb_SubCounts* sub_counts) {
  uint64_t msg_modifiers = 0;
  uint32_t last_field_number = 0;
  upb_MiniTableField* last_field = NULL;
  bool need_dense_below = d->table != NULL;

  d->base.end = UPB_PTRADD(ptr, len);

  while (ptr < d->base.end) {
    char ch = *ptr++;
    if (ch <= kUpb_EncodedValue_MaxField) {
      if (!d->table && last_field) {
        // For extensions, consume only a single field and then return.
        return --ptr;
      }
      // The count is a uint16_t; without this check it would silently wrap
      // and leave the table's field_count and sub counts inconsistent with
      // the fields that were actually written.
      if (*field_count == UINT16_MAX) {
        upb_MdDecoder_ErrorJmp(&d->base, "Too many fields");
      }
      upb_MiniTableField* field = fields;
      *field_count += 1;
      fields = (char*)fields + field_size;
      field->UPB_PRIVATE(number) = ++last_field_number;
      last_field = field;
      upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts);
    } else if (kUpb_EncodedValue_MinModifier <= ch &&
               ch <= kUpb_EncodedValue_MaxModifier) {
      ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers);
      // msg_modifiers can only become non-zero when d->table is set, because
      // ParseModifier rejects message modifiers for extensions.
      if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
        d->table->UPB_PRIVATE(ext) |= kUpb_ExtMode_Extendable;
      }
    } else if (ch == kUpb_EncodedValue_End) {
      if (!d->table) {
        upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs.");
      }
      ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
    } else if (kUpb_EncodedValue_MinSkip <= ch &&
               ch <= kUpb_EncodedValue_MaxSkip) {
      if (need_dense_below) {
        d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
        need_dense_below = false;
      }
      uint32_t skip;
      ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch,
                                             kUpb_EncodedValue_MinSkip,
                                             kUpb_EncodedValue_MaxSkip, &skip);
      last_field_number += skip;
      last_field_number--;  // Next field seen will increment.
    } else {
      upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch);
    }
  }

  // No skip was seen, so every field is in the dense range.
  if (need_dense_below) {
    d->table->UPB_PRIVATE(dense_below) = d->table->UPB_PRIVATE(field_count);
  }

  return ptr;
}
494 
// Parses a message descriptor into d->table: allocates the field array, runs
// the parser, returns the unused tail of the array to the arena, and
// allocates the sub-table array.
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // Buffer length is an upper bound on the number of fields. We will return
  // what we don't use.
  //
  // Reject lengths for which the byte count below would overflow size_t; an
  // overflowed product would allocate a buffer that is too small for the
  // fields the parser writes.
  if (len > SIZE_MAX / sizeof(*d->fields)) {
    upb_MdDecoder_ErrorJmp(&d->base, "Out of memory");
  }
  d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len);
  upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields);

  upb_SubCounts sub_counts = {0, 0};
  d->table->UPB_PRIVATE(field_count) = 0;
  d->table->UPB_PRIVATE(fields) = d->fields;
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &d->table->UPB_PRIVATE(field_count), &sub_counts);

  // Give back the slots that turned out not to hold a field.
  upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len,
                       sizeof(*d->fields) * d->table->UPB_PRIVATE(field_count));
  d->table->UPB_PRIVATE(fields) = d->fields;
  upb_MtDecoder_AllocateSubs(d, sub_counts);
}
513 
upb_MtDecoder_CompareFields(const void * _a,const void * _b)514 static int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
515   const upb_LayoutItem* a = _a;
516   const upb_LayoutItem* b = _b;
517   // Currently we just sort by:
518   //  1. rep (smallest fields first)
519   //  2. type (oneof cases first)
520   //  2. field_index (smallest numbers first)
521   // The main goal of this is to reduce space lost to padding.
522   // Later we may have more subtle reasons to prefer a different ordering.
523   const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max);
524   const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max);
525   const int idx_bits = (sizeof(a->field_index) * 8);
526   UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
527 #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
528   uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
529   uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
530   UPB_ASSERT(a_packed != b_packed);
531 #undef UPB_COMBINE
532   return a_packed < b_packed ? -1 : 1;
533 }
534 
// Completes the layout vector with one item per field that is not part of a
// oneof, then sorts the whole vector into layout order.  Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  const int field_count = d->table->UPB_PRIVATE(field_count);

  // Add items for all non-oneof fields (oneofs were already added).
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* f = &d->fields[idx];
    const bool in_oneof = f->UPB_PRIVATE(offset) >= kOneofBase;
    if (!in_oneof) {
      upb_LayoutItem item = {.field_index = idx,
                             .rep = f->UPB_PRIVATE(mode) >> kUpb_FieldRep_Shift,
                             .type = kUpb_LayoutItemType_Field};
      upb_MtDecoder_PushItem(d, item);
    }
  }

  // Nothing to sort (and possibly no buffer) when the vector is empty.
  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }

  return true;
}
554 
// Returns n / d rounded up to the next whole number.  `d` must be non-zero
// and `n + d - 1` must not overflow size_t.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  return (n + (d - 1)) / d;
}
558 
// Assigns hasbit numbers to the table's fields and sets the table size to the
// number of bytes needed to hold them.
//
// Hasbits start at kUpb_Reserved_Hasbits.  Required fields are numbered first
// so that they occupy the lowest hasbits; at most 64 are allowed.  Fields
// without presence get presence 0.  Fields in oneofs are left untouched.
static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) {
  upb_MiniTable* table = d->table;
  const int field_count = table->UPB_PRIVATE(field_count);
  size_t last_hasbit = kUpb_Reserved_Hasbits - 1;

  // First assign required fields, which must have the lowest hasbits.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* field =
        (upb_MiniTableField*)&table->UPB_PRIVATE(fields)[idx];
    switch (field->UPB_PRIVATE(offset)) {
      case kRequiredPresence:
        field->presence = ++last_hasbit;
        break;
      case kNoPresence:
        field->presence = 0;
        break;
      default:
        break;
    }
  }
  if (last_hasbit > kUpb_Reserved_Hasbits + 63) {
    upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields");
  }

  table->UPB_PRIVATE(required_count) =
      last_hasbit - (kUpb_Reserved_Hasbits - 1);

  // Next assign non-required hasbit fields.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTableField* field =
        (upb_MiniTableField*)&table->UPB_PRIVATE(fields)[idx];
    if (field->UPB_PRIVATE(offset) == kHasbitPresence) {
      field->presence = ++last_hasbit;
    }
  }

  // Round the highest hasbit index up to whole bytes.
  if (last_hasbit) {
    table->UPB_PRIVATE(size) = upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8);
  } else {
    table->UPB_PRIVATE(size) = 0;
  }
}
592 
// Reserves storage for one value of representation `rep` at the end of the
// message: the current size is aligned up, the value is appended, and the
// table size grows accordingly.  Returns the offset of the reserved storage.
// A resulting size above UINT16_MAX is an error.
static size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  static const size_t kMaxSize = UINT16_MAX;

  const size_t value_size = upb_MtDecoder_SizeOfRep(rep, d->platform);
  const size_t value_align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), value_align);
  const size_t end = offset + value_size;

  if (end > kMaxSize) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "Message size exceeded maximum size of %zu bytes", kMaxSize);
  }

  d->table->UPB_PRIVATE(size) = end;
  return offset;
}
606 
// Turns the sorted layout items into final field offsets.
//
// Three passes over the items, in this order:
//   1. place every item, which fixes its byte offset;
//   2. for every oneof case, store the complemented case offset in the
//      presence of each member field;
//   3. store the data offset in every field (all members of a oneof share the
//      oneof's data offset).
// Pass 2 has to come before pass 3 because it walks the linked list that is
// kept in the fields' offsets, and pass 3 overwrites those offsets.
// Finally the message size is rounded up to a multiple of 8.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* const begin = d->vec.data;
  upb_LayoutItem* const end = UPB_PTRADD(d->vec.data, d->vec.size);
  upb_LayoutItem* item;

  // Compute offsets.
  for (item = begin; item < end; item++) {
    item->offset = upb_MtDecoder_Place(d, item->rep);
  }

  // Assign oneof case offsets.  We must do these first, since assigning
  // actual offsets will overwrite the links of the linked list.
  for (item = begin; item < end; item++) {
    if (item->type == kUpb_LayoutItemType_OneofCase) {
      upb_MiniTableField* f = &d->fields[item->field_index];
      for (;;) {
        f->presence = ~item->offset;
        if (f->UPB_PRIVATE(offset) == kUpb_LayoutItem_IndexSentinel) break;
        UPB_ASSERT(f->UPB_PRIVATE(offset) - kOneofBase <
                   d->table->UPB_PRIVATE(field_count));
        f = &d->fields[f->UPB_PRIVATE(offset) - kOneofBase];
      }
    }
  }

  // Assign offsets.
  for (item = begin; item < end; item++) {
    upb_MiniTableField* f = &d->fields[item->field_index];
    if (item->type == kUpb_LayoutItemType_OneofField) {
      for (;;) {
        // Read the link before it is replaced by the real offset.
        uint16_t next_offset = f->UPB_PRIVATE(offset);
        f->UPB_PRIVATE(offset) = item->offset;
        if (next_offset == kUpb_LayoutItem_IndexSentinel) break;
        f = &d->fields[next_offset - kOneofBase];
      }
    } else if (item->type == kUpb_LayoutItemType_Field) {
      f->UPB_PRIVATE(offset) = item->offset;
    }
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->UPB_PRIVATE(size) = UPB_ALIGN_UP(d->table->UPB_PRIVATE(size), 8);
}
656 
// Checks that `f` is a valid map-entry field.  `expected_num` is 1 for the
// key and any other value (2 in practice) for the value field.
//
// The field must carry the expected number, must be scalar, and must not have
// a type that is disallowed for its role: keys cannot be float, double,
// message, group, bytes or enum; values cannot be groups.  Violations are
// reported through upb_MdDecoder_ErrorJmp().
static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d,
                                             const upb_MiniTableField* f,
                                             uint32_t expected_num) {
  const char* name = expected_num == 1 ? "key" : "val";
  const uint32_t f_number = upb_MiniTableField_Number(f);
  if (f_number != expected_num) {
    // Both numbers are uint32_t, so they are formatted with PRIu32.
    upb_MdDecoder_ErrorJmp(&d->base,
                           "map %s did not have expected number (%" PRIu32
                           " vs %" PRIu32 ")",
                           name, expected_num, f_number);
  }

  if (!upb_MiniTableField_IsScalar(f)) {
    upb_MdDecoder_ErrorJmp(
        &d->base, "map %s cannot be repeated or map, or be in oneof", name);
  }

  // Bit N set means that field type N is not allowed.  The masks are built
  // from unsigned constants to keep the shifts out of signed arithmetic.
  uint32_t not_ok_types;
  if (expected_num == 1) {
    not_ok_types = (UINT32_C(1) << kUpb_FieldType_Float) |
                   (UINT32_C(1) << kUpb_FieldType_Double) |
                   (UINT32_C(1) << kUpb_FieldType_Message) |
                   (UINT32_C(1) << kUpb_FieldType_Group) |
                   (UINT32_C(1) << kUpb_FieldType_Bytes) |
                   (UINT32_C(1) << kUpb_FieldType_Enum);
  } else {
    not_ok_types = UINT32_C(1) << kUpb_FieldType_Group;
  }

  if ((UINT32_C(1) << upb_MiniTableField_Type(f)) & not_ok_types) {
    upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name,
                           (int)f->UPB_PRIVATE(descriptortype));
  }
}
687 
// Parses a map-entry descriptor into d->table.
//
// After the generic message parse and hasbit assignment, the entry must have
// exactly two fields, no oneofs, and a valid key (field 1) and value
// (field 2).  The fields are then given the fixed offsets of upb_MapEntry
// instead of a computed layout, and the table is flagged as a map entry.
static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data,
                                   size_t len) {
  upb_MtDecoder_ParseMessage(d, data, len);
  upb_MtDecoder_AssignHasbits(d);

  upb_MiniTable* const table = d->table;

  if (UPB_UNLIKELY(table->UPB_PRIVATE(field_count) != 2)) {
    upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map",
                           table->UPB_PRIVATE(field_count));
    UPB_UNREACHABLE();
  }

  // Any oneof found while parsing has left a oneof-case item in the vector.
  upb_LayoutItem* const items_end = UPB_PTRADD(d->vec.data, d->vec.size);
  upb_LayoutItem* item = d->vec.data;
  while (item < items_end) {
    if (item->type == kUpb_LayoutItemType_OneofCase) {
      upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof");
    }
    item++;
  }

  upb_MtDecoder_ValidateEntryField(d, &table->UPB_PRIVATE(fields)[0], 1);
  upb_MtDecoder_ValidateEntryField(d, &table->UPB_PRIVATE(fields)[1], 2);

  d->fields[0].UPB_PRIVATE(offset) = offsetof(upb_MapEntry, k);
  d->fields[1].UPB_PRIVATE(offset) = offsetof(upb_MapEntry, v);
  table->UPB_PRIVATE(size) = sizeof(upb_MapEntry);

  // Map entries have a special bit set to signal it's a map entry, used in
  // upb_MiniTable_SetSubMessage() (not part of this function).
  table->UPB_PRIVATE(ext) |= kUpb_ExtMode_IsMapEntry;
}
717 
// Initializes d->table as a MessageSet MiniTable.
//
// A MessageSet encoding carries no payload after its version tag, so any
// remaining bytes (`len > 0`) are reported via upb_MdDecoder_ErrorJmp(). The
// resulting table has no fields and only the reserved hasbyte region as its
// size. `data` is not read.
static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data,
                                          size_t len) {
  if (len != 0) {
    upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu",
                           len);
  }

  upb_MiniTable* const table = d->table;
  table->UPB_PRIVATE(ext) = kUpb_ExtMode_IsMessageSet;
  table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
  table->UPB_PRIVATE(field_count) = 0;
  table->UPB_PRIVATE(required_count) = 0;
  table->UPB_PRIVATE(dense_below) = 0;
  table->UPB_PRIVATE(table_mask) = -1;
}
733 
upb_MtDecoder_DoBuildMiniTableWithBuf(upb_MtDecoder * decoder,const char * data,size_t len,void ** buf,size_t * buf_size)734 static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf(
735     upb_MtDecoder* decoder, const char* data, size_t len, void** buf,
736     size_t* buf_size) {
737   upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table);
738 
739   decoder->table->UPB_PRIVATE(size) = kUpb_Reserved_Hasbytes;
740   decoder->table->UPB_PRIVATE(field_count) = 0;
741   decoder->table->UPB_PRIVATE(ext) = kUpb_ExtMode_NonExtendable;
742   decoder->table->UPB_PRIVATE(dense_below) = 0;
743   decoder->table->UPB_PRIVATE(table_mask) = -1;
744   decoder->table->UPB_PRIVATE(required_count) = 0;
745 #if UPB_TRACING_ENABLED
746   // MiniTables built from MiniDescriptors will not be able to vend the message
747   // name unless it is explicitly set with upb_MiniTable_SetFullName().
748   decoder->table->UPB_PRIVATE(full_name) = 0;
749 #endif
750 
751   // Strip off and verify the version tag.
752   if (!len--) goto done;
753   const char vers = *data++;
754 
755   switch (vers) {
756     case kUpb_EncodedVersion_MapV1:
757       upb_MtDecoder_ParseMap(decoder, data, len);
758       break;
759 
760     case kUpb_EncodedVersion_MessageV1:
761       upb_MtDecoder_ParseMessage(decoder, data, len);
762       upb_MtDecoder_AssignHasbits(decoder);
763       upb_MtDecoder_SortLayoutItems(decoder);
764       upb_MtDecoder_AssignOffsets(decoder);
765       break;
766 
767     case kUpb_EncodedVersion_MessageSetV1:
768       upb_MtDecoder_ParseMessageSet(decoder, data, len);
769       break;
770 
771     default:
772       upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c",
773                              vers);
774   }
775 
776 done:
777   *buf = decoder->vec.data;
778   *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
779   return decoder->table;
780 }
781 
upb_MtDecoder_BuildMiniTableWithBuf(upb_MtDecoder * const decoder,const char * const data,const size_t len,void ** const buf,size_t * const buf_size)782 static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf(
783     upb_MtDecoder* const decoder, const char* const data, const size_t len,
784     void** const buf, size_t* const buf_size) {
785   if (UPB_SETJMP(decoder->base.err) != 0) {
786     *buf = decoder->vec.data;
787     *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
788     return NULL;
789   }
790 
791   return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf,
792                                                buf_size);
793 }
794 
upb_MiniTable_BuildWithBuf(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,void ** buf,size_t * buf_size,upb_Status * status)795 upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
796                                           upb_MiniTablePlatform platform,
797                                           upb_Arena* arena, void** buf,
798                                           size_t* buf_size,
799                                           upb_Status* status) {
800   upb_MtDecoder decoder = {
801       .base = {.status = status},
802       .platform = platform,
803       .vec =
804           {
805               .data = *buf,
806               .capacity = *buf_size / sizeof(*decoder.vec.data),
807               .size = 0,
808           },
809       .arena = arena,
810       .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
811   };
812 
813   return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf,
814                                              buf_size);
815 }
816 
// Decodes an extension MiniDescriptor into `*ext`.
//
// A non-empty input must begin with the extension version tag. The remainder
// is parsed with upb_MtDecoder_Parse() directly into `ext`, and must describe
// exactly one field. That field is marked as an extension and its offset and
// presence are cleared. If `extendee` is a MessageSet, the field must be a
// non-repeated sub-message.
//
// Returns the pointer produced by upb_MtDecoder_Parse() on success. Every
// validation failure is reported through upb_MdDecoder_ErrorJmp() so that the
// decoder's status carries a message describing what was wrong; this function
// must therefore only be called with decoder->base.err armed.
static const char* upb_MtDecoder_DoBuildMiniTableExtension(
    upb_MtDecoder* decoder, const char* data, size_t len,
    upb_MiniTableExtension* ext, const upb_MiniTable* extendee,
    upb_MiniTableSub sub) {
  // If the string is non-empty then it must begin with a version tag.
  if (len) {
    if (*data != kUpb_EncodedVersion_ExtensionV1) {
      upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data);
    }
    data++;
    len--;
  }

  uint16_t count = 0;
  upb_SubCounts sub_counts = {0, 0};
  const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext),
                                        &count, &sub_counts);
  // NOTE(review): a NULL result from the parser is passed through unchanged;
  // whether the parser can return NULL without error-jumping is not visible
  // here.
  if (!ret) return NULL;

  // An extension descriptor describes exactly one field.
  if (count != 1) {
    upb_MdDecoder_ErrorJmp(&decoder->base,
                           "Extension must have exactly one field, got %d",
                           (int)count);
  }

  upb_MiniTableField* f = &ext->UPB_PRIVATE(field);

  f->UPB_PRIVATE(mode) |= kUpb_LabelFlags_IsExtension;
  f->UPB_PRIVATE(offset) = 0;
  f->presence = 0;

  if (extendee->UPB_PRIVATE(ext) & kUpb_ExtMode_IsMessageSet) {
    // Extensions of MessageSet must be messages.
    if (!upb_MiniTableField_IsSubMessage(f)) {
      upb_MdDecoder_ErrorJmp(&decoder->base,
                             "Extensions of MessageSet must be messages");
    }

    // Extensions of MessageSet must be non-repeating.
    if (upb_MiniTableField_IsArray(f)) {
      upb_MdDecoder_ErrorJmp(&decoder->base,
                             "Extensions of MessageSet must be non-repeating");
    }
  }

  ext->UPB_PRIVATE(extendee) = extendee;
  ext->UPB_PRIVATE(sub) = sub;

  return ret;
}
855 
// Error-handling wrapper around upb_MtDecoder_DoBuildMiniTableExtension().
//
// Arms decoder->base.err with UPB_SETJMP, then runs the real builder and
// returns its result. If the builder error-jumps back here, returns NULL.
//
// All parameters are `const` so that none of them is modified between the
// UPB_SETJMP call and a later jump back to it.
static const char* upb_MtDecoder_BuildMiniTableExtension(
    upb_MtDecoder* const decoder, const char* const data, const size_t len,
    upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee,
    const upb_MiniTableSub sub) {
  if (UPB_SETJMP(decoder->base.err) == 0) {
    return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext,
                                                   extendee, sub);
  }

  // Reached via a jump out of the builder.
  return NULL;
}
864 
// Initializes the caller-provided `*ext` from the extension MiniDescriptor in
// `data[0..len)`.
//
// The decoder is set up with no arena and no table; only `platform` and
// `status` are supplied. Returns NULL on failure, otherwise the non-NULL
// pointer returned by the underlying builder.
const char* _upb_MiniTableExtension_Init(const char* data, size_t len,
                                         upb_MiniTableExtension* ext,
                                         const upb_MiniTable* extendee,
                                         upb_MiniTableSub sub,
                                         upb_MiniTablePlatform platform,
                                         upb_Status* status) {
  upb_MtDecoder decoder = {
      .platform = platform,
      .table = NULL,
      .arena = NULL,
      .base = {.status = status},
  };

  const char* const result = upb_MtDecoder_BuildMiniTableExtension(
      &decoder, data, len, ext, extendee, sub);
  return result;
}
881 
_upb_MiniTableExtension_Build(const char * data,size_t len,const upb_MiniTable * extendee,upb_MiniTableSub sub,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)882 upb_MiniTableExtension* _upb_MiniTableExtension_Build(
883     const char* data, size_t len, const upb_MiniTable* extendee,
884     upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena,
885     upb_Status* status) {
886   upb_MiniTableExtension* ext =
887       upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension));
888   if (UPB_UNLIKELY(!ext)) return NULL;
889 
890   const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub,
891                                                  platform, status);
892   if (UPB_UNLIKELY(!ptr)) return NULL;
893 
894   return ext;
895 }
896 
_upb_MiniTable_Build(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)897 upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len,
898                                     upb_MiniTablePlatform platform,
899                                     upb_Arena* arena, upb_Status* status) {
900   void* buf = NULL;
901   size_t size = 0;
902   upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
903                                                   &buf, &size, status);
904   free(buf);
905   return ret;
906 }
907