1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/message/promote.h"
9
10 #include <stdbool.h>
11 #include <stdint.h>
12 #include <string.h>
13
14 #include "upb/base/descriptor_constants.h"
15 #include "upb/mem/arena.h"
16 #include "upb/message/accessors.h"
17 #include "upb/message/array.h"
18 #include "upb/message/internal/array.h"
19 #include "upb/message/internal/extension.h"
20 #include "upb/message/internal/message.h"
21 #include "upb/message/internal/tagged_ptr.h"
22 #include "upb/message/map.h"
23 #include "upb/message/message.h"
24 #include "upb/message/tagged_ptr.h"
25 #include "upb/mini_table/extension.h"
26 #include "upb/mini_table/field.h"
27 #include "upb/mini_table/message.h"
28 #include "upb/mini_table/sub.h"
29 #include "upb/wire/decode.h"
30 #include "upb/wire/eps_copy_input_stream.h"
31 #include "upb/wire/reader.h"
32
33 // Must be last.
34 #include "upb/port/def.inc"
35
36 // Parses unknown data by merging into existing base_message or creating a
37 // new message usingg mini_table.
upb_MiniTable_ParseUnknownMessage(const char * unknown_data,size_t unknown_size,const upb_MiniTable * mini_table,upb_Message * base_message,int decode_options,upb_Arena * arena)38 static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage(
39 const char* unknown_data, size_t unknown_size,
40 const upb_MiniTable* mini_table, upb_Message* base_message,
41 int decode_options, upb_Arena* arena) {
42 upb_UnknownToMessageRet ret;
43 ret.message =
44 base_message ? base_message : _upb_Message_New(mini_table, arena);
45 if (!ret.message) {
46 ret.status = kUpb_UnknownToMessage_OutOfMemory;
47 return ret;
48 }
49 // Decode sub message using unknown field contents.
50 const char* data = unknown_data;
51 uint32_t tag;
52 uint64_t message_len = 0;
53 data = upb_WireReader_ReadTag(data, &tag);
54 data = upb_WireReader_ReadVarint(data, &message_len);
55 upb_DecodeStatus status = upb_Decode(data, message_len, ret.message,
56 mini_table, NULL, decode_options, arena);
57 if (status == kUpb_DecodeStatus_OutOfMemory) {
58 ret.status = kUpb_UnknownToMessage_OutOfMemory;
59 } else if (status == kUpb_DecodeStatus_Ok) {
60 ret.status = kUpb_UnknownToMessage_Ok;
61 } else {
62 ret.status = kUpb_UnknownToMessage_ParseError;
63 }
64 return ret;
65 }
66
upb_Message_GetOrPromoteExtension(upb_Message * msg,const upb_MiniTableExtension * ext_table,int decode_options,upb_Arena * arena,upb_MessageValue * value)67 upb_GetExtension_Status upb_Message_GetOrPromoteExtension(
68 upb_Message* msg, const upb_MiniTableExtension* ext_table,
69 int decode_options, upb_Arena* arena, upb_MessageValue* value) {
70 UPB_ASSERT(!upb_Message_IsFrozen(msg));
71 UPB_ASSERT(upb_MiniTableExtension_CType(ext_table) == kUpb_CType_Message);
72 const upb_Extension* extension =
73 UPB_PRIVATE(_upb_Message_Getext)(msg, ext_table);
74 if (extension) {
75 memcpy(value, &extension->data, sizeof(upb_MessageValue));
76 return kUpb_GetExtension_Ok;
77 }
78
79 // Check unknown fields, if available promote.
80 int field_number = upb_MiniTableExtension_Number(ext_table);
81 upb_FindUnknownRet result = upb_Message_FindUnknown(msg, field_number, 0);
82 if (result.status != kUpb_FindUnknown_Ok) {
83 return kUpb_GetExtension_NotPresent;
84 }
85 size_t len;
86 size_t ofs = result.ptr - upb_Message_GetUnknown(msg, &len);
87 // Decode and promote from unknown.
88 const upb_MiniTable* extension_table =
89 upb_MiniTableExtension_GetSubMessage(ext_table);
90 upb_UnknownToMessageRet parse_result = upb_MiniTable_ParseUnknownMessage(
91 result.ptr, result.len, extension_table,
92 /* base_message= */ NULL, decode_options, arena);
93 switch (parse_result.status) {
94 case kUpb_UnknownToMessage_OutOfMemory:
95 return kUpb_GetExtension_OutOfMemory;
96 case kUpb_UnknownToMessage_ParseError:
97 return kUpb_GetExtension_ParseError;
98 case kUpb_UnknownToMessage_NotFound:
99 return kUpb_GetExtension_NotPresent;
100 case kUpb_UnknownToMessage_Ok:
101 break;
102 }
103 upb_Message* extension_msg = parse_result.message;
104 // Add to extensions.
105 upb_Extension* ext =
106 UPB_PRIVATE(_upb_Message_GetOrCreateExtension)(msg, ext_table, arena);
107 if (!ext) {
108 return kUpb_GetExtension_OutOfMemory;
109 }
110 ext->data.msg_val = extension_msg;
111 value->msg_val = extension_msg;
112 const char* delete_ptr = upb_Message_GetUnknown(msg, &len) + ofs;
113 upb_Message_DeleteUnknown(msg, delete_ptr, result.len);
114 return kUpb_GetExtension_Ok;
115 }
116
upb_FindUnknownRet_ParseError(void)117 static upb_FindUnknownRet upb_FindUnknownRet_ParseError(void) {
118 return (upb_FindUnknownRet){.status = kUpb_FindUnknown_ParseError};
119 }
120
upb_Message_FindUnknown(const upb_Message * msg,uint32_t field_number,int depth_limit)121 upb_FindUnknownRet upb_Message_FindUnknown(const upb_Message* msg,
122 uint32_t field_number,
123 int depth_limit) {
124 depth_limit = depth_limit ? depth_limit : 100;
125
126 size_t size;
127 upb_FindUnknownRet ret;
128
129 const char* ptr = upb_Message_GetUnknown(msg, &size);
130 upb_EpsCopyInputStream stream;
131 upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
132
133 while (!upb_EpsCopyInputStream_IsDone(&stream, &ptr)) {
134 uint32_t tag;
135 const char* unknown_begin = ptr;
136 ptr = upb_WireReader_ReadTag(ptr, &tag);
137 if (!ptr) return upb_FindUnknownRet_ParseError();
138 if (field_number == upb_WireReader_GetFieldNumber(tag)) {
139 ret.status = kUpb_FindUnknown_Ok;
140 ret.ptr = upb_EpsCopyInputStream_GetAliasedPtr(&stream, unknown_begin);
141 ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream);
142 // Because we know that the input is a flat buffer, it is safe to perform
143 // pointer arithmetic on aliased pointers.
144 ret.len = upb_EpsCopyInputStream_GetAliasedPtr(&stream, ptr) - ret.ptr;
145 return ret;
146 }
147
148 ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream);
149 if (!ptr) return upb_FindUnknownRet_ParseError();
150 }
151 ret.status = kUpb_FindUnknown_NotPresent;
152 ret.ptr = NULL;
153 ret.len = 0;
154 return ret;
155 }
156
upb_Message_PromoteOne(upb_TaggedMessagePtr * tagged,const upb_MiniTable * mini_table,int decode_options,upb_Arena * arena)157 static upb_DecodeStatus upb_Message_PromoteOne(upb_TaggedMessagePtr* tagged,
158 const upb_MiniTable* mini_table,
159 int decode_options,
160 upb_Arena* arena) {
161 upb_Message* empty =
162 UPB_PRIVATE(_upb_TaggedMessagePtr_GetEmptyMessage)(*tagged);
163 size_t unknown_size;
164 const char* unknown_data = upb_Message_GetUnknown(empty, &unknown_size);
165 upb_Message* promoted = upb_Message_New(mini_table, arena);
166 if (!promoted) return kUpb_DecodeStatus_OutOfMemory;
167 upb_DecodeStatus status = upb_Decode(unknown_data, unknown_size, promoted,
168 mini_table, NULL, decode_options, arena);
169 if (status == kUpb_DecodeStatus_Ok) {
170 *tagged = UPB_PRIVATE(_upb_TaggedMessagePtr_Pack)(promoted, false);
171 }
172 return status;
173 }
174
upb_Message_PromoteMessage(upb_Message * parent,const upb_MiniTable * mini_table,const upb_MiniTableField * field,int decode_options,upb_Arena * arena,upb_Message ** promoted)175 upb_DecodeStatus upb_Message_PromoteMessage(upb_Message* parent,
176 const upb_MiniTable* mini_table,
177 const upb_MiniTableField* field,
178 int decode_options,
179 upb_Arena* arena,
180 upb_Message** promoted) {
181 UPB_ASSERT(!upb_Message_IsFrozen(parent));
182 const upb_MiniTable* sub_table =
183 upb_MiniTable_GetSubMessageTable(mini_table, field);
184 UPB_ASSERT(sub_table);
185 upb_TaggedMessagePtr tagged =
186 upb_Message_GetTaggedMessagePtr(parent, field, NULL);
187 upb_DecodeStatus ret =
188 upb_Message_PromoteOne(&tagged, sub_table, decode_options, arena);
189 if (ret == kUpb_DecodeStatus_Ok) {
190 *promoted = upb_TaggedMessagePtr_GetNonEmptyMessage(tagged);
191 upb_Message_SetMessage(parent, field, *promoted);
192 }
193 return ret;
194 }
195
upb_Array_PromoteMessages(upb_Array * arr,const upb_MiniTable * mini_table,int decode_options,upb_Arena * arena)196 upb_DecodeStatus upb_Array_PromoteMessages(upb_Array* arr,
197 const upb_MiniTable* mini_table,
198 int decode_options,
199 upb_Arena* arena) {
200 void** data = upb_Array_MutableDataPtr(arr);
201 size_t size = upb_Array_Size(arr);
202 for (size_t i = 0; i < size; i++) {
203 upb_TaggedMessagePtr tagged;
204 memcpy(&tagged, &data[i], sizeof(tagged));
205 if (!upb_TaggedMessagePtr_IsEmpty(tagged)) continue;
206 upb_DecodeStatus status =
207 upb_Message_PromoteOne(&tagged, mini_table, decode_options, arena);
208 if (status != kUpb_DecodeStatus_Ok) return status;
209 memcpy(&data[i], &tagged, sizeof(tagged));
210 }
211 return kUpb_DecodeStatus_Ok;
212 }
213
upb_Map_PromoteMessages(upb_Map * map,const upb_MiniTable * mini_table,int decode_options,upb_Arena * arena)214 upb_DecodeStatus upb_Map_PromoteMessages(upb_Map* map,
215 const upb_MiniTable* mini_table,
216 int decode_options, upb_Arena* arena) {
217 size_t iter = kUpb_Map_Begin;
218 upb_MessageValue key, val;
219 while (upb_Map_Next(map, &key, &val, &iter)) {
220 if (!upb_TaggedMessagePtr_IsEmpty(val.tagged_msg_val)) continue;
221 upb_DecodeStatus status = upb_Message_PromoteOne(
222 &val.tagged_msg_val, mini_table, decode_options, arena);
223 if (status != kUpb_DecodeStatus_Ok) return status;
224 upb_Map_SetEntryValue(map, iter, val);
225 }
226 return kUpb_DecodeStatus_Ok;
227 }
228
229 ////////////////////////////////////////////////////////////////////////////////
230 // OLD promotion functions, will be removed!
231 ////////////////////////////////////////////////////////////////////////////////
232
233 // Warning: See TODO
upb_MiniTable_PromoteUnknownToMessage(upb_Message * msg,const upb_MiniTable * mini_table,const upb_MiniTableField * field,const upb_MiniTable * sub_mini_table,int decode_options,upb_Arena * arena)234 upb_UnknownToMessageRet upb_MiniTable_PromoteUnknownToMessage(
235 upb_Message* msg, const upb_MiniTable* mini_table,
236 const upb_MiniTableField* field, const upb_MiniTable* sub_mini_table,
237 int decode_options, upb_Arena* arena) {
238 UPB_ASSERT(!upb_Message_IsFrozen(msg));
239 upb_FindUnknownRet unknown;
240 // We need to loop and merge unknowns that have matching tag field->number.
241 upb_Message* message = NULL;
242 // Callers should check that message is not set first before calling
243 // PromotoUnknownToMessage.
244 UPB_ASSERT(upb_MiniTable_GetSubMessageTable(mini_table, field) ==
245 sub_mini_table);
246 bool is_oneof = upb_MiniTableField_IsInOneof(field);
247 if (!is_oneof || UPB_PRIVATE(_upb_Message_GetOneofCase)(msg, field) ==
248 upb_MiniTableField_Number(field)) {
249 UPB_ASSERT(upb_Message_GetMessage(msg, field) == NULL);
250 }
251 upb_UnknownToMessageRet ret;
252 ret.status = kUpb_UnknownToMessage_Ok;
253 do {
254 unknown =
255 upb_Message_FindUnknown(msg, upb_MiniTableField_Number(field),
256 upb_DecodeOptions_GetMaxDepth(decode_options));
257 switch (unknown.status) {
258 case kUpb_FindUnknown_Ok: {
259 const char* unknown_data = unknown.ptr;
260 size_t unknown_size = unknown.len;
261 ret = upb_MiniTable_ParseUnknownMessage(unknown_data, unknown_size,
262 sub_mini_table, message,
263 decode_options, arena);
264 if (ret.status == kUpb_UnknownToMessage_Ok) {
265 message = ret.message;
266 upb_Message_DeleteUnknown(msg, unknown_data, unknown_size);
267 }
268 } break;
269 case kUpb_FindUnknown_ParseError:
270 ret.status = kUpb_UnknownToMessage_ParseError;
271 break;
272 case kUpb_FindUnknown_NotPresent:
273 // If we parsed at least one unknown, we are done.
274 ret.status =
275 message ? kUpb_UnknownToMessage_Ok : kUpb_UnknownToMessage_NotFound;
276 break;
277 }
278 } while (unknown.status == kUpb_FindUnknown_Ok);
279 if (message) {
280 if (is_oneof) {
281 UPB_PRIVATE(_upb_Message_SetOneofCase)(msg, field);
282 }
283 upb_Message_SetMessage(msg, field, message);
284 ret.message = message;
285 }
286 return ret;
287 }
288
289 // Moves repeated messages in unknowns to a upb_Array.
290 //
291 // Since the repeated field is not a scalar type we don't check for
292 // kUpb_LabelFlags_IsPacked.
293 // TODO: Optimize. Instead of converting messages one at a time,
294 // scan all unknown data once and compact.
upb_MiniTable_PromoteUnknownToMessageArray(upb_Message * msg,const upb_MiniTableField * field,const upb_MiniTable * mini_table,int decode_options,upb_Arena * arena)295 upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMessageArray(
296 upb_Message* msg, const upb_MiniTableField* field,
297 const upb_MiniTable* mini_table, int decode_options, upb_Arena* arena) {
298 UPB_ASSERT(!upb_Message_IsFrozen(msg));
299
300 upb_Array* repeated_messages = upb_Message_GetMutableArray(msg, field);
301 // Find all unknowns with given field number and parse.
302 upb_FindUnknownRet unknown;
303 do {
304 unknown =
305 upb_Message_FindUnknown(msg, upb_MiniTableField_Number(field),
306 upb_DecodeOptions_GetMaxDepth(decode_options));
307 if (unknown.status == kUpb_FindUnknown_Ok) {
308 upb_UnknownToMessageRet ret = upb_MiniTable_ParseUnknownMessage(
309 unknown.ptr, unknown.len, mini_table,
310 /* base_message= */ NULL, decode_options, arena);
311 if (ret.status == kUpb_UnknownToMessage_Ok) {
312 upb_MessageValue value;
313 value.msg_val = ret.message;
314 // Allocate array on demand before append.
315 if (!repeated_messages) {
316 upb_Message_ResizeArrayUninitialized(msg, field, 0, arena);
317 repeated_messages = upb_Message_GetMutableArray(msg, field);
318 }
319 if (!upb_Array_Append(repeated_messages, value, arena)) {
320 return kUpb_UnknownToMessage_OutOfMemory;
321 }
322 upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
323 } else {
324 return ret.status;
325 }
326 }
327 } while (unknown.status == kUpb_FindUnknown_Ok);
328 return kUpb_UnknownToMessage_Ok;
329 }
330
331 // Moves repeated messages in unknowns to a upb_Map.
upb_MiniTable_PromoteUnknownToMap(upb_Message * msg,const upb_MiniTable * mini_table,const upb_MiniTableField * field,int decode_options,upb_Arena * arena)332 upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap(
333 upb_Message* msg, const upb_MiniTable* mini_table,
334 const upb_MiniTableField* field, int decode_options, upb_Arena* arena) {
335 UPB_ASSERT(!upb_Message_IsFrozen(msg));
336
337 const upb_MiniTable* map_entry_mini_table =
338 upb_MiniTable_MapEntrySubMessage(mini_table, field);
339 UPB_ASSERT(upb_MiniTable_FieldCount(map_entry_mini_table) == 2);
340 // Find all unknowns with given field number and parse.
341 upb_FindUnknownRet unknown;
342 while (1) {
343 unknown =
344 upb_Message_FindUnknown(msg, upb_MiniTableField_Number(field),
345 upb_DecodeOptions_GetMaxDepth(decode_options));
346 if (unknown.status != kUpb_FindUnknown_Ok) break;
347 upb_UnknownToMessageRet ret = upb_MiniTable_ParseUnknownMessage(
348 unknown.ptr, unknown.len, map_entry_mini_table,
349 /* base_message= */ NULL, decode_options, arena);
350 if (ret.status != kUpb_UnknownToMessage_Ok) return ret.status;
351 // Allocate map on demand before append.
352 upb_Map* map = upb_Message_GetOrCreateMutableMap(msg, map_entry_mini_table,
353 field, arena);
354 upb_Message* map_entry_message = ret.message;
355 bool insert_success = upb_Message_SetMapEntry(map, mini_table, field,
356 map_entry_message, arena);
357 if (!insert_success) return kUpb_UnknownToMessage_OutOfMemory;
358 upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
359 }
360 return kUpb_UnknownToMessage_Ok;
361 }
362