• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "python/unknown_fields.h"
9 
10 #include "python/message.h"
11 #include "python/protobuf.h"
12 #include "upb/wire/eps_copy_input_stream.h"
13 #include "upb/wire/reader.h"
14 #include "upb/wire/types.h"
15 
16 // -----------------------------------------------------------------------------
17 // UnknownFieldSet
18 // -----------------------------------------------------------------------------
19 
20 typedef struct {
21   PyObject_HEAD;
22   PyObject* fields;
23 } PyUpb_UnknownFieldSet;
24 
// tp_dealloc slot: drops the reference to the field list (when one exists) and
// then passes the object on to PyUpb_Dealloc().
static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) {
  PyUpb_UnknownFieldSet* set = (PyUpb_UnknownFieldSet*)_self;
  if (set->fields != NULL) {
    Py_DECREF(set->fields);
  }
  PyUpb_Dealloc(set);
}
30 
PyUpb_UnknownFieldSet_NewBare(void)31 PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) {
32   PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
33   PyUpb_UnknownFieldSet* self =
34       (void*)PyType_GenericAlloc(s->unknown_fields_type, 0);
35   return self;
36 }
37 
38 // For MessageSet the established behavior is for UnknownFieldSet to interpret
39 // the MessageSet wire format:
40 //    message MessageSet {
41 //      repeated group Item = 1 {
42 //        required int32 type_id = 2;
43 //        required bytes message = 3;
44 //      }
45 //    }
46 //
47 // And create unknown fields like:
48 //   UnknownField(type_id, WIRE_TYPE_DELIMITED, message)
49 //
50 // For any unknown fields that are unexpected per the wire format defined above,
51 // we drop them on the floor.
52 
53 enum {
54   kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup,
55   kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup,
56   kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint,
57   kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited,
58 };
59 
// Parses the body of one MessageSet Item group (the start-group tag has
// already been consumed by the caller) and, when both a non-zero type_id and a
// message payload were seen, appends
//   UnknownField(type_id, kUpb_WireType_Delimited, message)
// to self->fields.  Only the first non-zero type_id and the first message are
// used; later duplicates and any other fields inside the group are skipped.
//
// Returns the read position after the item, or NULL on a wire-format error or
// a failed Python call.
static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
  int type_id = 0;
  PyObject* msg = NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    switch (tag) {
      case kUpb_MessageSet_EndItemTag:
        goto done;
      case kUpb_MessageSet_TypeIdTag: {
        uint64_t tmp;
        ptr = upb_WireReader_ReadVarint(ptr, &tmp);
        if (!ptr) goto err;
        if (!type_id) type_id = (int)tmp;
        break;
      }
      case kUpb_MessageSet_MessageTag: {
        int size;
        ptr = upb_WireReader_ReadSize(ptr, &size);
        // Bail out before touching `size`: it is only meaningful when the
        // length prefix was read successfully.
        if (!ptr) goto err;
        if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
          goto err;
        }
        const char* str = ptr;
        ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
        if (!ptr) goto err;
        if (!msg) {
          msg = PyBytes_FromStringAndSize(str, size);
          if (!msg) goto err;
        } else {
          // already saw a message here so deliberately skipping the duplicate
        }
        break;
      }
      default:
        ptr = upb_WireReader_SkipValue(ptr, tag, stream);
        if (!ptr) goto err;
        break;
    }
  }

done:
  if (type_id && msg) {
    PyObject* field = PyObject_CallFunction(
        s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg);
    if (!field) goto err;
    // The list takes its own reference on success, so ours is dropped either
    // way; a failed append is reported to the caller instead of being ignored.
    int rc = PyList_Append(self->fields, field);
    Py_DECREF(field);
    if (rc < 0) goto err;
  }
  Py_XDECREF(msg);
  return ptr;

err:
  Py_XDECREF(msg);
  return NULL;
}
117 
// Builds self->fields from unknown data in MessageSet wire format.  Each
// top-level Item group is passed to PyUpb_UnknownFieldSet_BuildMessageSetItem();
// every other top-level field is skipped.
//
// Returns the final read position, or NULL on failure.  On failure after the
// list was created, the list is released and self->fields is reset to NULL.
static const char* PyUpb_UnknownFieldSet_BuildMessageSet(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr) {
  self->fields = PyList_New(0);
  // Nothing to clean up yet; the error path below would otherwise decref NULL.
  if (!self->fields) return NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    if (tag == kUpb_MessageSet_StartItemTag) {
      ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr);
    } else {
      ptr = upb_WireReader_SkipValue(ptr, tag, stream);
    }
    if (!ptr) goto err;
  }
  if (upb_EpsCopyInputStream_IsError(stream)) goto err;
  return ptr;

err:
  Py_DECREF(self->fields);
  self->fields = NULL;
  return NULL;
}
141 
142 static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
143                                                upb_EpsCopyInputStream* stream,
144                                                const char* ptr,
145                                                int group_number);
146 
// Reads one field value of the given wire type starting at `ptr` and stores a
// new Python object for it in `*data`:
//   - varint / fixed64 / fixed32 -> Python int (unsigned)
//   - delimited                  -> Python bytes
//   - start-group                -> nested UnknownFieldSet, built recursively
//                                   with `field_number` as its group number
//
// Returns the read position after the value, or NULL on failure.  On failure
// `*data` is either NULL or an object the caller must release (the partially
// built nested set in the group case).  On success `*data` is never NULL.
// `self` and `group_number` are not used here.
static const char* PyUpb_UnknownFieldSet_BuildValue(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr, int field_number, int wire_type, int group_number,
    PyObject** data) {
  *data = NULL;
  switch (wire_type) {
    case kUpb_WireType_Varint: {
      uint64_t val;
      ptr = upb_WireReader_ReadVarint(ptr, &val);
      if (!ptr) return NULL;
      *data = PyLong_FromUnsignedLongLong(val);
      break;
    }
    case kUpb_WireType_64Bit: {
      uint64_t val;
      ptr = upb_WireReader_ReadFixed64(ptr, &val);
      if (!ptr) return NULL;
      *data = PyLong_FromUnsignedLongLong(val);
      break;
    }
    case kUpb_WireType_32Bit: {
      uint32_t val;
      ptr = upb_WireReader_ReadFixed32(ptr, &val);
      if (!ptr) return NULL;
      *data = PyLong_FromUnsignedLongLong(val);
      break;
    }
    case kUpb_WireType_Delimited: {
      int size;
      ptr = upb_WireReader_ReadSize(ptr, &size);
      // `size` is only meaningful when the length prefix was read successfully.
      if (!ptr) return NULL;
      if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
        return NULL;
      }
      const char* str = ptr;
      ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
      if (!ptr) return NULL;
      *data = PyBytes_FromStringAndSize(str, size);
      break;
    }
    case kUpb_WireType_StartGroup: {
      PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare();
      if (!sub) return NULL;
      // Publish the nested set before building it so the caller can release it
      // if the build fails.
      *data = &sub->ob_base;
      return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number);
    }
    default:
      assert(0);
      *data = NULL;
      return NULL;
  }
  // A failed Python allocation must be reported as a failure rather than as a
  // successful read with no object.
  return *data ? ptr : NULL;
}
194 
195 // For non-MessageSet we just build the unknown fields exactly as they exist on
196 // the wire.
// Creates self->fields and fills it with one
//   UnknownField(field_number, wire_type, data)
// entry per field read from `stream`, in wire order.
//
// `group_number` is the field number of the enclosing group; the top-level
// caller passes -1.  Parsing stops at the end of the stream or at an end-group
// tag, which must carry `group_number`.
//
// Returns the read position after the last consumed byte, or NULL on failure.
// On failure after the list was created, the list is released and
// self->fields is reset to NULL.
static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
                                               upb_EpsCopyInputStream* stream,
                                               const char* ptr,
                                               int group_number) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
  self->fields = PyList_New(0);
  // Nothing to clean up yet; the error path below would otherwise decref NULL.
  if (!self->fields) return NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    PyObject* data = NULL;
    int field_number = upb_WireReader_GetFieldNumber(tag);
    int wire_type = upb_WireReader_GetWireType(tag);
    if (wire_type == kUpb_WireType_EndGroup) {
      // An end-group tag that does not close the current group is malformed.
      if (field_number != group_number) goto err;
      return ptr;
    }
    ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number,
                                           wire_type, group_number, &data);
    if (!ptr || !data) {
      Py_XDECREF(data);
      goto err;
    }
    // The "N" format hands our reference to `data` over to the call, so it
    // must not be released again here, even when the call fails.
    PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN",
                                            field_number, wire_type, data);
    if (!field) goto err;
    // The list takes its own reference on success, so ours is dropped either
    // way.
    int rc = PyList_Append(self->fields, field);
    Py_DECREF(field);
    if (rc < 0) goto err;
  }
  if (upb_EpsCopyInputStream_IsError(stream)) goto err;
  return ptr;

err:
  Py_DECREF(self->fields);
  self->fields = NULL;
  return NULL;
}
234 
// tp_new slot: UnknownFieldSet(message).
//
// Verifies that `message` is a upb message object and builds a set from its
// unknown-field bytes.  A message that is not reified, or has no unknown
// bytes, yields a set with no field list (length 0).  MessageSet messages are
// interpreted via PyUpb_UnknownFieldSet_BuildMessageSet(); all others via
// PyUpb_UnknownFieldSet_Build().
//
// `type` is not consulted; the instance is always allocated by
// PyUpb_UnknownFieldSet_NewBare().
//
// Returns a new reference, or NULL with a Python exception set.
static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args,
                                           PyObject* kwargs) {
  char* kwlist[] = {"message", 0};
  PyObject* py_msg = NULL;

  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) {
    return NULL;
  }

  if (!PyUpb_Message_Verify(py_msg)) return NULL;
  PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare();
  // Allocation failed: stop before `self` is dereferenced below.
  if (!self) return NULL;
  upb_Message* msg = PyUpb_Message_GetIfReified(py_msg);
  if (!msg) return &self->ob_base;

  size_t size;
  const char* ptr = upb_Message_GetUnknown(msg, &size);
  if (size == 0) return &self->ob_base;

  upb_EpsCopyInputStream stream;
  upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
  const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg);

  bool ok;
  if (upb_MessageDef_IsMessageSet(msgdef)) {
    ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL;
  } else {
    ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL;
  }

  if (!ok) {
    // Wire-format failures do not raise on their own; returning NULL without
    // an exception set would violate the C API contract.
    if (!PyErr_Occurred()) {
      PyErr_SetString(PyExc_SystemError, "failed to parse unknown fields");
    }
    Py_DECREF(&self->ob_base);
    return NULL;
  }

  return &self->ob_base;
}
271 
PyUpb_UnknownFieldSet_Length(PyObject * _self)272 static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) {
273   PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
274   return self->fields ? PyObject_Length(self->fields) : 0;
275 }
276 
// sq_item slot: returns a new reference to the entry at `index`.
//
// A set without a field list raises IndexError for every index; otherwise the
// lookup (and any error it reports) is delegated to PyList_GetItem().
static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self,
                                               Py_ssize_t index) {
  PyObject* list = ((PyUpb_UnknownFieldSet*)_self)->fields;
  if (list) {
    // PyList_GetItem() hands back a borrowed reference; take our own before
    // returning it to the caller.
    PyObject* item = PyList_GetItem(list, index);
    if (item) Py_INCREF(item);
    return item;
  }
  PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
  return NULL;
}
288 
289 static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = {
290     {Py_tp_new, &PyUpb_UnknownFieldSet_New},
291     {Py_tp_dealloc, &PyUpb_UnknownFieldSet_Dealloc},
292     {Py_sq_length, PyUpb_UnknownFieldSet_Length},
293     {Py_sq_item, PyUpb_UnknownFieldSet_GetItem},
294     {Py_tp_hash, PyObject_HashNotImplemented},
295     {0, NULL},
296 };
297 
298 static PyType_Spec PyUpb_UnknownFieldSet_Spec = {
299     PYUPB_MODULE_NAME ".UnknownFieldSet",  // tp_name
300     sizeof(PyUpb_UnknownFieldSet),         // tp_basicsize
301     0,                                     // tp_itemsize
302     Py_TPFLAGS_DEFAULT,                    // tp_flags
303     PyUpb_UnknownFieldSet_Slots,
304 };
305 
306 // -----------------------------------------------------------------------------
307 // Top Level
308 // -----------------------------------------------------------------------------
309 
PyUpb_UnknownFieldSet_CreateNamedTuple(void)310 PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) {
311   PyObject* mod = NULL;
312   PyObject* namedtuple = NULL;
313   PyObject* ret = NULL;
314 
315   mod = PyImport_ImportModule("collections");
316   if (!mod) goto done;
317   namedtuple = PyObject_GetAttrString(mod, "namedtuple");
318   if (!namedtuple) goto done;
319   ret = PyObject_CallFunction(namedtuple, "s[sss]", "PyUnknownField",
320                               "field_number", "wire_type", "data");
321 
322 done:
323   Py_XDECREF(mod);
324   Py_XDECREF(namedtuple);
325   return ret;
326 }
327 
// Registers the UnknownFieldSet class on module `m` and creates the
// unknown-field namedtuple class, storing both in the module state.  Both
// steps are always attempted; returns true only if both produced a value.
bool PyUpb_UnknownFields_Init(PyObject* m) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);

  state->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec);
  state->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple();

  if (!state->unknown_fields_type) {
    return false;
  }
  return state->unknown_field_type ? true : false;
}
336