• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "python/convert.h"
9 
10 #include "python/message.h"
11 #include "python/protobuf.h"
12 #include "upb/message/compare.h"
13 #include "upb/message/map.h"
14 #include "upb/reflection/def.h"
15 #include "upb/reflection/message.h"
16 #include "utf8_range.h"
17 
18 // Must be last.
19 #include "upb/port/def.inc"
20 
// Converts the upb value `val`, interpreted according to the C type of field
// `f`, into a Python object.
//
// `arena` is only used for message-typed fields, where it is forwarded to
// PyUpb_Message_Get().  If the field reports a C type that is not handled
// here, SystemError is set and NULL is returned.
PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
                        PyObject* arena) {
  PyObject* py = NULL;

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      py = PyBool_FromLong(val.bool_val);
      break;

    // Enum values are exposed to Python as plain integers.
    case kUpb_CType_Enum:
    case kUpb_CType_Int32:
      py = PyLong_FromLong(val.int32_val);
      break;
    case kUpb_CType_Int64:
      py = PyLong_FromLongLong(val.int64_val);
      break;
    case kUpb_CType_UInt32:
      py = PyLong_FromSize_t(val.uint32_val);
      break;
    case kUpb_CType_UInt64:
      py = PyLong_FromUnsignedLongLong(val.uint64_val);
      break;

    case kUpb_CType_Float:
      py = PyFloat_FromDouble(val.float_val);
      break;
    case kUpb_CType_Double:
      py = PyFloat_FromDouble(val.double_val);
      break;

    case kUpb_CType_Bytes:
      py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      break;
    case kUpb_CType_String:
      py = PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
      if (py == NULL) {
        // The payload is not valid UTF-8.  Values assigned through the Python
        // message object are always valid, but values parsed from the binary
        // wire format may not be.  Fall back to handing out the raw bytes.
        PyErr_Clear();
        py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      }
      break;

    case kUpb_CType_Message:
      py = PyUpb_Message_Get((upb_Message*)val.msg_val,
                             upb_FieldDef_MessageSubDef(f), arena);
      break;

    default:
      PyErr_Format(PyExc_SystemError,
                   "Getting a value from a field of unknown type %d",
                   upb_FieldDef_CType(f));
      break;
  }

  return py;
}
64 
// Reads `obj` as a signed 64-bit integer into `*val`.
//
// `obj` must be an integer or provide an __index__ conversion; otherwise the
// exception raised by PyNumber_Index() is left set.  A value that does not fit
// is reported as ValueError.  Returns true on success, false with a Python
// exception set on failure.
static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) {
  PyObject* index = PyNumber_Index(obj);
  if (index == NULL) return false;

  *val = PyLong_AsLongLong(index);
  if (!PyErr_Occurred()) {
    Py_DECREF(index);
    return true;
  }

  // Replace the OverflowError raised by the conversion with a ValueError that
  // shows the offending value.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", index);
  Py_DECREF(index);
  return false;
}
83 
// Reads `obj` as an unsigned 64-bit integer into `*val`.
//
// `obj` must be an integer or provide an __index__ conversion; otherwise the
// exception raised by PyNumber_Index() is left set.  A value that does not fit
// is reported as ValueError.  Returns true on success, false with a Python
// exception set on failure.
static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) {
  PyObject* index = PyNumber_Index(obj);
  if (index == NULL) return false;

  *val = PyLong_AsUnsignedLongLong(index);
  if (!PyErr_Occurred()) {
    Py_DECREF(index);
    return true;
  }

  // Replace the OverflowError raised by the conversion with a ValueError that
  // shows the offending value.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", index);
  Py_DECREF(index);
  return false;
}
100 
// Reads `obj` as a signed 32-bit integer into `*val`.
//
// The value is first read as 64-bit via PyUpb_GetInt64() and then
// range-checked.  Returns false with a Python exception set on failure;
// `*val` is only written on success.
static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) {
  int64_t wide;
  if (PyUpb_GetInt64(obj, &wide)) {
    if (wide >= INT32_MIN && wide <= INT32_MAX) {
      *val = wide;
      return true;
    }
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  }
  return false;
}
111 
// Reads `obj` as an unsigned 32-bit integer into `*val`.
//
// The value is first read as 64-bit via PyUpb_GetUint64() and then
// range-checked.  Returns false with a Python exception set on failure;
// `*val` is only written on success.
static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) {
  uint64_t wide;
  if (PyUpb_GetUint64(obj, &wide)) {
    if (wide <= UINT32_MAX) {
      *val = wide;
      return true;
    }
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  }
  return false;
}
122 
123 // If `arena` is specified, copies the string data into the given arena.
124 // Otherwise aliases the given data.
PyUpb_MaybeCopyString(const char * ptr,size_t size,upb_Arena * arena)125 static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size,
126                                               upb_Arena* arena) {
127   upb_MessageValue ret;
128   ret.str_val.size = size;
129   if (arena) {
130     char* buf = upb_Arena_Malloc(arena, size);
131     memcpy(buf, ptr, size);
132     ret.str_val.data = buf;
133   } else {
134     ret.str_val.data = ptr;
135   }
136   return ret;
137 }
138 
upb_FieldDef_TypeString(const upb_FieldDef * f)139 const char* upb_FieldDef_TypeString(const upb_FieldDef* f) {
140   switch (upb_FieldDef_CType(f)) {
141     case kUpb_CType_Double:
142       return "double";
143     case kUpb_CType_Float:
144       return "float";
145     case kUpb_CType_Int64:
146       return "int64";
147     case kUpb_CType_Int32:
148       return "int32";
149     case kUpb_CType_UInt64:
150       return "uint64";
151     case kUpb_CType_UInt32:
152       return "uint32";
153     case kUpb_CType_Enum:
154       return "enum";
155     case kUpb_CType_Bool:
156       return "bool";
157     case kUpb_CType_String:
158       return "string";
159     case kUpb_CType_Bytes:
160       return "bytes";
161     case kUpb_CType_Message:
162       return "message";
163   }
164   UPB_UNREACHABLE();
165 }
166 
// Converts the Python object `obj` into a value for enum `e`, storing the enum
// number in `val->int32_val`.
//
// `obj` may be a str naming one of the enum's values, or any object accepted
// by PyUpb_GetInt32().  For integer input, the number is validated against the
// enum's known values when the enum is closed (with
// UPB_FUTURE_PYTHON_CLOSED_ENUM_ENFORCEMENT) or when its file uses proto2
// syntax (otherwise).
//
// Returns true on success, false with a Python exception set on failure.
static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
                              upb_MessageValue* val) {
  if (PyUnicode_Check(obj)) {
    Py_ssize_t size;
    const char* name = PyUnicode_AsUTF8AndSize(obj, &size);
    // Encoding to UTF-8 can fail (for example for a str that contains lone
    // surrogates).  In that case `name` is NULL and an exception is already
    // set, so bail out before touching `name` or `size`.
    if (!name) return false;
    const upb_EnumValueDef* ev =
        upb_EnumDef_FindValueByNameWithSize(e, name, size);
    if (!ev) {
      PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name);
      return false;
    }
    val->int32_val = upb_EnumValueDef_Number(ev);
    return true;
  }

  int32_t i32;
  if (!PyUpb_GetInt32(obj, &i32)) return false;

  // Decide whether unknown numbers must be rejected for this enum.  Only the
  // predicate differs between the two build configurations.
#ifdef UPB_FUTURE_PYTHON_CLOSED_ENUM_ENFORCEMENT
  const bool reject_unknown = upb_EnumDef_IsClosed(e);
#else
  const bool reject_unknown =
      upb_FileDef_Syntax(upb_EnumDef_File(e)) == kUpb_Syntax_Proto2;
#endif
  if (reject_unknown && !upb_EnumDef_CheckNumber(e, i32)) {
    PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32);
    return false;
  }
  val->int32_val = i32;
  return true;
}
196 
197 bool PyUpb_IsNumpyNdarray(PyObject* obj, const upb_FieldDef* f) {
198   PyObject* type_name_obj =
199       PyObject_GetAttrString((PyObject*)Py_TYPE(obj), "__name__");
200   bool is_ndarray = false;
201   if (!strcmp(PyUpb_GetStrData(type_name_obj), "ndarray")) {
202     PyErr_Format(PyExc_TypeError,
203                  "%S has type ndarray, but expected one of: %s", obj,
204                  upb_FieldDef_TypeString(f));
205     is_ndarray = true;
206   }
207   Py_DECREF(type_name_obj);
208   return is_ndarray;
209 }
210 
211 bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
212                    upb_Arena* arena) {
213   switch (upb_FieldDef_CType(f)) {
214     case kUpb_CType_Enum:
215       return PyUpb_PyToUpbEnum(obj, upb_FieldDef_EnumSubDef(f), val);
216     case kUpb_CType_Int32:
217       return PyUpb_GetInt32(obj, &val->int32_val);
218     case kUpb_CType_Int64:
219       return PyUpb_GetInt64(obj, &val->int64_val);
220     case kUpb_CType_UInt32:
221       return PyUpb_GetUint32(obj, &val->uint32_val);
222     case kUpb_CType_UInt64:
223       return PyUpb_GetUint64(obj, &val->uint64_val);
224     case kUpb_CType_Float:
225       if (PyUpb_IsNumpyNdarray(obj, f)) return false;
226       val->float_val = PyFloat_AsDouble(obj);
227       return !PyErr_Occurred();
228     case kUpb_CType_Double:
229       if (PyUpb_IsNumpyNdarray(obj, f)) return false;
230       val->double_val = PyFloat_AsDouble(obj);
231       return !PyErr_Occurred();
232     case kUpb_CType_Bool:
233       if (PyUpb_IsNumpyNdarray(obj, f)) return false;
234       val->bool_val = PyLong_AsLong(obj);
235       return !PyErr_Occurred();
236     case kUpb_CType_Bytes: {
237       char* ptr;
238       Py_ssize_t size;
239       if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
240       *val = PyUpb_MaybeCopyString(ptr, size, arena);
241       return true;
242     }
243     case kUpb_CType_String: {
244       Py_ssize_t size;
245       if (PyBytes_Check(obj)) {
246         // Use the object's bytes if they are valid UTF-8.
247         char* ptr;
248         if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
249         if (!utf8_range_IsValid(ptr, size)) {
250           // Invalid UTF-8.  Try to convert the message to a Python Unicode
251           // object, even though we know this will fail, just to get the
252           // idiomatic Python error message.
253           obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
254           assert(!obj);
255           return false;
256         }
257         *val = PyUpb_MaybeCopyString(ptr, size, arena);
258         return true;
259       } else {
260         const char* ptr;
261         ptr = PyUnicode_AsUTF8AndSize(obj, &size);
262         if (PyErr_Occurred()) return false;
263         *val = PyUpb_MaybeCopyString(ptr, size, arena);
264         return true;
265       }
266     }
267     case kUpb_CType_Message:
268       PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
269       return false;
270     default:
271       PyErr_Format(PyExc_SystemError,
272                    "Getting a value from a field of unknown type %d",
273                    upb_FieldDef_CType(f));
274       return false;
275   }
276 }
277 
278 bool upb_Message_IsEqualByDef(const upb_Message* msg1, const upb_Message* msg2,
279                               const upb_MessageDef* msgdef, int options) {
280   const upb_MiniTable* m = upb_MessageDef_MiniTable(msgdef);
281   return upb_Message_IsEqual(msg1, msg2, m, options);
282 }
283 
284 #include "upb/port/undef.inc"
285