1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "python/protobuf.h"
9
10 #include "python/descriptor.h"
11 #include "python/descriptor_containers.h"
12 #include "python/descriptor_pool.h"
13 #include "python/extension_dict.h"
14 #include "python/map.h"
15 #include "python/message.h"
16 #include "python/repeated.h"
17 #include "python/unknown_fields.h"
18
// Forward declaration: PyUpb_WeakMap_New() uses this helper before its
// definition in the Arena section further down.
static upb_Arena* PyUpb_NewArena(void);
20
PyUpb_ModuleDealloc(void * module)21 static void PyUpb_ModuleDealloc(void* module) {
22 PyUpb_ModuleState* s = PyModule_GetState(module);
23 PyUpb_WeakMap_Free(s->obj_cache);
24 if (s->c_descriptor_symtab) {
25 upb_DefPool_Free(s->c_descriptor_symtab);
26 }
27 }
28
// Python-callable: SetAllowOversizeProtos(flag).
//
// Stores the truth value of `arg` in the module state's
// allow_oversize_protos field.  `arg` must be a Python bool; anything else
// raises TypeError.  Returns a new reference to `arg` on success, NULL on
// error.
PyObject* PyUpb_SetAllowOversizeProtos(PyObject* m, PyObject* arg) {
  if (arg != NULL && PyBool_Check(arg)) {
    PyUpb_ModuleState* module_state = PyUpb_ModuleState_Get();
    module_state->allow_oversize_protos = PyObject_IsTrue(arg);
    Py_INCREF(arg);
    return arg;
  }

  PyErr_SetString(PyExc_TypeError,
                  "Argument to SetAllowOversizeProtos must be boolean");
  return NULL;
}
40
41 static PyMethodDef PyUpb_ModuleMethods[] = {
42 {"SetAllowOversizeProtos", PyUpb_SetAllowOversizeProtos, METH_O,
43 "Enable/disable oversize proto parsing."},
44 {NULL, NULL}};
45
46 static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT,
47 PYUPB_MODULE_NAME,
48 "Protobuf Module",
49 sizeof(PyUpb_ModuleState),
50 PyUpb_ModuleMethods, // m_methods
51 NULL, // m_slots
52 NULL, // m_traverse
53 NULL, // m_clear
54 PyUpb_ModuleDealloc};
55
56 // -----------------------------------------------------------------------------
57 // ModuleState
58 // -----------------------------------------------------------------------------
59
PyUpb_ModuleState_MaybeGet(void)60 PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void) {
61 PyObject* module = PyState_FindModule(&module_def);
62 return module ? PyModule_GetState(module) : NULL;
63 }
64
PyUpb_ModuleState_GetFromModule(PyObject * module)65 PyUpb_ModuleState* PyUpb_ModuleState_GetFromModule(PyObject* module) {
66 PyUpb_ModuleState* state = PyModule_GetState(module);
67 assert(state);
68 assert(PyModule_GetDef(module) == &module_def);
69 return state;
70 }
71
PyUpb_ModuleState_Get(void)72 PyUpb_ModuleState* PyUpb_ModuleState_Get(void) {
73 PyObject* module = PyState_FindModule(&module_def);
74 assert(module);
75 return PyUpb_ModuleState_GetFromModule(module);
76 }
77
PyUpb_GetWktBases(PyUpb_ModuleState * state)78 PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state) {
79 if (!state->wkt_bases) {
80 PyObject* wkt_module = PyImport_ImportModule(PYUPB_PROTOBUF_INTERNAL_PACKAGE
81 ".well_known_types");
82
83 if (wkt_module == NULL) {
84 return false;
85 }
86
87 state->wkt_bases = PyObject_GetAttrString(wkt_module, "WKTBASES");
88 PyObject* m = PyState_FindModule(&module_def);
89 // Reparent ownership to m.
90 PyModule_AddObject(m, "__internal_wktbases", state->wkt_bases);
91 Py_DECREF(wkt_module);
92 }
93
94 return state->wkt_bases;
95 }
96
97 // -----------------------------------------------------------------------------
98 // WeakMap
99 // -----------------------------------------------------------------------------
100
// Map from (shifted) pointer keys to PyObject* values.  PyUpb_WeakMap_Add()
// stores the object pointer without taking a reference to it.
struct PyUpb_WeakMap {
  upb_inttable table;  // Key -> PyObject* entries.
  upb_Arena* arena;    // Backs both this struct and the table's storage.
};
105
PyUpb_WeakMap_New(void)106 PyUpb_WeakMap* PyUpb_WeakMap_New(void) {
107 upb_Arena* arena = PyUpb_NewArena();
108 PyUpb_WeakMap* map = upb_Arena_Malloc(arena, sizeof(*map));
109 map->arena = arena;
110 upb_inttable_init(&map->table, map->arena);
111 return map;
112 }
113
// Destroys `map` by freeing the arena that holds it and its table.
void PyUpb_WeakMap_Free(PyUpb_WeakMap* map) {
  upb_Arena* owner = map->arena;
  upb_Arena_Free(owner);
}
115
// To give better entropy in the table key, we shift away low bits that are
// always zero: 2 bits when pointers are 4 bytes wide, 3 bits otherwise.
static const int PyUpb_PtrShift = (sizeof(void*) == 4) ? 2 : 3;

// Converts a pointer into its table key by dropping the low PyUpb_PtrShift
// bits.  Asserts (in debug builds) that those bits are zero.
uintptr_t PyUpb_WeakMap_GetKey(const void* key) {
  const uintptr_t addr = (uintptr_t)key;
  const uintptr_t low_bits = ((uintptr_t)1 << PyUpb_PtrShift) - 1;
  assert((addr & low_bits) == 0);
  return addr >> PyUpb_PtrShift;
}
125
// Inserts `key` -> `py_obj` into the map.  The object pointer is stored
// as-is; no reference is taken here.
void PyUpb_WeakMap_Add(PyUpb_WeakMap* map, const void* key, PyObject* py_obj) {
  uintptr_t table_key = PyUpb_WeakMap_GetKey(key);
  upb_value boxed = upb_value_ptr(py_obj);
  upb_inttable_insert(&map->table, table_key, boxed, map->arena);
}
130
// Removes the entry for `key`.  The entry is expected to exist; this is
// asserted in debug builds.
void PyUpb_WeakMap_Delete(PyUpb_WeakMap* map, const void* key) {
  upb_value discarded;
  uintptr_t table_key = PyUpb_WeakMap_GetKey(key);
  bool was_present = upb_inttable_remove(&map->table, table_key, &discarded);
  assert(was_present);
  (void)was_present;  // Silences the unused-variable warning under NDEBUG.
}
138
// Removes the entry for `key` if there is one; the result of the removal is
// ignored.
void PyUpb_WeakMap_TryDelete(PyUpb_WeakMap* map, const void* key) {
  uintptr_t table_key = PyUpb_WeakMap_GetKey(key);
  upb_inttable_remove(&map->table, table_key, NULL);
}
142
PyUpb_WeakMap_Get(PyUpb_WeakMap * map,const void * key)143 PyObject* PyUpb_WeakMap_Get(PyUpb_WeakMap* map, const void* key) {
144 upb_value val;
145 if (upb_inttable_lookup(&map->table, PyUpb_WeakMap_GetKey(key), &val)) {
146 PyObject* ret = upb_value_getptr(val);
147 Py_INCREF(ret);
148 return ret;
149 } else {
150 return NULL;
151 }
152 }
153
// Advances `*iter` to the next entry.  On success, stores the original
// pointer key in `*key` and the stored object (borrowed) in `*obj`, and
// returns true.  Returns false, leaving the outputs untouched, when the
// underlying table iteration reports no further entry.
bool PyUpb_WeakMap_Next(PyUpb_WeakMap* map, const void** key, PyObject** obj,
                        intptr_t* iter) {
  uintptr_t shifted_key;
  upb_value entry;
  bool has_entry = upb_inttable_next(&map->table, &shifted_key, &entry, iter);
  if (has_entry) {
    // Undo the shift applied by PyUpb_WeakMap_GetKey().
    *key = (void*)(shifted_key << PyUpb_PtrShift);
    *obj = upb_value_getptr(entry);
  }
  return has_entry;
}
163
// Removes the entry that `*iter` currently refers to.
void PyUpb_WeakMap_DeleteIter(PyUpb_WeakMap* map, intptr_t* iter) {
  upb_inttable* entries = &map->table;
  upb_inttable_removeiter(entries, iter);
}
167
168 // -----------------------------------------------------------------------------
169 // ObjCache
170 // -----------------------------------------------------------------------------
171
PyUpb_ObjCache_Instance(void)172 PyUpb_WeakMap* PyUpb_ObjCache_Instance(void) {
173 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
174 return state->obj_cache;
175 }
176
// Registers `py_obj` under `key` in the module-wide object cache.
void PyUpb_ObjCache_Add(const void* key, PyObject* py_obj) {
  PyUpb_WeakMap* cache = PyUpb_ObjCache_Instance();
  PyUpb_WeakMap_Add(cache, key, py_obj);
}
180
PyUpb_ObjCache_Delete(const void * key)181 void PyUpb_ObjCache_Delete(const void* key) {
182 PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
183 if (!state) {
184 // During the shutdown sequence, our object's Dealloc() methods can be
185 // called *after* our module Dealloc() method has been called. At that
186 // point our state will be NULL and there is nothing to delete out of the
187 // map.
188 return;
189 }
190 PyUpb_WeakMap_Delete(state->obj_cache, key);
191 }
192
PyUpb_ObjCache_Get(const void * key)193 PyObject* PyUpb_ObjCache_Get(const void* key) {
194 return PyUpb_WeakMap_Get(PyUpb_ObjCache_Instance(), key);
195 }
196
197 // -----------------------------------------------------------------------------
198 // Arena
199 // -----------------------------------------------------------------------------
200
201 typedef struct {
202 PyObject_HEAD;
203 upb_Arena* arena;
204 } PyUpb_Arena;
205
206 #ifdef __GLIBC__
207 #include <malloc.h> // malloc_trim()
208 #endif
209
210 // A special allocator that calls malloc_trim() periodically to release
211 // memory to the OS. Without this call, we appear to leak memory, at least
212 // as measured in RSS.
213 //
214 // We opt to use this instead of PyMalloc (which would also solve the
215 // problem) because the latter requires the GIL to be held. This would make
216 // our messages unsafe to share with other languages that could free at
217 // unpredictable
218 // times.
upb_trim_allocfunc(upb_alloc * alloc,void * ptr,size_t oldsize,size_t size)219 static void* upb_trim_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize,
220 size_t size) {
221 (void)alloc;
222 (void)oldsize;
223 if (size == 0) {
224 free(ptr);
225 #ifdef __GLIBC__
226 static int count = 0;
227 if (++count == 10000) {
228 malloc_trim(0);
229 count = 0;
230 }
231 #endif
232 return NULL;
233 } else {
234 return realloc(ptr, size);
235 }
236 }
// Allocator instance whose allocation function is upb_trim_allocfunc.
static upb_alloc trim_alloc = {&upb_trim_allocfunc};
// Allocator passed to upb_Arena_Init() by PyUpb_NewArena().
static upb_alloc* global_alloc = &trim_alloc;
239
PyUpb_NewArena(void)240 static upb_Arena* PyUpb_NewArena(void) {
241 return upb_Arena_Init(NULL, 0, global_alloc);
242 }
243
PyUpb_Arena_New(void)244 PyObject* PyUpb_Arena_New(void) {
245 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
246 PyUpb_Arena* arena = (void*)PyType_GenericAlloc(state->arena_type, 0);
247 arena->arena = PyUpb_NewArena();
248 return &arena->ob_base;
249 }
250
// tp_dealloc for the Arena type: frees the wrapped upb_Arena first, then
// hands the Python object to PyUpb_Dealloc().
static void PyUpb_Arena_Dealloc(PyObject* self) {
  upb_Arena* wrapped = PyUpb_Arena_Get(self);
  upb_Arena_Free(wrapped);
  PyUpb_Dealloc(self);
}
255
PyUpb_Arena_Get(PyObject * arena)256 upb_Arena* PyUpb_Arena_Get(PyObject* arena) {
257 return ((PyUpb_Arena*)arena)->arena;
258 }
259
260 static PyType_Slot PyUpb_Arena_Slots[] = {
261 {Py_tp_dealloc, PyUpb_Arena_Dealloc},
262 {0, NULL},
263 };
264
265 static PyType_Spec PyUpb_Arena_Spec = {
266 PYUPB_MODULE_NAME ".Arena",
267 sizeof(PyUpb_Arena),
268 0, // itemsize
269 Py_TPFLAGS_DEFAULT,
270 PyUpb_Arena_Slots,
271 };
272
// Creates the Arena type, adds it to module `m`, and records it in the
// module state.  Returns false if the type could not be created or added.
static bool PyUpb_InitArena(PyObject* m) {
  PyUpb_ModuleState* module_state = PyUpb_ModuleState_GetFromModule(m);
  PyTypeObject* arena_type = PyUpb_AddClass(m, &PyUpb_Arena_Spec);
  module_state->arena_type = arena_type;
  return arena_type != NULL;
}
278
279 // -----------------------------------------------------------------------------
280 // Utilities
281 // -----------------------------------------------------------------------------
282
AddObject(PyObject * m,const char * name,PyType_Spec * spec)283 PyTypeObject* AddObject(PyObject* m, const char* name, PyType_Spec* spec) {
284 PyObject* type = PyType_FromSpec(spec);
285 return type && PyModule_AddObject(m, name, type) == 0 ? (PyTypeObject*)type
286 : NULL;
287 }
288
// Returns the unqualified class name from `spec`.
//
// spec->name contains a fully-qualified name, like:
//   google.protobuf.pyext._message.FooBar
// The result points just past the rightmost '.', i.e. at "FooBar", inside
// the same string.  Asserts that the name contains at least one '.'.
static const char* PyUpb_GetClassName(PyType_Spec* spec) {
  const char* last_dot = strrchr(spec->name, '.');
  assert(last_dot);
  return &last_dot[1];
}
298
PyUpb_AddClass(PyObject * m,PyType_Spec * spec)299 PyTypeObject* PyUpb_AddClass(PyObject* m, PyType_Spec* spec) {
300 PyObject* type = PyType_FromSpec(spec);
301 const char* name = PyUpb_GetClassName(spec);
302 if (PyModule_AddObject(m, name, type) < 0) {
303 Py_XDECREF(type);
304 return NULL;
305 }
306 return (PyTypeObject*)type;
307 }
308
PyUpb_AddClassWithBases(PyObject * m,PyType_Spec * spec,PyObject * bases)309 PyTypeObject* PyUpb_AddClassWithBases(PyObject* m, PyType_Spec* spec,
310 PyObject* bases) {
311 PyObject* type = PyType_FromSpecWithBases(spec, bases);
312 const char* name = PyUpb_GetClassName(spec);
313 if (PyModule_AddObject(m, name, type) < 0) {
314 Py_XDECREF(type);
315 return NULL;
316 }
317 return (PyTypeObject*)type;
318 }
319
PyUpb_AddClassWithRegister(PyObject * m,PyType_Spec * spec,PyObject * virtual_base,const char ** methods)320 PyTypeObject* PyUpb_AddClassWithRegister(PyObject* m, PyType_Spec* spec,
321 PyObject* virtual_base,
322 const char** methods) {
323 PyObject* type = PyType_FromSpec(spec);
324 PyObject* ret1 = PyObject_CallMethod(virtual_base, "register", "O", type);
325 if (!ret1) {
326 Py_XDECREF(type);
327 return NULL;
328 }
329 for (size_t i = 0; methods[i] != NULL; i++) {
330 PyObject* method = PyObject_GetAttrString(virtual_base, methods[i]);
331 if (!method) {
332 Py_XDECREF(type);
333 return NULL;
334 }
335 int ret2 = PyObject_SetAttrString(type, methods[i], method);
336 if (ret2 < 0) {
337 Py_XDECREF(type);
338 return NULL;
339 }
340 }
341
342 return (PyTypeObject*)type;
343 }
344
// Returns a pointer to the character data of `obj`: the UTF-8 form for a
// str object, or the raw buffer for a bytes object.  Returns NULL for any
// other type (no exception is set in that case).
const char* PyUpb_GetStrData(PyObject* obj) {
  if (PyUnicode_Check(obj)) {
    return PyUnicode_AsUTF8AndSize(obj, NULL);
  }
  if (PyBytes_Check(obj)) {
    return PyBytes_AsString(obj);
  }
  return NULL;
}
354
// Like PyUpb_GetStrData(), but raises TypeError when no string data could
// be obtained.  Returns NULL in that case.
const char* PyUpb_VerifyStrData(PyObject* obj) {
  const char* data = PyUpb_GetStrData(obj);
  if (data == NULL) {
    PyErr_Format(PyExc_TypeError, "Expected string: %S", obj);
  }
  return data;
}
361
// Constructor that always fails, raising RuntimeError with a message naming
// the class.  `args` and `kwds` are ignored.  Always returns NULL.
PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds) {
  (void)args;
  (void)kwds;

  PyObject* name = PyObject_GetAttrString(cls, "__name__");
  if (name == NULL) {
    // The attribute lookup already set an exception.  "%U" requires a
    // non-NULL str object, so do not format the message with a NULL name.
    return NULL;
  }
  PyErr_Format(PyExc_RuntimeError,
               "Objects of type %U may not be created directly.", name);
  Py_DECREF(name);
  return NULL;
}
369
// Translates a Python subscript `index` into (start, count, step) for a
// sequence of length `size`.
//
// - Slice: *i, *count and *step describe the adjusted slice.
// - Anything else: converted to an integer index; negative values count
//   from the end.  On success *count is 1 and *step is 0.
//
// Returns false with a Python exception set on failure: TypeError if the
// index cannot be converted, IndexError if it is out of range.
bool PyUpb_IndexToRange(PyObject* index, Py_ssize_t size, Py_ssize_t* i,
                        Py_ssize_t* count, Py_ssize_t* step) {
  assert(i && count && step);

  if (!PySlice_Check(index)) {
    Py_ssize_t pos = PyNumber_AsSsize_t(index, PyExc_IndexError);
    *i = pos;

    // -1 is also a legitimate index, so only treat it as a failure when an
    // exception is actually pending.
    if (pos == -1 && PyErr_Occurred()) {
      PyErr_SetString(PyExc_TypeError, "list indices must be integers");
      return false;
    }

    if (pos < 0) pos += size;
    *i = pos;
    *step = 0;
    *count = 1;

    if (pos < 0 || pos >= size) {
      PyErr_Format(PyExc_IndexError, "list index out of range");
      return false;
    }
    return true;
  }

  Py_ssize_t start, stop;
  if (PySlice_Unpack(index, &start, &stop, step) < 0) {
    return false;
  }
  *count = PySlice_AdjustIndices(size, &start, &stop, *step);
  *i = start;
  return true;
}
397
398 // -----------------------------------------------------------------------------
399 // Module Entry Point
400 // -----------------------------------------------------------------------------
401
PyInit__message(void)402 __attribute__((visibility("default"))) PyMODINIT_FUNC PyInit__message(void) {
403 PyObject* m = PyModule_Create(&module_def);
404 if (!m) return NULL;
405
406 PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
407
408 state->allow_oversize_protos = false;
409 state->wkt_bases = NULL;
410 state->obj_cache = PyUpb_WeakMap_New();
411 state->c_descriptor_symtab = NULL;
412
413 if (!PyUpb_InitDescriptorContainers(m) || !PyUpb_InitDescriptorPool(m) ||
414 !PyUpb_InitDescriptor(m) || !PyUpb_InitArena(m) ||
415 !PyUpb_InitExtensionDict(m) || !PyUpb_Map_Init(m) ||
416 !PyUpb_InitMessage(m) || !PyUpb_Repeated_Init(m) ||
417 !PyUpb_UnknownFields_Init(m)) {
418 Py_DECREF(m);
419 return NULL;
420 }
421
422 // Temporary: an cookie we can use in the tests to ensure we are testing upb
423 // and not another protobuf library on the system.
424 PyModule_AddIntConstant(m, "_IS_UPB", 1);
425
426 return m;
427 }
428