1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: anuraag@google.com (Anuraag Agrawal)
32 // Author: tibell@google.com (Johan Tibell)
33
34 #include <google/protobuf/pyext/message.h>
35
36 #include <map>
37 #include <memory>
38 #ifndef _SHARED_PTR_H
39 #include <google/protobuf/stubs/shared_ptr.h>
40 #endif
41 #include <string>
42 #include <vector>
43 #include <structmember.h> // A Python header file.
44
45 #ifndef PyVarObject_HEAD_INIT
46 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
47 #endif
48 #ifndef Py_TYPE
49 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
50 #endif
51 #include <google/protobuf/descriptor.pb.h>
52 #include <google/protobuf/stubs/common.h>
53 #include <google/protobuf/stubs/logging.h>
54 #include <google/protobuf/io/coded_stream.h>
55 #include <google/protobuf/util/message_differencer.h>
56 #include <google/protobuf/descriptor.h>
57 #include <google/protobuf/message.h>
58 #include <google/protobuf/text_format.h>
59 #include <google/protobuf/unknown_field_set.h>
60 #include <google/protobuf/pyext/descriptor.h>
61 #include <google/protobuf/pyext/descriptor_pool.h>
62 #include <google/protobuf/pyext/extension_dict.h>
63 #include <google/protobuf/pyext/repeated_composite_container.h>
64 #include <google/protobuf/pyext/repeated_scalar_container.h>
65 #include <google/protobuf/pyext/map_container.h>
66 #include <google/protobuf/pyext/scoped_pyobject_ptr.h>
67 #include <google/protobuf/stubs/strutil.h>
68
// Python 3 compatibility shims: the rest of this file is written against the
// Python 2 "PyInt_*" / "PyString_*" spellings, which are mapped here onto
// their Python 3 counterparts.
#if PY_MAJOR_VERSION >= 3
  #define PyInt_Check PyLong_Check
  #define PyInt_AsLong PyLong_AsLong
  #define PyInt_FromLong PyLong_FromLong
  #define PyInt_FromSize_t PyLong_FromSize_t
  #define PyString_Check PyUnicode_Check
  #define PyString_FromString PyUnicode_FromString
  #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
  #if PY_VERSION_HEX < 0x03030000
    #error "Python 3.0 - 3.2 are not supported."
  #else
  // Accepts either a unicode or a bytes object and yields its character data.
  #define PyString_AsString(ob) \
    (PyUnicode_Check(ob)? PyUnicode_AsUTF8(ob): PyBytes_AsString(ob))
  // Same contract as the Python 2 function of that name: stores the data
  // pointer and length through charpp/sizep, evaluates to 0 on success and -1
  // on failure.  PyUnicode_AsUTF8AndSize returns `const char*` on newer
  // Python 3 releases, hence the const_cast before storing into a `char*`.
  #define PyString_AsStringAndSize(ob, charpp, sizep) \
    (PyUnicode_Check(ob)? \
       ((*(charpp) = const_cast<char*>( \
             PyUnicode_AsUTF8AndSize(ob, (sizep)))) == NULL? -1: 0): \
       PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
  #endif
#endif
88
89 namespace google {
90 namespace protobuf {
91 namespace python {
92
93 static PyObject* kDESCRIPTOR;
94 static PyObject* k_extensions_by_name;
95 static PyObject* k_extensions_by_number;
96 PyObject* EnumTypeWrapper_class;
97 static PyObject* PythonMessage_class;
98 static PyObject* kEmptyWeakref;
99 static PyObject* WKT_classes = NULL;
100
101 namespace message_meta {
102
103 static int InsertEmptyWeakref(PyTypeObject* base);
104
105 // Add the number of a field descriptor to the containing message class.
106 // Equivalent to:
107 // _cls.<field>_FIELD_NUMBER = <number>
AddFieldNumberToClass(PyObject * cls,const FieldDescriptor * field_descriptor)108 static bool AddFieldNumberToClass(
109 PyObject* cls, const FieldDescriptor* field_descriptor) {
110 string constant_name = field_descriptor->name() + "_FIELD_NUMBER";
111 UpperString(&constant_name);
112 ScopedPyObjectPtr attr_name(PyString_FromStringAndSize(
113 constant_name.c_str(), constant_name.size()));
114 if (attr_name == NULL) {
115 return false;
116 }
117 ScopedPyObjectPtr number(PyInt_FromLong(field_descriptor->number()));
118 if (number == NULL) {
119 return false;
120 }
121 if (PyObject_SetAttr(cls, attr_name.get(), number.get()) == -1) {
122 return false;
123 }
124 return true;
125 }
126
127
128 // Finalize the creation of the Message class.
AddDescriptors(PyObject * cls,const Descriptor * descriptor)129 static int AddDescriptors(PyObject* cls, const Descriptor* descriptor) {
130 // If there are extension_ranges, the message is "extendable", and extension
131 // classes will register themselves in this class.
132 if (descriptor->extension_range_count() > 0) {
133 ScopedPyObjectPtr by_name(PyDict_New());
134 if (PyObject_SetAttr(cls, k_extensions_by_name, by_name.get()) < 0) {
135 return -1;
136 }
137 ScopedPyObjectPtr by_number(PyDict_New());
138 if (PyObject_SetAttr(cls, k_extensions_by_number, by_number.get()) < 0) {
139 return -1;
140 }
141 }
142
143 // For each field set: cls.<field>_FIELD_NUMBER = <number>
144 for (int i = 0; i < descriptor->field_count(); ++i) {
145 if (!AddFieldNumberToClass(cls, descriptor->field(i))) {
146 return -1;
147 }
148 }
149
150 // For each enum set cls.<enum name> = EnumTypeWrapper(<enum descriptor>).
151 for (int i = 0; i < descriptor->enum_type_count(); ++i) {
152 const EnumDescriptor* enum_descriptor = descriptor->enum_type(i);
153 ScopedPyObjectPtr enum_type(
154 PyEnumDescriptor_FromDescriptor(enum_descriptor));
155 if (enum_type == NULL) {
156 return -1;
157 }
158 // Add wrapped enum type to message class.
159 ScopedPyObjectPtr wrapped(PyObject_CallFunctionObjArgs(
160 EnumTypeWrapper_class, enum_type.get(), NULL));
161 if (wrapped == NULL) {
162 return -1;
163 }
164 if (PyObject_SetAttrString(
165 cls, enum_descriptor->name().c_str(), wrapped.get()) == -1) {
166 return -1;
167 }
168
169 // For each enum value add cls.<name> = <number>
170 for (int j = 0; j < enum_descriptor->value_count(); ++j) {
171 const EnumValueDescriptor* enum_value_descriptor =
172 enum_descriptor->value(j);
173 ScopedPyObjectPtr value_number(PyInt_FromLong(
174 enum_value_descriptor->number()));
175 if (value_number == NULL) {
176 return -1;
177 }
178 if (PyObject_SetAttrString(cls, enum_value_descriptor->name().c_str(),
179 value_number.get()) == -1) {
180 return -1;
181 }
182 }
183 }
184
185 // For each extension set cls.<extension name> = <extension descriptor>.
186 //
187 // Extension descriptors come from
188 // <message descriptor>.extensions_by_name[name]
189 // which was defined previously.
190 for (int i = 0; i < descriptor->extension_count(); ++i) {
191 const google::protobuf::FieldDescriptor* field = descriptor->extension(i);
192 ScopedPyObjectPtr extension_field(PyFieldDescriptor_FromDescriptor(field));
193 if (extension_field == NULL) {
194 return -1;
195 }
196
197 // Add the extension field to the message class.
198 if (PyObject_SetAttrString(
199 cls, field->name().c_str(), extension_field.get()) == -1) {
200 return -1;
201 }
202
203 // For each extension set cls.<extension name>_FIELD_NUMBER = <number>.
204 if (!AddFieldNumberToClass(cls, field)) {
205 return -1;
206 }
207 }
208
209 return 0;
210 }
211
New(PyTypeObject * type,PyObject * args,PyObject * kwargs)212 static PyObject* New(PyTypeObject* type,
213 PyObject* args, PyObject* kwargs) {
214 static char *kwlist[] = {"name", "bases", "dict", 0};
215 PyObject *bases, *dict;
216 const char* name;
217
218 // Check arguments: (name, bases, dict)
219 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", kwlist,
220 &name,
221 &PyTuple_Type, &bases,
222 &PyDict_Type, &dict)) {
223 return NULL;
224 }
225
226 // Check bases: only (), or (message.Message,) are allowed
227 if (!(PyTuple_GET_SIZE(bases) == 0 ||
228 (PyTuple_GET_SIZE(bases) == 1 &&
229 PyTuple_GET_ITEM(bases, 0) == PythonMessage_class))) {
230 PyErr_SetString(PyExc_TypeError,
231 "A Message class can only inherit from Message");
232 return NULL;
233 }
234
235 // Check dict['DESCRIPTOR']
236 PyObject* py_descriptor = PyDict_GetItem(dict, kDESCRIPTOR);
237 if (py_descriptor == NULL) {
238 PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
239 return NULL;
240 }
241 if (!PyObject_TypeCheck(py_descriptor, &PyMessageDescriptor_Type)) {
242 PyErr_Format(PyExc_TypeError, "Expected a message Descriptor, got %s",
243 py_descriptor->ob_type->tp_name);
244 return NULL;
245 }
246
247 // Build the arguments to the base metaclass.
248 // We change the __bases__ classes.
249 ScopedPyObjectPtr new_args;
250 const Descriptor* message_descriptor =
251 PyMessageDescriptor_AsDescriptor(py_descriptor);
252 if (message_descriptor == NULL) {
253 return NULL;
254 }
255
256 if (WKT_classes == NULL) {
257 ScopedPyObjectPtr well_known_types(PyImport_ImportModule(
258 "google.protobuf.internal.well_known_types"));
259 GOOGLE_DCHECK(well_known_types != NULL);
260
261 WKT_classes = PyObject_GetAttrString(well_known_types.get(), "WKTBASES");
262 GOOGLE_DCHECK(WKT_classes != NULL);
263 }
264
265 PyObject* well_known_class = PyDict_GetItemString(
266 WKT_classes, message_descriptor->full_name().c_str());
267 if (well_known_class == NULL) {
268 new_args.reset(Py_BuildValue("s(OO)O", name, &CMessage_Type,
269 PythonMessage_class, dict));
270 } else {
271 new_args.reset(Py_BuildValue("s(OOO)O", name, &CMessage_Type,
272 PythonMessage_class, well_known_class, dict));
273 }
274
275 if (new_args == NULL) {
276 return NULL;
277 }
278 // Call the base metaclass.
279 ScopedPyObjectPtr result(PyType_Type.tp_new(type, new_args.get(), NULL));
280 if (result == NULL) {
281 return NULL;
282 }
283 CMessageClass* newtype = reinterpret_cast<CMessageClass*>(result.get());
284
285 // Insert the empty weakref into the base classes.
286 if (InsertEmptyWeakref(
287 reinterpret_cast<PyTypeObject*>(PythonMessage_class)) < 0 ||
288 InsertEmptyWeakref(&CMessage_Type) < 0) {
289 return NULL;
290 }
291
292 // Cache the descriptor, both as Python object and as C++ pointer.
293 const Descriptor* descriptor =
294 PyMessageDescriptor_AsDescriptor(py_descriptor);
295 if (descriptor == NULL) {
296 return NULL;
297 }
298 Py_INCREF(py_descriptor);
299 newtype->py_message_descriptor = py_descriptor;
300 newtype->message_descriptor = descriptor;
301 // TODO(amauryfa): Don't always use the canonical pool of the descriptor,
302 // use the MessageFactory optionally passed in the class dict.
303 newtype->py_descriptor_pool = GetDescriptorPool_FromPool(
304 descriptor->file()->pool());
305 if (newtype->py_descriptor_pool == NULL) {
306 return NULL;
307 }
308 Py_INCREF(newtype->py_descriptor_pool);
309
310 // Add the message to the DescriptorPool.
311 if (cdescriptor_pool::RegisterMessageClass(newtype->py_descriptor_pool,
312 descriptor, newtype) < 0) {
313 return NULL;
314 }
315
316 // Continue with type initialization: add other descriptors, enum values...
317 if (AddDescriptors(result.get(), descriptor) < 0) {
318 return NULL;
319 }
320 return result.release();
321 }
322
Dealloc(CMessageClass * self)323 static void Dealloc(CMessageClass *self) {
324 Py_DECREF(self->py_message_descriptor);
325 Py_DECREF(self->py_descriptor_pool);
326 Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
327 }
328
329
330 // This function inserts and empty weakref at the end of the list of
331 // subclasses for the main protocol buffer Message class.
332 //
333 // This eliminates a O(n^2) behaviour in the internal add_subclass
334 // routine.
InsertEmptyWeakref(PyTypeObject * base_type)335 static int InsertEmptyWeakref(PyTypeObject *base_type) {
336 #if PY_MAJOR_VERSION >= 3
337 // Python 3.4 has already included the fix for the issue that this
338 // hack addresses. For further background and the fix please see
339 // https://bugs.python.org/issue17936.
340 return 0;
341 #else
342 PyObject *subclasses = base_type->tp_subclasses;
343 if (subclasses && PyList_CheckExact(subclasses)) {
344 return PyList_Append(subclasses, kEmptyWeakref);
345 }
346 return 0;
347 #endif // PY_MAJOR_VERSION >= 3
348 }
349
350 } // namespace message_meta
351
352 PyTypeObject CMessageClass_Type = {
353 PyVarObject_HEAD_INIT(&PyType_Type, 0)
354 FULL_MODULE_NAME ".MessageMeta", // tp_name
355 sizeof(CMessageClass), // tp_basicsize
356 0, // tp_itemsize
357 (destructor)message_meta::Dealloc, // tp_dealloc
358 0, // tp_print
359 0, // tp_getattr
360 0, // tp_setattr
361 0, // tp_compare
362 0, // tp_repr
363 0, // tp_as_number
364 0, // tp_as_sequence
365 0, // tp_as_mapping
366 0, // tp_hash
367 0, // tp_call
368 0, // tp_str
369 0, // tp_getattro
370 0, // tp_setattro
371 0, // tp_as_buffer
372 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
373 "The metaclass of ProtocolMessages", // tp_doc
374 0, // tp_traverse
375 0, // tp_clear
376 0, // tp_richcompare
377 0, // tp_weaklistoffset
378 0, // tp_iter
379 0, // tp_iternext
380 0, // tp_methods
381 0, // tp_members
382 0, // tp_getset
383 0, // tp_base
384 0, // tp_dict
385 0, // tp_descr_get
386 0, // tp_descr_set
387 0, // tp_dictoffset
388 0, // tp_init
389 0, // tp_alloc
390 message_meta::New, // tp_new
391 };
392
CheckMessageClass(PyTypeObject * cls)393 static CMessageClass* CheckMessageClass(PyTypeObject* cls) {
394 if (!PyObject_TypeCheck(cls, &CMessageClass_Type)) {
395 PyErr_Format(PyExc_TypeError, "Class %s is not a Message", cls->tp_name);
396 return NULL;
397 }
398 return reinterpret_cast<CMessageClass*>(cls);
399 }
400
GetMessageDescriptor(PyTypeObject * cls)401 static const Descriptor* GetMessageDescriptor(PyTypeObject* cls) {
402 CMessageClass* type = CheckMessageClass(cls);
403 if (type == NULL) {
404 return NULL;
405 }
406 return type->message_descriptor;
407 }
408
409 // Forward declarations
410 namespace cmessage {
411 int InternalReleaseFieldByDescriptor(
412 CMessage* self,
413 const FieldDescriptor* field_descriptor,
414 PyObject* composite_field);
415 } // namespace cmessage
416
417 // ---------------------------------------------------------------------
418 // Visiting the composite children of a CMessage
419
420 struct ChildVisitor {
421 // Returns 0 on success, -1 on failure.
VisitRepeatedCompositeContainergoogle::protobuf::python::ChildVisitor422 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
423 return 0;
424 }
425
426 // Returns 0 on success, -1 on failure.
VisitRepeatedScalarContainergoogle::protobuf::python::ChildVisitor427 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
428 return 0;
429 }
430
431 // Returns 0 on success, -1 on failure.
VisitCMessagegoogle::protobuf::python::ChildVisitor432 int VisitCMessage(CMessage* cmessage,
433 const FieldDescriptor* field_descriptor) {
434 return 0;
435 }
436 };
437
438 // Apply a function to a composite field. Does nothing if child is of
439 // non-composite type.
440 template<class Visitor>
VisitCompositeField(const FieldDescriptor * descriptor,PyObject * child,Visitor visitor)441 static int VisitCompositeField(const FieldDescriptor* descriptor,
442 PyObject* child,
443 Visitor visitor) {
444 if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
445 if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
446 if (descriptor->is_map()) {
447 MapContainer* container = reinterpret_cast<MapContainer*>(child);
448 if (visitor.VisitMapContainer(container) == -1) {
449 return -1;
450 }
451 } else {
452 RepeatedCompositeContainer* container =
453 reinterpret_cast<RepeatedCompositeContainer*>(child);
454 if (visitor.VisitRepeatedCompositeContainer(container) == -1)
455 return -1;
456 }
457 } else {
458 RepeatedScalarContainer* container =
459 reinterpret_cast<RepeatedScalarContainer*>(child);
460 if (visitor.VisitRepeatedScalarContainer(container) == -1)
461 return -1;
462 }
463 } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
464 CMessage* cmsg = reinterpret_cast<CMessage*>(child);
465 if (visitor.VisitCMessage(cmsg, descriptor) == -1)
466 return -1;
467 }
468 // The ExtensionDict might contain non-composite fields, which we
469 // skip here.
470 return 0;
471 }
472
473 // Visit each composite field and extension field of this CMessage.
474 // Returns -1 on error and 0 on success.
475 template<class Visitor>
ForEachCompositeField(CMessage * self,Visitor visitor)476 int ForEachCompositeField(CMessage* self, Visitor visitor) {
477 Py_ssize_t pos = 0;
478 PyObject* key;
479 PyObject* field;
480
481 // Visit normal fields.
482 if (self->composite_fields) {
483 // Never use self->message in this function, it may be already freed.
484 const Descriptor* message_descriptor =
485 GetMessageDescriptor(Py_TYPE(self));
486 while (PyDict_Next(self->composite_fields, &pos, &key, &field)) {
487 Py_ssize_t key_str_size;
488 char *key_str_data;
489 if (PyString_AsStringAndSize(key, &key_str_data, &key_str_size) != 0)
490 return -1;
491 const string key_str(key_str_data, key_str_size);
492 const FieldDescriptor* descriptor =
493 message_descriptor->FindFieldByName(key_str);
494 if (descriptor != NULL) {
495 if (VisitCompositeField(descriptor, field, visitor) == -1)
496 return -1;
497 }
498 }
499 }
500
501 // Visit extension fields.
502 if (self->extensions != NULL) {
503 pos = 0;
504 while (PyDict_Next(self->extensions->values, &pos, &key, &field)) {
505 const FieldDescriptor* descriptor = cmessage::GetExtensionDescriptor(key);
506 if (descriptor == NULL)
507 return -1;
508 if (VisitCompositeField(descriptor, field, visitor) == -1)
509 return -1;
510 }
511 }
512
513 return 0;
514 }
515
516 // ---------------------------------------------------------------------
517
518 // Constants used for integer type range checking.
519 PyObject* kPythonZero;
520 PyObject* kint32min_py;
521 PyObject* kint32max_py;
522 PyObject* kuint32max_py;
523 PyObject* kint64min_py;
524 PyObject* kint64max_py;
525 PyObject* kuint64max_py;
526
527 PyObject* EncodeError_class;
528 PyObject* DecodeError_class;
529 PyObject* PickleError_class;
530
531 // Constant PyString values used for GetAttr/GetItem.
532 static PyObject* k_cdescriptor;
533 static PyObject* kfull_name;
534
535 /* Is 64bit */
FormatTypeError(PyObject * arg,char * expected_types)536 void FormatTypeError(PyObject* arg, char* expected_types) {
537 PyObject* repr = PyObject_Repr(arg);
538 if (repr) {
539 PyErr_Format(PyExc_TypeError,
540 "%.100s has type %.100s, but expected one of: %s",
541 PyString_AsString(repr),
542 Py_TYPE(arg)->tp_name,
543 expected_types);
544 Py_DECREF(repr);
545 }
546 }
547
548 template<class T>
CheckAndGetInteger(PyObject * arg,T * value,PyObject * min,PyObject * max)549 bool CheckAndGetInteger(
550 PyObject* arg, T* value, PyObject* min, PyObject* max) {
551 bool is_long = PyLong_Check(arg);
552 #if PY_MAJOR_VERSION < 3
553 if (!PyInt_Check(arg) && !is_long) {
554 FormatTypeError(arg, "int, long");
555 return false;
556 }
557 if (PyObject_Compare(min, arg) > 0 || PyObject_Compare(max, arg) < 0) {
558 #else
559 if (!is_long) {
560 FormatTypeError(arg, "int");
561 return false;
562 }
563 if (PyObject_RichCompareBool(min, arg, Py_LE) != 1 ||
564 PyObject_RichCompareBool(max, arg, Py_GE) != 1) {
565 #endif
566 if (!PyErr_Occurred()) {
567 PyObject *s = PyObject_Str(arg);
568 if (s) {
569 PyErr_Format(PyExc_ValueError,
570 "Value out of range: %s",
571 PyString_AsString(s));
572 Py_DECREF(s);
573 }
574 }
575 return false;
576 }
577 #if PY_MAJOR_VERSION < 3
578 if (!is_long) {
579 *value = static_cast<T>(PyInt_AsLong(arg));
580 } else // NOLINT
581 #endif
582 {
583 if (min == kPythonZero) {
584 *value = static_cast<T>(PyLong_AsUnsignedLongLong(arg));
585 } else {
586 *value = static_cast<T>(PyLong_AsLongLong(arg));
587 }
588 }
589 return true;
590 }
591
592 // These are referenced by repeated_scalar_container, and must
593 // be explicitly instantiated.
594 template bool CheckAndGetInteger<int32>(
595 PyObject*, int32*, PyObject*, PyObject*);
596 template bool CheckAndGetInteger<int64>(
597 PyObject*, int64*, PyObject*, PyObject*);
598 template bool CheckAndGetInteger<uint32>(
599 PyObject*, uint32*, PyObject*, PyObject*);
600 template bool CheckAndGetInteger<uint64>(
601 PyObject*, uint64*, PyObject*, PyObject*);
602
603 bool CheckAndGetDouble(PyObject* arg, double* value) {
604 if (!PyInt_Check(arg) && !PyLong_Check(arg) &&
605 !PyFloat_Check(arg)) {
606 FormatTypeError(arg, "int, long, float");
607 return false;
608 }
609 *value = PyFloat_AsDouble(arg);
610 return true;
611 }
612
613 bool CheckAndGetFloat(PyObject* arg, float* value) {
614 double double_value;
615 if (!CheckAndGetDouble(arg, &double_value)) {
616 return false;
617 }
618 *value = static_cast<float>(double_value);
619 return true;
620 }
621
622 bool CheckAndGetBool(PyObject* arg, bool* value) {
623 if (!PyInt_Check(arg) && !PyBool_Check(arg) && !PyLong_Check(arg)) {
624 FormatTypeError(arg, "int, long, bool");
625 return false;
626 }
627 *value = static_cast<bool>(PyInt_AsLong(arg));
628 return true;
629 }
630
631 // Checks whether the given object (which must be "bytes" or "unicode") contains
632 // valid UTF-8.
633 bool IsValidUTF8(PyObject* obj) {
634 if (PyBytes_Check(obj)) {
635 PyObject* unicode = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
636
637 // Clear the error indicator; we report our own error when desired.
638 PyErr_Clear();
639
640 if (unicode) {
641 Py_DECREF(unicode);
642 return true;
643 } else {
644 return false;
645 }
646 } else {
647 // Unicode object, known to be valid UTF-8.
648 return true;
649 }
650 }
651
652 bool AllowInvalidUTF8(const FieldDescriptor* field) { return false; }
653
654 PyObject* CheckString(PyObject* arg, const FieldDescriptor* descriptor) {
655 GOOGLE_DCHECK(descriptor->type() == FieldDescriptor::TYPE_STRING ||
656 descriptor->type() == FieldDescriptor::TYPE_BYTES);
657 if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
658 if (!PyBytes_Check(arg) && !PyUnicode_Check(arg)) {
659 FormatTypeError(arg, "bytes, unicode");
660 return NULL;
661 }
662
663 if (!IsValidUTF8(arg) && !AllowInvalidUTF8(descriptor)) {
664 PyObject* repr = PyObject_Repr(arg);
665 PyErr_Format(PyExc_ValueError,
666 "%s has type str, but isn't valid UTF-8 "
667 "encoding. Non-UTF-8 strings must be converted to "
668 "unicode objects before being added.",
669 PyString_AsString(repr));
670 Py_DECREF(repr);
671 return NULL;
672 }
673 } else if (!PyBytes_Check(arg)) {
674 FormatTypeError(arg, "bytes");
675 return NULL;
676 }
677
678 PyObject* encoded_string = NULL;
679 if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
680 if (PyBytes_Check(arg)) {
681 // The bytes were already validated as correctly encoded UTF-8 above.
682 encoded_string = arg; // Already encoded.
683 Py_INCREF(encoded_string);
684 } else {
685 encoded_string = PyUnicode_AsEncodedObject(arg, "utf-8", NULL);
686 }
687 } else {
688 // In this case field type is "bytes".
689 encoded_string = arg;
690 Py_INCREF(encoded_string);
691 }
692
693 return encoded_string;
694 }
695
696 bool CheckAndSetString(
697 PyObject* arg, Message* message,
698 const FieldDescriptor* descriptor,
699 const Reflection* reflection,
700 bool append,
701 int index) {
702 ScopedPyObjectPtr encoded_string(CheckString(arg, descriptor));
703
704 if (encoded_string.get() == NULL) {
705 return false;
706 }
707
708 char* value;
709 Py_ssize_t value_len;
710 if (PyBytes_AsStringAndSize(encoded_string.get(), &value, &value_len) < 0) {
711 return false;
712 }
713
714 string value_string(value, value_len);
715 if (append) {
716 reflection->AddString(message, descriptor, value_string);
717 } else if (index < 0) {
718 reflection->SetString(message, descriptor, value_string);
719 } else {
720 reflection->SetRepeatedString(message, descriptor, index, value_string);
721 }
722 return true;
723 }
724
725 PyObject* ToStringObject(const FieldDescriptor* descriptor, string value) {
726 if (descriptor->type() != FieldDescriptor::TYPE_STRING) {
727 return PyBytes_FromStringAndSize(value.c_str(), value.length());
728 }
729
730 PyObject* result = PyUnicode_DecodeUTF8(value.c_str(), value.length(), NULL);
731 // If the string can't be decoded in UTF-8, just return a string object that
732 // contains the raw bytes. This can't happen if the value was assigned using
733 // the members of the Python message object, but can happen if the values were
734 // parsed from the wire (binary).
735 if (result == NULL) {
736 PyErr_Clear();
737 result = PyBytes_FromStringAndSize(value.c_str(), value.length());
738 }
739 return result;
740 }
741
742 bool CheckFieldBelongsToMessage(const FieldDescriptor* field_descriptor,
743 const Message* message) {
744 if (message->GetDescriptor() == field_descriptor->containing_type()) {
745 return true;
746 }
747 PyErr_Format(PyExc_KeyError, "Field '%s' does not belong to message '%s'",
748 field_descriptor->full_name().c_str(),
749 message->GetDescriptor()->full_name().c_str());
750 return false;
751 }
752
753 namespace cmessage {
754
755 PyDescriptorPool* GetDescriptorPoolForMessage(CMessage* message) {
756 // No need to check the type: the type of instances of CMessage is always
757 // an instance of CMessageClass. Let's prove it with a debug-only check.
758 GOOGLE_DCHECK(PyObject_TypeCheck(message, &CMessage_Type));
759 return reinterpret_cast<CMessageClass*>(Py_TYPE(message))->py_descriptor_pool;
760 }
761
762 MessageFactory* GetFactoryForMessage(CMessage* message) {
763 return GetDescriptorPoolForMessage(message)->message_factory;
764 }
765
766 static int MaybeReleaseOverlappingOneofField(
767 CMessage* cmessage,
768 const FieldDescriptor* field) {
769 #ifdef GOOGLE_PROTOBUF_HAS_ONEOF
770 Message* message = cmessage->message;
771 const Reflection* reflection = message->GetReflection();
772 if (!field->containing_oneof() ||
773 !reflection->HasOneof(*message, field->containing_oneof()) ||
774 reflection->HasField(*message, field)) {
775 // No other field in this oneof, no need to release.
776 return 0;
777 }
778
779 const OneofDescriptor* oneof = field->containing_oneof();
780 const FieldDescriptor* existing_field =
781 reflection->GetOneofFieldDescriptor(*message, oneof);
782 if (existing_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
783 // Non-message fields don't need to be released.
784 return 0;
785 }
786 const char* field_name = existing_field->name().c_str();
787 PyObject* child_message = cmessage->composite_fields ?
788 PyDict_GetItemString(cmessage->composite_fields, field_name) : NULL;
789 if (child_message == NULL) {
790 // No python reference to this field so no need to release.
791 return 0;
792 }
793
794 if (InternalReleaseFieldByDescriptor(
795 cmessage, existing_field, child_message) < 0) {
796 return -1;
797 }
798 return PyDict_DelItemString(cmessage->composite_fields, field_name);
799 #else
800 return 0;
801 #endif
802 }
803
804 // ---------------------------------------------------------------------
805 // Making a message writable
806
807 static Message* GetMutableMessage(
808 CMessage* parent,
809 const FieldDescriptor* parent_field) {
810 Message* parent_message = parent->message;
811 const Reflection* reflection = parent_message->GetReflection();
812 if (MaybeReleaseOverlappingOneofField(parent, parent_field) < 0) {
813 return NULL;
814 }
815 return reflection->MutableMessage(
816 parent_message, parent_field, GetFactoryForMessage(parent));
817 }
818
819 struct FixupMessageReference : public ChildVisitor {
820 // message must outlive this object.
821 explicit FixupMessageReference(Message* message) :
822 message_(message) {}
823
824 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
825 container->message = message_;
826 return 0;
827 }
828
829 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
830 container->message = message_;
831 return 0;
832 }
833
834 int VisitMapContainer(MapContainer* container) {
835 container->message = message_;
836 return 0;
837 }
838
839 private:
840 Message* message_;
841 };
842
843 int AssureWritable(CMessage* self) {
844 if (self == NULL || !self->read_only) {
845 return 0;
846 }
847
848 if (self->parent == NULL) {
849 // If parent is NULL but we are trying to modify a read-only message, this
850 // is a reference to a constant default instance that needs to be replaced
851 // with a mutable top-level message.
852 self->message = self->message->New();
853 self->owner.reset(self->message);
854 // Cascade the new owner to eventual children: even if this message is
855 // empty, some submessages or repeated containers might exist already.
856 SetOwner(self, self->owner);
857 } else {
858 // Otherwise, we need a mutable child message.
859 if (AssureWritable(self->parent) == -1)
860 return -1;
861
862 // Make self->message writable.
863 Message* mutable_message = GetMutableMessage(
864 self->parent,
865 self->parent_field_descriptor);
866 if (mutable_message == NULL) {
867 return -1;
868 }
869 self->message = mutable_message;
870 }
871 self->read_only = false;
872
873 // When a CMessage is made writable its Message pointer is updated
874 // to point to a new mutable Message. When that happens we need to
875 // update any references to the old, read-only CMessage. There are
876 // four places such references occur: RepeatedScalarContainer,
877 // RepeatedCompositeContainer, MapContainer, and ExtensionDict.
878 if (self->extensions != NULL)
879 self->extensions->message = self->message;
880 if (ForEachCompositeField(self, FixupMessageReference(self->message)) == -1)
881 return -1;
882
883 return 0;
884 }
885
886 // --- Globals:
887
888 // Retrieve a C++ FieldDescriptor for a message attribute.
889 // The C++ message must be valid.
890 // TODO(amauryfa): This function should stay internal, because exception
891 // handling is not consistent.
892 static const FieldDescriptor* GetFieldDescriptor(
893 CMessage* self, PyObject* name) {
894 const Descriptor *message_descriptor = self->message->GetDescriptor();
895 char* field_name;
896 Py_ssize_t size;
897 if (PyString_AsStringAndSize(name, &field_name, &size) < 0) {
898 return NULL;
899 }
900 const FieldDescriptor *field_descriptor =
901 message_descriptor->FindFieldByName(string(field_name, size));
902 if (field_descriptor == NULL) {
903 // Note: No exception is set!
904 return NULL;
905 }
906 return field_descriptor;
907 }
908
909 // Retrieve a C++ FieldDescriptor for an extension handle.
910 const FieldDescriptor* GetExtensionDescriptor(PyObject* extension) {
911 ScopedPyObjectPtr cdescriptor;
912 if (!PyObject_TypeCheck(extension, &PyFieldDescriptor_Type)) {
913 // Most callers consider extensions as a plain dictionary. We should
914 // allow input which is not a field descriptor, and simply pretend it does
915 // not exist.
916 PyErr_SetObject(PyExc_KeyError, extension);
917 return NULL;
918 }
919 return PyFieldDescriptor_AsDescriptor(extension);
920 }
921
922 // If value is a string, convert it into an enum value based on the labels in
923 // descriptor, otherwise simply return value. Always returns a new reference.
924 static PyObject* GetIntegerEnumValue(const FieldDescriptor& descriptor,
925 PyObject* value) {
926 if (PyString_Check(value) || PyUnicode_Check(value)) {
927 const EnumDescriptor* enum_descriptor = descriptor.enum_type();
928 if (enum_descriptor == NULL) {
929 PyErr_SetString(PyExc_TypeError, "not an enum field");
930 return NULL;
931 }
932 char* enum_label;
933 Py_ssize_t size;
934 if (PyString_AsStringAndSize(value, &enum_label, &size) < 0) {
935 return NULL;
936 }
937 const EnumValueDescriptor* enum_value_descriptor =
938 enum_descriptor->FindValueByName(string(enum_label, size));
939 if (enum_value_descriptor == NULL) {
940 PyErr_SetString(PyExc_ValueError, "unknown enum label");
941 return NULL;
942 }
943 return PyInt_FromLong(enum_value_descriptor->number());
944 }
945 Py_INCREF(value);
946 return value;
947 }
948
949 // If cmessage_list is not NULL, this function releases values into the
950 // container CMessages instead of just removing. Repeated composite container
951 // needs to do this to make sure CMessages stay alive if they're still
952 // referenced after deletion. Repeated scalar container doesn't need to worry.
953 int InternalDeleteRepeatedField(
954 CMessage* self,
955 const FieldDescriptor* field_descriptor,
956 PyObject* slice,
957 PyObject* cmessage_list) {
958 Message* message = self->message;
959 Py_ssize_t length, from, to, step, slice_length;
960 const Reflection* reflection = message->GetReflection();
961 int min, max;
962 length = reflection->FieldSize(*message, field_descriptor);
963
964 if (PyInt_Check(slice) || PyLong_Check(slice)) {
965 from = to = PyLong_AsLong(slice);
966 if (from < 0) {
967 from = to = length + from;
968 }
969 step = 1;
970 min = max = from;
971
972 // Range check.
973 if (from < 0 || from >= length) {
974 PyErr_Format(PyExc_IndexError, "list assignment index out of range");
975 return -1;
976 }
977 } else if (PySlice_Check(slice)) {
978 from = to = step = slice_length = 0;
979 PySlice_GetIndicesEx(
980 #if PY_MAJOR_VERSION < 3
981 reinterpret_cast<PySliceObject*>(slice),
982 #else
983 slice,
984 #endif
985 length, &from, &to, &step, &slice_length);
986 if (from < to) {
987 min = from;
988 max = to - 1;
989 } else {
990 min = to + 1;
991 max = from;
992 }
993 } else {
994 PyErr_SetString(PyExc_TypeError, "list indices must be integers");
995 return -1;
996 }
997
998 Py_ssize_t i = from;
999 std::vector<bool> to_delete(length, false);
1000 while (i >= min && i <= max) {
1001 to_delete[i] = true;
1002 i += step;
1003 }
1004
1005 to = 0;
1006 for (i = 0; i < length; ++i) {
1007 if (!to_delete[i]) {
1008 if (i != to) {
1009 reflection->SwapElements(message, field_descriptor, i, to);
1010 if (cmessage_list != NULL) {
1011 // If a list of cmessages is passed in (i.e. from a repeated
1012 // composite container), swap those as well to correspond to the
1013 // swaps in the underlying message so they're in the right order
1014 // when we start releasing.
1015 PyObject* tmp = PyList_GET_ITEM(cmessage_list, i);
1016 PyList_SET_ITEM(cmessage_list, i,
1017 PyList_GET_ITEM(cmessage_list, to));
1018 PyList_SET_ITEM(cmessage_list, to, tmp);
1019 }
1020 }
1021 ++to;
1022 }
1023 }
1024
1025 while (i > to) {
1026 if (cmessage_list == NULL) {
1027 reflection->RemoveLast(message, field_descriptor);
1028 } else {
1029 CMessage* last_cmessage = reinterpret_cast<CMessage*>(
1030 PyList_GET_ITEM(cmessage_list, PyList_GET_SIZE(cmessage_list) - 1));
1031 repeated_composite_container::ReleaseLastTo(
1032 self, field_descriptor, last_cmessage);
1033 if (PySequence_DelItem(cmessage_list, -1) < 0) {
1034 return -1;
1035 }
1036 }
1037 --i;
1038 }
1039
1040 return 0;
1041 }
1042
1043 // Initializes fields of a message. Used in constructors.
1044 int InitAttributes(CMessage* self, PyObject* args, PyObject* kwargs) {
1045 if (args != NULL && PyTuple_Size(args) != 0) {
1046 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
1047 return -1;
1048 }
1049
1050 if (kwargs == NULL) {
1051 return 0;
1052 }
1053
1054 Py_ssize_t pos = 0;
1055 PyObject* name;
1056 PyObject* value;
1057 while (PyDict_Next(kwargs, &pos, &name, &value)) {
1058 if (!PyString_Check(name)) {
1059 PyErr_SetString(PyExc_ValueError, "Field name must be a string");
1060 return -1;
1061 }
1062 const FieldDescriptor* descriptor = GetFieldDescriptor(self, name);
1063 if (descriptor == NULL) {
1064 PyErr_Format(PyExc_ValueError, "Protocol message %s has no \"%s\" field.",
1065 self->message->GetDescriptor()->name().c_str(),
1066 PyString_AsString(name));
1067 return -1;
1068 }
1069 if (value == Py_None) {
1070 // field=None is the same as no field at all.
1071 continue;
1072 }
1073 if (descriptor->is_map()) {
1074 ScopedPyObjectPtr map(GetAttr(self, name));
1075 const FieldDescriptor* value_descriptor =
1076 descriptor->message_type()->FindFieldByName("value");
1077 if (value_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1078 Py_ssize_t map_pos = 0;
1079 PyObject* map_key;
1080 PyObject* map_value;
1081 while (PyDict_Next(value, &map_pos, &map_key, &map_value)) {
1082 ScopedPyObjectPtr function_return;
1083 function_return.reset(PyObject_GetItem(map.get(), map_key));
1084 if (function_return.get() == NULL) {
1085 return -1;
1086 }
1087 ScopedPyObjectPtr ok(PyObject_CallMethod(
1088 function_return.get(), "MergeFrom", "O", map_value));
1089 if (ok.get() == NULL) {
1090 return -1;
1091 }
1092 }
1093 } else {
1094 ScopedPyObjectPtr function_return;
1095 function_return.reset(
1096 PyObject_CallMethod(map.get(), "update", "O", value));
1097 if (function_return.get() == NULL) {
1098 return -1;
1099 }
1100 }
1101 } else if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1102 ScopedPyObjectPtr container(GetAttr(self, name));
1103 if (container == NULL) {
1104 return -1;
1105 }
1106 if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1107 RepeatedCompositeContainer* rc_container =
1108 reinterpret_cast<RepeatedCompositeContainer*>(container.get());
1109 ScopedPyObjectPtr iter(PyObject_GetIter(value));
1110 if (iter == NULL) {
1111 PyErr_SetString(PyExc_TypeError, "Value must be iterable");
1112 return -1;
1113 }
1114 ScopedPyObjectPtr next;
1115 while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
1116 PyObject* kwargs = (PyDict_Check(next.get()) ? next.get() : NULL);
1117 ScopedPyObjectPtr new_msg(
1118 repeated_composite_container::Add(rc_container, NULL, kwargs));
1119 if (new_msg == NULL) {
1120 return -1;
1121 }
1122 if (kwargs == NULL) {
1123 // next was not a dict, it's a message we need to merge
1124 ScopedPyObjectPtr merged(MergeFrom(
1125 reinterpret_cast<CMessage*>(new_msg.get()), next.get()));
1126 if (merged.get() == NULL) {
1127 return -1;
1128 }
1129 }
1130 }
1131 if (PyErr_Occurred()) {
1132 // Check to see how PyIter_Next() exited.
1133 return -1;
1134 }
1135 } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1136 RepeatedScalarContainer* rs_container =
1137 reinterpret_cast<RepeatedScalarContainer*>(container.get());
1138 ScopedPyObjectPtr iter(PyObject_GetIter(value));
1139 if (iter == NULL) {
1140 PyErr_SetString(PyExc_TypeError, "Value must be iterable");
1141 return -1;
1142 }
1143 ScopedPyObjectPtr next;
1144 while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
1145 ScopedPyObjectPtr enum_value(
1146 GetIntegerEnumValue(*descriptor, next.get()));
1147 if (enum_value == NULL) {
1148 return -1;
1149 }
1150 ScopedPyObjectPtr new_msg(repeated_scalar_container::Append(
1151 rs_container, enum_value.get()));
1152 if (new_msg == NULL) {
1153 return -1;
1154 }
1155 }
1156 if (PyErr_Occurred()) {
1157 // Check to see how PyIter_Next() exited.
1158 return -1;
1159 }
1160 } else {
1161 if (ScopedPyObjectPtr(repeated_scalar_container::Extend(
1162 reinterpret_cast<RepeatedScalarContainer*>(container.get()),
1163 value)) ==
1164 NULL) {
1165 return -1;
1166 }
1167 }
1168 } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1169 ScopedPyObjectPtr message(GetAttr(self, name));
1170 if (message == NULL) {
1171 return -1;
1172 }
1173 CMessage* cmessage = reinterpret_cast<CMessage*>(message.get());
1174 if (PyDict_Check(value)) {
1175 if (InitAttributes(cmessage, NULL, value) < 0) {
1176 return -1;
1177 }
1178 } else {
1179 ScopedPyObjectPtr merged(MergeFrom(cmessage, value));
1180 if (merged == NULL) {
1181 return -1;
1182 }
1183 }
1184 } else {
1185 ScopedPyObjectPtr new_val;
1186 if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1187 new_val.reset(GetIntegerEnumValue(*descriptor, value));
1188 if (new_val == NULL) {
1189 return -1;
1190 }
1191 }
1192 if (SetAttr(self, name, (new_val.get() == NULL) ? value : new_val.get()) <
1193 0) {
1194 return -1;
1195 }
1196 }
1197 }
1198 return 0;
1199 }
1200
1201 // Allocates an incomplete Python Message: the caller must fill self->message,
1202 // self->owner and eventually self->parent.
1203 CMessage* NewEmptyMessage(CMessageClass* type) {
1204 CMessage* self = reinterpret_cast<CMessage*>(
1205 PyType_GenericAlloc(&type->super.ht_type, 0));
1206 if (self == NULL) {
1207 return NULL;
1208 }
1209
1210 self->message = NULL;
1211 self->parent = NULL;
1212 self->parent_field_descriptor = NULL;
1213 self->read_only = false;
1214 self->extensions = NULL;
1215
1216 self->composite_fields = NULL;
1217
1218 return self;
1219 }
1220
1221 // The __new__ method of Message classes.
1222 // Creates a new C++ message and takes ownership.
1223 static PyObject* New(PyTypeObject* cls,
1224 PyObject* unused_args, PyObject* unused_kwargs) {
1225 CMessageClass* type = CheckMessageClass(cls);
1226 if (type == NULL) {
1227 return NULL;
1228 }
1229 // Retrieve the message descriptor and the default instance (=prototype).
1230 const Descriptor* message_descriptor = type->message_descriptor;
1231 if (message_descriptor == NULL) {
1232 return NULL;
1233 }
1234 const Message* default_message = type->py_descriptor_pool->message_factory
1235 ->GetPrototype(message_descriptor);
1236 if (default_message == NULL) {
1237 PyErr_SetString(PyExc_TypeError, message_descriptor->full_name().c_str());
1238 return NULL;
1239 }
1240
1241 CMessage* self = NewEmptyMessage(type);
1242 if (self == NULL) {
1243 return NULL;
1244 }
1245 self->message = default_message->New();
1246 self->owner.reset(self->message);
1247 return reinterpret_cast<PyObject*>(self);
1248 }
1249
1250 // The __init__ method of Message classes.
1251 // It initializes fields from keywords passed to the constructor.
1252 static int Init(CMessage* self, PyObject* args, PyObject* kwargs) {
1253 return InitAttributes(self, args, kwargs);
1254 }
1255
1256 // ---------------------------------------------------------------------
1257 // Deallocating a CMessage
1258 //
1259 // Deallocating a CMessage requires that we clear any weak references
1260 // from children to the message being deallocated.
1261
1262 // Clear the weak reference from the child to the parent.
1263 struct ClearWeakReferences : public ChildVisitor {
1264 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1265 container->parent = NULL;
1266 // The elements in the container have the same parent as the
1267 // container itself, so NULL out that pointer as well.
1268 const Py_ssize_t n = PyList_GET_SIZE(container->child_messages);
1269 for (Py_ssize_t i = 0; i < n; ++i) {
1270 CMessage* child_cmessage = reinterpret_cast<CMessage*>(
1271 PyList_GET_ITEM(container->child_messages, i));
1272 child_cmessage->parent = NULL;
1273 }
1274 return 0;
1275 }
1276
1277 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1278 container->parent = NULL;
1279 return 0;
1280 }
1281
1282 int VisitMapContainer(MapContainer* container) {
1283 container->parent = NULL;
1284 return 0;
1285 }
1286
1287 int VisitCMessage(CMessage* cmessage,
1288 const FieldDescriptor* field_descriptor) {
1289 cmessage->parent = NULL;
1290 return 0;
1291 }
1292 };
1293
1294 static void Dealloc(CMessage* self) {
1295 // Null out all weak references from children to this message.
1296 GOOGLE_CHECK_EQ(0, ForEachCompositeField(self, ClearWeakReferences()));
1297 if (self->extensions) {
1298 self->extensions->parent = NULL;
1299 }
1300
1301 Py_CLEAR(self->extensions);
1302 Py_CLEAR(self->composite_fields);
1303 self->owner.reset();
1304 Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
1305 }
1306
1307 // ---------------------------------------------------------------------
1308
1309
1310 PyObject* IsInitialized(CMessage* self, PyObject* args) {
1311 PyObject* errors = NULL;
1312 if (PyArg_ParseTuple(args, "|O", &errors) < 0) {
1313 return NULL;
1314 }
1315 if (self->message->IsInitialized()) {
1316 Py_RETURN_TRUE;
1317 }
1318 if (errors != NULL) {
1319 ScopedPyObjectPtr initialization_errors(
1320 FindInitializationErrors(self));
1321 if (initialization_errors == NULL) {
1322 return NULL;
1323 }
1324 ScopedPyObjectPtr extend_name(PyString_FromString("extend"));
1325 if (extend_name == NULL) {
1326 return NULL;
1327 }
1328 ScopedPyObjectPtr result(PyObject_CallMethodObjArgs(
1329 errors,
1330 extend_name.get(),
1331 initialization_errors.get(),
1332 NULL));
1333 if (result == NULL) {
1334 return NULL;
1335 }
1336 }
1337 Py_RETURN_FALSE;
1338 }
1339
1340 PyObject* HasFieldByDescriptor(
1341 CMessage* self, const FieldDescriptor* field_descriptor) {
1342 Message* message = self->message;
1343 if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
1344 return NULL;
1345 }
1346 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1347 PyErr_SetString(PyExc_KeyError,
1348 "Field is repeated. A singular method is required.");
1349 return NULL;
1350 }
1351 bool has_field =
1352 message->GetReflection()->HasField(*message, field_descriptor);
1353 return PyBool_FromLong(has_field ? 1 : 0);
1354 }
1355
1356 const FieldDescriptor* FindFieldWithOneofs(
1357 const Message* message, const string& field_name, bool* in_oneof) {
1358 *in_oneof = false;
1359 const Descriptor* descriptor = message->GetDescriptor();
1360 const FieldDescriptor* field_descriptor =
1361 descriptor->FindFieldByName(field_name);
1362 if (field_descriptor != NULL) {
1363 return field_descriptor;
1364 }
1365 const OneofDescriptor* oneof_desc =
1366 descriptor->FindOneofByName(field_name);
1367 if (oneof_desc != NULL) {
1368 *in_oneof = true;
1369 return message->GetReflection()->GetOneofFieldDescriptor(*message,
1370 oneof_desc);
1371 }
1372 return NULL;
1373 }
1374
1375 bool CheckHasPresence(const FieldDescriptor* field_descriptor, bool in_oneof) {
1376 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1377 PyErr_Format(PyExc_ValueError,
1378 "Protocol message has no singular \"%s\" field.",
1379 field_descriptor->name().c_str());
1380 return false;
1381 }
1382
1383 if (field_descriptor->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
1384 // HasField() for a oneof *itself* isn't supported.
1385 if (in_oneof) {
1386 PyErr_Format(PyExc_ValueError,
1387 "Can't test oneof field \"%s\" for presence in proto3, use "
1388 "WhichOneof instead.",
1389 field_descriptor->containing_oneof()->name().c_str());
1390 return false;
1391 }
1392
1393 // ...but HasField() for fields *in* a oneof is supported.
1394 if (field_descriptor->containing_oneof() != NULL) {
1395 return true;
1396 }
1397
1398 if (field_descriptor->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1399 PyErr_Format(
1400 PyExc_ValueError,
1401 "Can't test non-submessage field \"%s\" for presence in proto3.",
1402 field_descriptor->name().c_str());
1403 return false;
1404 }
1405 }
1406
1407 return true;
1408 }
1409
1410 PyObject* HasField(CMessage* self, PyObject* arg) {
1411 char* field_name;
1412 Py_ssize_t size;
1413 #if PY_MAJOR_VERSION < 3
1414 if (PyString_AsStringAndSize(arg, &field_name, &size) < 0) {
1415 return NULL;
1416 }
1417 #else
1418 field_name = PyUnicode_AsUTF8AndSize(arg, &size);
1419 if (!field_name) {
1420 return NULL;
1421 }
1422 #endif
1423
1424 Message* message = self->message;
1425 bool is_in_oneof;
1426 const FieldDescriptor* field_descriptor =
1427 FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
1428 if (field_descriptor == NULL) {
1429 if (!is_in_oneof) {
1430 PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name);
1431 return NULL;
1432 } else {
1433 Py_RETURN_FALSE;
1434 }
1435 }
1436
1437 if (!CheckHasPresence(field_descriptor, is_in_oneof)) {
1438 return NULL;
1439 }
1440
1441 if (message->GetReflection()->HasField(*message, field_descriptor)) {
1442 Py_RETURN_TRUE;
1443 }
1444 if (!message->GetReflection()->SupportsUnknownEnumValues() &&
1445 field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1446 // Special case: Python HasField() differs in semantics from C++
1447 // slightly: we return HasField('enum_field') == true if there is
1448 // an unknown enum value present. To implement this we have to
1449 // look in the UnknownFieldSet.
1450 const UnknownFieldSet& unknown_field_set =
1451 message->GetReflection()->GetUnknownFields(*message);
1452 for (int i = 0; i < unknown_field_set.field_count(); ++i) {
1453 if (unknown_field_set.field(i).number() == field_descriptor->number()) {
1454 Py_RETURN_TRUE;
1455 }
1456 }
1457 }
1458 Py_RETURN_FALSE;
1459 }
1460
1461 PyObject* ClearExtension(CMessage* self, PyObject* extension) {
1462 if (self->extensions != NULL) {
1463 return extension_dict::ClearExtension(self->extensions, extension);
1464 } else {
1465 const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
1466 if (descriptor == NULL) {
1467 return NULL;
1468 }
1469 if (ScopedPyObjectPtr(ClearFieldByDescriptor(self, descriptor)) == NULL) {
1470 return NULL;
1471 }
1472 }
1473 Py_RETURN_NONE;
1474 }
1475
1476 PyObject* HasExtension(CMessage* self, PyObject* extension) {
1477 const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
1478 if (descriptor == NULL) {
1479 return NULL;
1480 }
1481 return HasFieldByDescriptor(self, descriptor);
1482 }
1483
1484 // ---------------------------------------------------------------------
1485 // Releasing messages
1486 //
// The Python API's ClearField() and Clear() methods behave
// differently than their C++ counterparts. While the C++ versions
// clear the children, the Python versions detach the children
// without touching their content. This impedance mismatch causes
// some complexity in the implementation, which is captured in this
// section.
1493 //
1494 // When a CMessage field is cleared we need to:
1495 //
1496 // * Release the Message used as the backing store for the CMessage
1497 // from its parent.
1498 //
1499 // * Change the owner field of the released CMessage and all of its
1500 // children to point to the newly released Message.
1501 //
1502 // * Clear the weak references from the released CMessage to the
1503 // parent.
1504 //
1505 // When a RepeatedCompositeContainer field is cleared we need to:
1506 //
//  * Release all the Messages used as the backing store for the
//    CMessages stored in the container.
1509 //
1510 // * Change the owner field of all the released CMessage and all of
1511 // their children to point to the newly released Messages.
1512 //
1513 // * Clear the weak references from the released container to the
1514 // parent.
1515
1516 struct SetOwnerVisitor : public ChildVisitor {
1517 // new_owner must outlive this object.
1518 explicit SetOwnerVisitor(const shared_ptr<Message>& new_owner)
1519 : new_owner_(new_owner) {}
1520
1521 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1522 repeated_composite_container::SetOwner(container, new_owner_);
1523 return 0;
1524 }
1525
1526 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1527 repeated_scalar_container::SetOwner(container, new_owner_);
1528 return 0;
1529 }
1530
1531 int VisitMapContainer(MapContainer* container) {
1532 container->SetOwner(new_owner_);
1533 return 0;
1534 }
1535
1536 int VisitCMessage(CMessage* cmessage,
1537 const FieldDescriptor* field_descriptor) {
1538 return SetOwner(cmessage, new_owner_);
1539 }
1540
1541 private:
1542 const shared_ptr<Message>& new_owner_;
1543 };
1544
1545 // Change the owner of this CMessage and all its children, recursively.
1546 int SetOwner(CMessage* self, const shared_ptr<Message>& new_owner) {
1547 self->owner = new_owner;
1548 if (ForEachCompositeField(self, SetOwnerVisitor(new_owner)) == -1)
1549 return -1;
1550 return 0;
1551 }
1552
1553 // Releases the message specified by 'field' and returns the
1554 // pointer. If the field does not exist a new message is created using
1555 // 'descriptor'. The caller takes ownership of the returned pointer.
1556 Message* ReleaseMessage(CMessage* self,
1557 const Descriptor* descriptor,
1558 const FieldDescriptor* field_descriptor) {
1559 MessageFactory* message_factory = GetFactoryForMessage(self);
1560 Message* released_message = self->message->GetReflection()->ReleaseMessage(
1561 self->message, field_descriptor, message_factory);
1562 // ReleaseMessage will return NULL which differs from
1563 // child_cmessage->message, if the field does not exist. In this case,
1564 // the latter points to the default instance via a const_cast<>, so we
1565 // have to reset it to a new mutable object since we are taking ownership.
1566 if (released_message == NULL) {
1567 const Message* prototype = message_factory->GetPrototype(descriptor);
1568 GOOGLE_DCHECK(prototype != NULL);
1569 released_message = prototype->New();
1570 }
1571
1572 return released_message;
1573 }
1574
1575 int ReleaseSubMessage(CMessage* self,
1576 const FieldDescriptor* field_descriptor,
1577 CMessage* child_cmessage) {
1578 // Release the Message
1579 shared_ptr<Message> released_message(ReleaseMessage(
1580 self, child_cmessage->message->GetDescriptor(), field_descriptor));
1581 child_cmessage->message = released_message.get();
1582 child_cmessage->owner.swap(released_message);
1583 child_cmessage->parent = NULL;
1584 child_cmessage->parent_field_descriptor = NULL;
1585 child_cmessage->read_only = false;
1586 return ForEachCompositeField(child_cmessage,
1587 SetOwnerVisitor(child_cmessage->owner));
1588 }
1589
1590 struct ReleaseChild : public ChildVisitor {
1591 // message must outlive this object.
1592 explicit ReleaseChild(CMessage* parent) :
1593 parent_(parent) {}
1594
1595 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1596 return repeated_composite_container::Release(container);
1597 }
1598
1599 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1600 return repeated_scalar_container::Release(container);
1601 }
1602
1603 int VisitMapContainer(MapContainer* container) {
1604 return container->Release();
1605 }
1606
1607 int VisitCMessage(CMessage* cmessage,
1608 const FieldDescriptor* field_descriptor) {
1609 return ReleaseSubMessage(parent_, field_descriptor, cmessage);
1610 }
1611
1612 CMessage* parent_;
1613 };
1614
1615 int InternalReleaseFieldByDescriptor(
1616 CMessage* self,
1617 const FieldDescriptor* field_descriptor,
1618 PyObject* composite_field) {
1619 return VisitCompositeField(
1620 field_descriptor,
1621 composite_field,
1622 ReleaseChild(self));
1623 }
1624
1625 PyObject* ClearFieldByDescriptor(
1626 CMessage* self,
1627 const FieldDescriptor* descriptor) {
1628 if (!CheckFieldBelongsToMessage(descriptor, self->message)) {
1629 return NULL;
1630 }
1631 AssureWritable(self);
1632 self->message->GetReflection()->ClearField(self->message, descriptor);
1633 Py_RETURN_NONE;
1634 }
1635
1636 PyObject* ClearField(CMessage* self, PyObject* arg) {
1637 if (!PyString_Check(arg)) {
1638 PyErr_SetString(PyExc_TypeError, "field name must be a string");
1639 return NULL;
1640 }
1641 #if PY_MAJOR_VERSION < 3
1642 const char* field_name = PyString_AS_STRING(arg);
1643 Py_ssize_t size = PyString_GET_SIZE(arg);
1644 #else
1645 Py_ssize_t size;
1646 const char* field_name = PyUnicode_AsUTF8AndSize(arg, &size);
1647 #endif
1648 AssureWritable(self);
1649 Message* message = self->message;
1650 ScopedPyObjectPtr arg_in_oneof;
1651 bool is_in_oneof;
1652 const FieldDescriptor* field_descriptor =
1653 FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
1654 if (field_descriptor == NULL) {
1655 if (!is_in_oneof) {
1656 PyErr_Format(PyExc_ValueError,
1657 "Protocol message has no \"%s\" field.", field_name);
1658 return NULL;
1659 } else {
1660 Py_RETURN_NONE;
1661 }
1662 } else if (is_in_oneof) {
1663 const string& name = field_descriptor->name();
1664 arg_in_oneof.reset(PyString_FromStringAndSize(name.c_str(), name.size()));
1665 arg = arg_in_oneof.get();
1666 }
1667
1668 PyObject* composite_field = self->composite_fields ?
1669 PyDict_GetItem(self->composite_fields, arg) : NULL;
1670
1671 // Only release the field if there's a possibility that there are
1672 // references to it.
1673 if (composite_field != NULL) {
1674 if (InternalReleaseFieldByDescriptor(self, field_descriptor,
1675 composite_field) < 0) {
1676 return NULL;
1677 }
1678 PyDict_DelItem(self->composite_fields, arg);
1679 }
1680 message->GetReflection()->ClearField(message, field_descriptor);
1681 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
1682 !message->GetReflection()->SupportsUnknownEnumValues()) {
1683 UnknownFieldSet* unknown_field_set =
1684 message->GetReflection()->MutableUnknownFields(message);
1685 unknown_field_set->DeleteByNumber(field_descriptor->number());
1686 }
1687
1688 Py_RETURN_NONE;
1689 }
1690
1691 PyObject* Clear(CMessage* self) {
1692 AssureWritable(self);
1693 if (ForEachCompositeField(self, ReleaseChild(self)) == -1)
1694 return NULL;
1695 Py_CLEAR(self->extensions);
1696 if (self->composite_fields) {
1697 PyDict_Clear(self->composite_fields);
1698 }
1699 self->message->Clear();
1700 Py_RETURN_NONE;
1701 }
1702
1703 // ---------------------------------------------------------------------
1704
1705 static string GetMessageName(CMessage* self) {
1706 if (self->parent_field_descriptor != NULL) {
1707 return self->parent_field_descriptor->full_name();
1708 } else {
1709 return self->message->GetDescriptor()->full_name();
1710 }
1711 }
1712
1713 static PyObject* SerializeToString(CMessage* self, PyObject* args) {
1714 if (!self->message->IsInitialized()) {
1715 ScopedPyObjectPtr errors(FindInitializationErrors(self));
1716 if (errors == NULL) {
1717 return NULL;
1718 }
1719 ScopedPyObjectPtr comma(PyString_FromString(","));
1720 if (comma == NULL) {
1721 return NULL;
1722 }
1723 ScopedPyObjectPtr joined(
1724 PyObject_CallMethod(comma.get(), "join", "O", errors.get()));
1725 if (joined == NULL) {
1726 return NULL;
1727 }
1728
1729 // TODO(haberman): this is a (hopefully temporary) hack. The unit testing
1730 // infrastructure reloads all pure-Python modules for every test, but not
1731 // C++ modules (because that's generally impossible:
1732 // http://bugs.python.org/issue1144263). But if we cache EncodeError, we'll
1733 // return the EncodeError from a previous load of the module, which won't
1734 // match a user's attempt to catch EncodeError. So we have to look it up
1735 // again every time.
1736 ScopedPyObjectPtr message_module(PyImport_ImportModule(
1737 "google.protobuf.message"));
1738 if (message_module.get() == NULL) {
1739 return NULL;
1740 }
1741
1742 ScopedPyObjectPtr encode_error(
1743 PyObject_GetAttrString(message_module.get(), "EncodeError"));
1744 if (encode_error.get() == NULL) {
1745 return NULL;
1746 }
1747 PyErr_Format(encode_error.get(),
1748 "Message %s is missing required fields: %s",
1749 GetMessageName(self).c_str(), PyString_AsString(joined.get()));
1750 return NULL;
1751 }
1752 int size = self->message->ByteSize();
1753 if (size <= 0) {
1754 return PyBytes_FromString("");
1755 }
1756 PyObject* result = PyBytes_FromStringAndSize(NULL, size);
1757 if (result == NULL) {
1758 return NULL;
1759 }
1760 char* buffer = PyBytes_AS_STRING(result);
1761 self->message->SerializeWithCachedSizesToArray(
1762 reinterpret_cast<uint8*>(buffer));
1763 return result;
1764 }
1765
1766 static PyObject* SerializePartialToString(CMessage* self) {
1767 string contents;
1768 self->message->SerializePartialToString(&contents);
1769 return PyBytes_FromStringAndSize(contents.c_str(), contents.size());
1770 }
1771
1772 // Formats proto fields for ascii dumps using python formatting functions where
1773 // appropriate.
1774 class PythonFieldValuePrinter : public TextFormat::FieldValuePrinter {
1775 public:
1776 // Python has some differences from C++ when printing floating point numbers.
1777 //
1778 // 1) Trailing .0 is always printed.
1779 // 2) (Python2) Output is rounded to 12 digits.
1780 // 3) (Python3) The full precision of the double is preserved (and Python uses
1781 // David M. Gay's dtoa(), when the C++ code uses SimpleDtoa. There are some
1782 // differences, but they rarely happen)
1783 //
1784 // We override floating point printing with the C-API function for printing
1785 // Python floats to ensure consistency.
1786 string PrintFloat(float value) const { return PrintDouble(value); }
1787 string PrintDouble(double value) const {
1788 // This implementation is not highly optimized (it allocates two temporary
1789 // Python objects) but it is simple and portable. If this is shown to be a
1790 // performance bottleneck, we can optimize it, but the results will likely
1791 // be more complicated to accommodate the differing behavior of double
1792 // formatting between Python 2 and Python 3.
1793 //
1794 // (Though a valid question is: do we really want to make out output
1795 // dependent on the Python version?)
1796 ScopedPyObjectPtr py_value(PyFloat_FromDouble(value));
1797 if (!py_value.get()) {
1798 return string();
1799 }
1800
1801 ScopedPyObjectPtr py_str(PyObject_Str(py_value.get()));
1802 if (!py_str.get()) {
1803 return string();
1804 }
1805
1806 return string(PyString_AsString(py_str.get()));
1807 }
1808 };
1809
1810 static PyObject* ToStr(CMessage* self) {
1811 TextFormat::Printer printer;
1812 // Passes ownership
1813 printer.SetDefaultFieldValuePrinter(new PythonFieldValuePrinter());
1814 printer.SetHideUnknownFields(true);
1815 string output;
1816 if (!printer.PrintToString(*self->message, &output)) {
1817 PyErr_SetString(PyExc_ValueError, "Unable to convert message to str");
1818 return NULL;
1819 }
1820 return PyString_FromString(output.c_str());
1821 }
1822
1823 PyObject* MergeFrom(CMessage* self, PyObject* arg) {
1824 CMessage* other_message;
1825 if (!PyObject_TypeCheck(arg, &CMessage_Type)) {
1826 PyErr_Format(PyExc_TypeError,
1827 "Parameter to MergeFrom() must be instance of same class: "
1828 "expected %s got %s.",
1829 self->message->GetDescriptor()->full_name().c_str(),
1830 Py_TYPE(arg)->tp_name);
1831 return NULL;
1832 }
1833
1834 other_message = reinterpret_cast<CMessage*>(arg);
1835 if (other_message->message->GetDescriptor() !=
1836 self->message->GetDescriptor()) {
1837 PyErr_Format(PyExc_TypeError,
1838 "Parameter to MergeFrom() must be instance of same class: "
1839 "expected %s got %s.",
1840 self->message->GetDescriptor()->full_name().c_str(),
1841 other_message->message->GetDescriptor()->full_name().c_str());
1842 return NULL;
1843 }
1844 AssureWritable(self);
1845
1846 // TODO(tibell): Message::MergeFrom might turn some child Messages
1847 // into mutable messages, invalidating the message field in the
1848 // corresponding CMessages. We should run a FixupMessageReferences
1849 // pass here.
1850
1851 self->message->MergeFrom(*other_message->message);
1852 Py_RETURN_NONE;
1853 }
1854
1855 static PyObject* CopyFrom(CMessage* self, PyObject* arg) {
1856 CMessage* other_message;
1857 if (!PyObject_TypeCheck(arg, &CMessage_Type)) {
1858 PyErr_Format(PyExc_TypeError,
1859 "Parameter to CopyFrom() must be instance of same class: "
1860 "expected %s got %s.",
1861 self->message->GetDescriptor()->full_name().c_str(),
1862 Py_TYPE(arg)->tp_name);
1863 return NULL;
1864 }
1865
1866 other_message = reinterpret_cast<CMessage*>(arg);
1867
1868 if (self == other_message) {
1869 Py_RETURN_NONE;
1870 }
1871
1872 if (other_message->message->GetDescriptor() !=
1873 self->message->GetDescriptor()) {
1874 PyErr_Format(PyExc_TypeError,
1875 "Parameter to CopyFrom() must be instance of same class: "
1876 "expected %s got %s.",
1877 self->message->GetDescriptor()->full_name().c_str(),
1878 other_message->message->GetDescriptor()->full_name().c_str());
1879 return NULL;
1880 }
1881
1882 AssureWritable(self);
1883
1884 // CopyFrom on the message will not clean up self->composite_fields,
1885 // which can leave us in an inconsistent state, so clear it out here.
1886 (void)ScopedPyObjectPtr(Clear(self));
1887
1888 self->message->CopyFrom(*other_message->message);
1889
1890 Py_RETURN_NONE;
1891 }
1892
1893 // Protobuf has a 64MB limit built in, this variable will override this. Please
1894 // do not enable this unless you fully understand the implications: protobufs
1895 // must all be kept in memory at the same time, so if they grow too big you may
1896 // get OOM errors. The protobuf APIs do not provide any tools for processing
1897 // protobufs in chunks. If you have protos this big you should break them up if
1898 // it is at all convenient to do so.
1899 static bool allow_oversize_protos = false;
1900
1901 // Provide a method in the module to set allow_oversize_protos to a boolean
1902 // value. This method returns the newly value of allow_oversize_protos.
1903 PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg) {
1904 if (!arg || !PyBool_Check(arg)) {
1905 PyErr_SetString(PyExc_TypeError,
1906 "Argument to SetAllowOversizeProtos must be boolean");
1907 return NULL;
1908 }
1909 allow_oversize_protos = PyObject_IsTrue(arg);
1910 if (allow_oversize_protos) {
1911 Py_RETURN_TRUE;
1912 } else {
1913 Py_RETURN_FALSE;
1914 }
1915 }
1916
1917 static PyObject* MergeFromString(CMessage* self, PyObject* arg) {
1918 const void* data;
1919 Py_ssize_t data_length;
1920 if (PyObject_AsReadBuffer(arg, &data, &data_length) < 0) {
1921 return NULL;
1922 }
1923
1924 AssureWritable(self);
1925 io::CodedInputStream input(
1926 reinterpret_cast<const uint8*>(data), data_length);
1927 if (allow_oversize_protos) {
1928 input.SetTotalBytesLimit(INT_MAX, INT_MAX);
1929 }
1930 PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
1931 input.SetExtensionRegistry(pool->pool, pool->message_factory);
1932 bool success = self->message->MergePartialFromCodedStream(&input);
1933 if (success) {
1934 return PyInt_FromLong(input.CurrentPosition());
1935 } else {
1936 PyErr_Format(DecodeError_class, "Error parsing message");
1937 return NULL;
1938 }
1939 }
1940
1941 static PyObject* ParseFromString(CMessage* self, PyObject* arg) {
1942 if (ScopedPyObjectPtr(Clear(self)) == NULL) {
1943 return NULL;
1944 }
1945 return MergeFromString(self, arg);
1946 }
1947
1948 static PyObject* ByteSize(CMessage* self, PyObject* args) {
1949 return PyLong_FromLong(self->message->ByteSize());
1950 }
1951
1952 static PyObject* RegisterExtension(PyObject* cls,
1953 PyObject* extension_handle) {
1954 const FieldDescriptor* descriptor =
1955 GetExtensionDescriptor(extension_handle);
1956 if (descriptor == NULL) {
1957 return NULL;
1958 }
1959
1960 ScopedPyObjectPtr extensions_by_name(
1961 PyObject_GetAttr(cls, k_extensions_by_name));
1962 if (extensions_by_name == NULL) {
1963 PyErr_SetString(PyExc_TypeError, "no extensions_by_name on class");
1964 return NULL;
1965 }
1966 ScopedPyObjectPtr full_name(PyObject_GetAttr(extension_handle, kfull_name));
1967 if (full_name == NULL) {
1968 return NULL;
1969 }
1970
1971 // If the extension was already registered, check that it is the same.
1972 PyObject* existing_extension =
1973 PyDict_GetItem(extensions_by_name.get(), full_name.get());
1974 if (existing_extension != NULL) {
1975 const FieldDescriptor* existing_extension_descriptor =
1976 GetExtensionDescriptor(existing_extension);
1977 if (existing_extension_descriptor != descriptor) {
1978 PyErr_SetString(PyExc_ValueError, "Double registration of Extensions");
1979 return NULL;
1980 }
1981 // Nothing else to do.
1982 Py_RETURN_NONE;
1983 }
1984
1985 if (PyDict_SetItem(extensions_by_name.get(), full_name.get(),
1986 extension_handle) < 0) {
1987 return NULL;
1988 }
1989
1990 // Also store a mapping from extension number to implementing class.
1991 ScopedPyObjectPtr extensions_by_number(
1992 PyObject_GetAttr(cls, k_extensions_by_number));
1993 if (extensions_by_number == NULL) {
1994 PyErr_SetString(PyExc_TypeError, "no extensions_by_number on class");
1995 return NULL;
1996 }
1997
1998 ScopedPyObjectPtr number(PyObject_GetAttrString(extension_handle, "number"));
1999 if (number == NULL) {
2000 return NULL;
2001 }
2002
2003 // If the extension was already registered by number, check that it is the
2004 // same.
2005 existing_extension = PyDict_GetItem(extensions_by_number.get(), number.get());
2006 if (existing_extension != NULL) {
2007 const FieldDescriptor* existing_extension_descriptor =
2008 GetExtensionDescriptor(existing_extension);
2009 if (existing_extension_descriptor != descriptor) {
2010 const Descriptor* msg_desc = GetMessageDescriptor(
2011 reinterpret_cast<PyTypeObject*>(cls));
2012 PyErr_Format(
2013 PyExc_ValueError,
2014 "Extensions \"%s\" and \"%s\" both try to extend message type "
2015 "\"%s\" with field number %ld.",
2016 existing_extension_descriptor->full_name().c_str(),
2017 descriptor->full_name().c_str(),
2018 msg_desc->full_name().c_str(),
2019 PyInt_AsLong(number.get()));
2020 return NULL;
2021 }
2022 // Nothing else to do.
2023 Py_RETURN_NONE;
2024 }
2025 if (PyDict_SetItem(extensions_by_number.get(), number.get(),
2026 extension_handle) < 0) {
2027 return NULL;
2028 }
2029
2030 // Check if it's a message set
2031 if (descriptor->is_extension() &&
2032 descriptor->containing_type()->options().message_set_wire_format() &&
2033 descriptor->type() == FieldDescriptor::TYPE_MESSAGE &&
2034 descriptor->label() == FieldDescriptor::LABEL_OPTIONAL) {
2035 ScopedPyObjectPtr message_name(PyString_FromStringAndSize(
2036 descriptor->message_type()->full_name().c_str(),
2037 descriptor->message_type()->full_name().size()));
2038 if (message_name == NULL) {
2039 return NULL;
2040 }
2041 PyDict_SetItem(extensions_by_name.get(), message_name.get(),
2042 extension_handle);
2043 }
2044
2045 Py_RETURN_NONE;
2046 }
2047
2048 static PyObject* SetInParent(CMessage* self, PyObject* args) {
2049 AssureWritable(self);
2050 Py_RETURN_NONE;
2051 }
2052
2053 static PyObject* WhichOneof(CMessage* self, PyObject* arg) {
2054 Py_ssize_t name_size;
2055 char *name_data;
2056 if (PyString_AsStringAndSize(arg, &name_data, &name_size) < 0)
2057 return NULL;
2058 string oneof_name = string(name_data, name_size);
2059 const OneofDescriptor* oneof_desc =
2060 self->message->GetDescriptor()->FindOneofByName(oneof_name);
2061 if (oneof_desc == NULL) {
2062 PyErr_Format(PyExc_ValueError,
2063 "Protocol message has no oneof \"%s\" field.",
2064 oneof_name.c_str());
2065 return NULL;
2066 }
2067 const FieldDescriptor* field_in_oneof =
2068 self->message->GetReflection()->GetOneofFieldDescriptor(
2069 *self->message, oneof_desc);
2070 if (field_in_oneof == NULL) {
2071 Py_RETURN_NONE;
2072 } else {
2073 const string& name = field_in_oneof->name();
2074 return PyString_FromStringAndSize(name.c_str(), name.size());
2075 }
2076 }
2077
2078 static PyObject* GetExtensionDict(CMessage* self, void *closure);
2079
2080 static PyObject* ListFields(CMessage* self) {
2081 vector<const FieldDescriptor*> fields;
2082 self->message->GetReflection()->ListFields(*self->message, &fields);
2083
2084 // Normally, the list will be exactly the size of the fields.
2085 ScopedPyObjectPtr all_fields(PyList_New(fields.size()));
2086 if (all_fields == NULL) {
2087 return NULL;
2088 }
2089
2090 // When there are unknown extensions, the py list will *not* contain
2091 // the field information. Thus the actual size of the py list will be
2092 // smaller than the size of fields. Set the actual size at the end.
2093 Py_ssize_t actual_size = 0;
2094 for (size_t i = 0; i < fields.size(); ++i) {
2095 ScopedPyObjectPtr t(PyTuple_New(2));
2096 if (t == NULL) {
2097 return NULL;
2098 }
2099
2100 if (fields[i]->is_extension()) {
2101 ScopedPyObjectPtr extension_field(
2102 PyFieldDescriptor_FromDescriptor(fields[i]));
2103 if (extension_field == NULL) {
2104 return NULL;
2105 }
2106 // With C++ descriptors, the field can always be retrieved, but for
2107 // unknown extensions which have not been imported in Python code, there
2108 // is no message class and we cannot retrieve the value.
2109 // TODO(amauryfa): consider building the class on the fly!
2110 if (fields[i]->message_type() != NULL &&
2111 cdescriptor_pool::GetMessageClass(
2112 GetDescriptorPoolForMessage(self),
2113 fields[i]->message_type()) == NULL) {
2114 PyErr_Clear();
2115 continue;
2116 }
2117 ScopedPyObjectPtr extensions(GetExtensionDict(self, NULL));
2118 if (extensions == NULL) {
2119 return NULL;
2120 }
2121 // 'extension' reference later stolen by PyTuple_SET_ITEM.
2122 PyObject* extension = PyObject_GetItem(
2123 extensions.get(), extension_field.get());
2124 if (extension == NULL) {
2125 return NULL;
2126 }
2127 PyTuple_SET_ITEM(t.get(), 0, extension_field.release());
2128 // Steals reference to 'extension'
2129 PyTuple_SET_ITEM(t.get(), 1, extension);
2130 } else {
2131 // Normal field
2132 const string& field_name = fields[i]->name();
2133 ScopedPyObjectPtr py_field_name(PyString_FromStringAndSize(
2134 field_name.c_str(), field_name.length()));
2135 if (py_field_name == NULL) {
2136 PyErr_SetString(PyExc_ValueError, "bad string");
2137 return NULL;
2138 }
2139 ScopedPyObjectPtr field_descriptor(
2140 PyFieldDescriptor_FromDescriptor(fields[i]));
2141 if (field_descriptor == NULL) {
2142 return NULL;
2143 }
2144
2145 PyObject* field_value = GetAttr(self, py_field_name.get());
2146 if (field_value == NULL) {
2147 PyErr_SetObject(PyExc_ValueError, py_field_name.get());
2148 return NULL;
2149 }
2150 PyTuple_SET_ITEM(t.get(), 0, field_descriptor.release());
2151 PyTuple_SET_ITEM(t.get(), 1, field_value);
2152 }
2153 PyList_SET_ITEM(all_fields.get(), actual_size, t.release());
2154 ++actual_size;
2155 }
2156 if (static_cast<size_t>(actual_size) != fields.size() &&
2157 (PyList_SetSlice(all_fields.get(), actual_size, fields.size(), NULL) <
2158 0)) {
2159 return NULL;
2160 }
2161 return all_fields.release();
2162 }
2163
2164 static PyObject* DiscardUnknownFields(CMessage* self) {
2165 AssureWritable(self);
2166 self->message->DiscardUnknownFields();
2167 Py_RETURN_NONE;
2168 }
2169
2170 PyObject* FindInitializationErrors(CMessage* self) {
2171 Message* message = self->message;
2172 vector<string> errors;
2173 message->FindInitializationErrors(&errors);
2174
2175 PyObject* error_list = PyList_New(errors.size());
2176 if (error_list == NULL) {
2177 return NULL;
2178 }
2179 for (size_t i = 0; i < errors.size(); ++i) {
2180 const string& error = errors[i];
2181 PyObject* error_string = PyString_FromStringAndSize(
2182 error.c_str(), error.length());
2183 if (error_string == NULL) {
2184 Py_DECREF(error_list);
2185 return NULL;
2186 }
2187 PyList_SET_ITEM(error_list, i, error_string);
2188 }
2189 return error_list;
2190 }
2191
2192 static PyObject* RichCompare(CMessage* self, PyObject* other, int opid) {
2193 // Only equality comparisons are implemented.
2194 if (opid != Py_EQ && opid != Py_NE) {
2195 Py_INCREF(Py_NotImplemented);
2196 return Py_NotImplemented;
2197 }
2198 bool equals = true;
2199 // If other is not a message, it cannot be equal.
2200 if (!PyObject_TypeCheck(other, &CMessage_Type)) {
2201 equals = false;
2202 }
2203 const google::protobuf::Message* other_message =
2204 reinterpret_cast<CMessage*>(other)->message;
2205 // If messages don't have the same descriptors, they are not equal.
2206 if (equals &&
2207 self->message->GetDescriptor() != other_message->GetDescriptor()) {
2208 equals = false;
2209 }
2210 // Check the message contents.
2211 if (equals && !google::protobuf::util::MessageDifferencer::Equals(
2212 *self->message,
2213 *reinterpret_cast<CMessage*>(other)->message)) {
2214 equals = false;
2215 }
2216 if (equals ^ (opid == Py_EQ)) {
2217 Py_RETURN_FALSE;
2218 } else {
2219 Py_RETURN_TRUE;
2220 }
2221 }
2222
2223 PyObject* InternalGetScalar(const Message* message,
2224 const FieldDescriptor* field_descriptor) {
2225 const Reflection* reflection = message->GetReflection();
2226
2227 if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
2228 return NULL;
2229 }
2230
2231 PyObject* result = NULL;
2232 switch (field_descriptor->cpp_type()) {
2233 case FieldDescriptor::CPPTYPE_INT32: {
2234 int32 value = reflection->GetInt32(*message, field_descriptor);
2235 result = PyInt_FromLong(value);
2236 break;
2237 }
2238 case FieldDescriptor::CPPTYPE_INT64: {
2239 int64 value = reflection->GetInt64(*message, field_descriptor);
2240 result = PyLong_FromLongLong(value);
2241 break;
2242 }
2243 case FieldDescriptor::CPPTYPE_UINT32: {
2244 uint32 value = reflection->GetUInt32(*message, field_descriptor);
2245 result = PyInt_FromSize_t(value);
2246 break;
2247 }
2248 case FieldDescriptor::CPPTYPE_UINT64: {
2249 uint64 value = reflection->GetUInt64(*message, field_descriptor);
2250 result = PyLong_FromUnsignedLongLong(value);
2251 break;
2252 }
2253 case FieldDescriptor::CPPTYPE_FLOAT: {
2254 float value = reflection->GetFloat(*message, field_descriptor);
2255 result = PyFloat_FromDouble(value);
2256 break;
2257 }
2258 case FieldDescriptor::CPPTYPE_DOUBLE: {
2259 double value = reflection->GetDouble(*message, field_descriptor);
2260 result = PyFloat_FromDouble(value);
2261 break;
2262 }
2263 case FieldDescriptor::CPPTYPE_BOOL: {
2264 bool value = reflection->GetBool(*message, field_descriptor);
2265 result = PyBool_FromLong(value);
2266 break;
2267 }
2268 case FieldDescriptor::CPPTYPE_STRING: {
2269 string value = reflection->GetString(*message, field_descriptor);
2270 result = ToStringObject(field_descriptor, value);
2271 break;
2272 }
2273 case FieldDescriptor::CPPTYPE_ENUM: {
2274 if (!message->GetReflection()->SupportsUnknownEnumValues() &&
2275 !message->GetReflection()->HasField(*message, field_descriptor)) {
2276 // Look for the value in the unknown fields.
2277 const UnknownFieldSet& unknown_field_set =
2278 message->GetReflection()->GetUnknownFields(*message);
2279 for (int i = 0; i < unknown_field_set.field_count(); ++i) {
2280 if (unknown_field_set.field(i).number() ==
2281 field_descriptor->number() &&
2282 unknown_field_set.field(i).type() ==
2283 google::protobuf::UnknownField::TYPE_VARINT) {
2284 result = PyInt_FromLong(unknown_field_set.field(i).varint());
2285 break;
2286 }
2287 }
2288 }
2289
2290 if (result == NULL) {
2291 const EnumValueDescriptor* enum_value =
2292 message->GetReflection()->GetEnum(*message, field_descriptor);
2293 result = PyInt_FromLong(enum_value->number());
2294 }
2295 break;
2296 }
2297 default:
2298 PyErr_Format(
2299 PyExc_SystemError, "Getting a value from a field of unknown type %d",
2300 field_descriptor->cpp_type());
2301 }
2302
2303 return result;
2304 }
2305
2306 PyObject* InternalGetSubMessage(
2307 CMessage* self, const FieldDescriptor* field_descriptor) {
2308 const Reflection* reflection = self->message->GetReflection();
2309 PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
2310 const Message& sub_message = reflection->GetMessage(
2311 *self->message, field_descriptor, pool->message_factory);
2312
2313 CMessageClass* message_class = cdescriptor_pool::GetMessageClass(
2314 pool, field_descriptor->message_type());
2315 if (message_class == NULL) {
2316 return NULL;
2317 }
2318
2319 CMessage* cmsg = cmessage::NewEmptyMessage(message_class);
2320 if (cmsg == NULL) {
2321 return NULL;
2322 }
2323
2324 cmsg->owner = self->owner;
2325 cmsg->parent = self;
2326 cmsg->parent_field_descriptor = field_descriptor;
2327 cmsg->read_only = !reflection->HasField(*self->message, field_descriptor);
2328 cmsg->message = const_cast<Message*>(&sub_message);
2329
2330 return reinterpret_cast<PyObject*>(cmsg);
2331 }
2332
2333 int InternalSetNonOneofScalar(
2334 Message* message,
2335 const FieldDescriptor* field_descriptor,
2336 PyObject* arg) {
2337 const Reflection* reflection = message->GetReflection();
2338
2339 if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
2340 return -1;
2341 }
2342
2343 switch (field_descriptor->cpp_type()) {
2344 case FieldDescriptor::CPPTYPE_INT32: {
2345 GOOGLE_CHECK_GET_INT32(arg, value, -1);
2346 reflection->SetInt32(message, field_descriptor, value);
2347 break;
2348 }
2349 case FieldDescriptor::CPPTYPE_INT64: {
2350 GOOGLE_CHECK_GET_INT64(arg, value, -1);
2351 reflection->SetInt64(message, field_descriptor, value);
2352 break;
2353 }
2354 case FieldDescriptor::CPPTYPE_UINT32: {
2355 GOOGLE_CHECK_GET_UINT32(arg, value, -1);
2356 reflection->SetUInt32(message, field_descriptor, value);
2357 break;
2358 }
2359 case FieldDescriptor::CPPTYPE_UINT64: {
2360 GOOGLE_CHECK_GET_UINT64(arg, value, -1);
2361 reflection->SetUInt64(message, field_descriptor, value);
2362 break;
2363 }
2364 case FieldDescriptor::CPPTYPE_FLOAT: {
2365 GOOGLE_CHECK_GET_FLOAT(arg, value, -1);
2366 reflection->SetFloat(message, field_descriptor, value);
2367 break;
2368 }
2369 case FieldDescriptor::CPPTYPE_DOUBLE: {
2370 GOOGLE_CHECK_GET_DOUBLE(arg, value, -1);
2371 reflection->SetDouble(message, field_descriptor, value);
2372 break;
2373 }
2374 case FieldDescriptor::CPPTYPE_BOOL: {
2375 GOOGLE_CHECK_GET_BOOL(arg, value, -1);
2376 reflection->SetBool(message, field_descriptor, value);
2377 break;
2378 }
2379 case FieldDescriptor::CPPTYPE_STRING: {
2380 if (!CheckAndSetString(
2381 arg, message, field_descriptor, reflection, false, -1)) {
2382 return -1;
2383 }
2384 break;
2385 }
2386 case FieldDescriptor::CPPTYPE_ENUM: {
2387 GOOGLE_CHECK_GET_INT32(arg, value, -1);
2388 if (reflection->SupportsUnknownEnumValues()) {
2389 reflection->SetEnumValue(message, field_descriptor, value);
2390 } else {
2391 const EnumDescriptor* enum_descriptor = field_descriptor->enum_type();
2392 const EnumValueDescriptor* enum_value =
2393 enum_descriptor->FindValueByNumber(value);
2394 if (enum_value != NULL) {
2395 reflection->SetEnum(message, field_descriptor, enum_value);
2396 } else {
2397 PyErr_Format(PyExc_ValueError, "Unknown enum value: %d", value);
2398 return -1;
2399 }
2400 }
2401 break;
2402 }
2403 default:
2404 PyErr_Format(
2405 PyExc_SystemError, "Setting value to a field of unknown type %d",
2406 field_descriptor->cpp_type());
2407 return -1;
2408 }
2409
2410 return 0;
2411 }
2412
2413 int InternalSetScalar(
2414 CMessage* self,
2415 const FieldDescriptor* field_descriptor,
2416 PyObject* arg) {
2417 if (!CheckFieldBelongsToMessage(field_descriptor, self->message)) {
2418 return -1;
2419 }
2420
2421 if (MaybeReleaseOverlappingOneofField(self, field_descriptor) < 0) {
2422 return -1;
2423 }
2424
2425 return InternalSetNonOneofScalar(self->message, field_descriptor, arg);
2426 }
2427
2428 PyObject* FromString(PyTypeObject* cls, PyObject* serialized) {
2429 PyObject* py_cmsg = PyObject_CallObject(
2430 reinterpret_cast<PyObject*>(cls), NULL);
2431 if (py_cmsg == NULL) {
2432 return NULL;
2433 }
2434 CMessage* cmsg = reinterpret_cast<CMessage*>(py_cmsg);
2435
2436 ScopedPyObjectPtr py_length(MergeFromString(cmsg, serialized));
2437 if (py_length == NULL) {
2438 Py_DECREF(py_cmsg);
2439 return NULL;
2440 }
2441
2442 return py_cmsg;
2443 }
2444
2445 PyObject* DeepCopy(CMessage* self, PyObject* arg) {
2446 PyObject* clone = PyObject_CallObject(
2447 reinterpret_cast<PyObject*>(Py_TYPE(self)), NULL);
2448 if (clone == NULL) {
2449 return NULL;
2450 }
2451 if (!PyObject_TypeCheck(clone, &CMessage_Type)) {
2452 Py_DECREF(clone);
2453 return NULL;
2454 }
2455 if (ScopedPyObjectPtr(MergeFrom(
2456 reinterpret_cast<CMessage*>(clone),
2457 reinterpret_cast<PyObject*>(self))) == NULL) {
2458 Py_DECREF(clone);
2459 return NULL;
2460 }
2461 return clone;
2462 }
2463
2464 PyObject* ToUnicode(CMessage* self) {
2465 // Lazy import to prevent circular dependencies
2466 ScopedPyObjectPtr text_format(
2467 PyImport_ImportModule("google.protobuf.text_format"));
2468 if (text_format == NULL) {
2469 return NULL;
2470 }
2471 ScopedPyObjectPtr method_name(PyString_FromString("MessageToString"));
2472 if (method_name == NULL) {
2473 return NULL;
2474 }
2475 Py_INCREF(Py_True);
2476 ScopedPyObjectPtr encoded(PyObject_CallMethodObjArgs(
2477 text_format.get(), method_name.get(), self, Py_True, NULL));
2478 Py_DECREF(Py_True);
2479 if (encoded == NULL) {
2480 return NULL;
2481 }
2482 #if PY_MAJOR_VERSION < 3
2483 PyObject* decoded = PyString_AsDecodedObject(encoded.get(), "utf-8", NULL);
2484 #else
2485 PyObject* decoded = PyUnicode_FromEncodedObject(encoded.get(), "utf-8", NULL);
2486 #endif
2487 if (decoded == NULL) {
2488 return NULL;
2489 }
2490 return decoded;
2491 }
2492
2493 PyObject* Reduce(CMessage* self) {
2494 ScopedPyObjectPtr constructor(reinterpret_cast<PyObject*>(Py_TYPE(self)));
2495 constructor.inc();
2496 ScopedPyObjectPtr args(PyTuple_New(0));
2497 if (args == NULL) {
2498 return NULL;
2499 }
2500 ScopedPyObjectPtr state(PyDict_New());
2501 if (state == NULL) {
2502 return NULL;
2503 }
2504 ScopedPyObjectPtr serialized(SerializePartialToString(self));
2505 if (serialized == NULL) {
2506 return NULL;
2507 }
2508 if (PyDict_SetItemString(state.get(), "serialized", serialized.get()) < 0) {
2509 return NULL;
2510 }
2511 return Py_BuildValue("OOO", constructor.get(), args.get(), state.get());
2512 }
2513
2514 PyObject* SetState(CMessage* self, PyObject* state) {
2515 if (!PyDict_Check(state)) {
2516 PyErr_SetString(PyExc_TypeError, "state not a dict");
2517 return NULL;
2518 }
2519 PyObject* serialized = PyDict_GetItemString(state, "serialized");
2520 if (serialized == NULL) {
2521 return NULL;
2522 }
2523 if (ScopedPyObjectPtr(ParseFromString(self, serialized)) == NULL) {
2524 return NULL;
2525 }
2526 Py_RETURN_NONE;
2527 }
2528
2529 // CMessage static methods:
2530 PyObject* _CheckCalledFromGeneratedFile(PyObject* unused,
2531 PyObject* unused_arg) {
2532 if (!_CalledFromGeneratedFile(1)) {
2533 PyErr_SetString(PyExc_TypeError,
2534 "Descriptors should not be created directly, "
2535 "but only retrieved from their parent.");
2536 return NULL;
2537 }
2538 Py_RETURN_NONE;
2539 }
2540
2541 static PyObject* GetExtensionDict(CMessage* self, void *closure) {
2542 if (self->extensions) {
2543 Py_INCREF(self->extensions);
2544 return reinterpret_cast<PyObject*>(self->extensions);
2545 }
2546
2547 // If there are extension_ranges, the message is "extendable". Allocate a
2548 // dictionary to store the extension fields.
2549 const Descriptor* descriptor = GetMessageDescriptor(Py_TYPE(self));
2550 if (descriptor->extension_range_count() > 0) {
2551 ExtensionDict* extension_dict = extension_dict::NewExtensionDict(self);
2552 if (extension_dict == NULL) {
2553 return NULL;
2554 }
2555 self->extensions = extension_dict;
2556 Py_INCREF(self->extensions);
2557 return reinterpret_cast<PyObject*>(self->extensions);
2558 }
2559
2560 PyErr_SetNone(PyExc_AttributeError);
2561 return NULL;
2562 }
2563
2564 static PyGetSetDef Getters[] = {
2565 {"Extensions", (getter)GetExtensionDict, NULL, "Extension dict"},
2566 {NULL}
2567 };
2568
2569 static PyMethodDef Methods[] = {
2570 { "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
2571 "Makes a deep copy of the class." },
2572 { "__reduce__", (PyCFunction)Reduce, METH_NOARGS,
2573 "Outputs picklable representation of the message." },
2574 { "__setstate__", (PyCFunction)SetState, METH_O,
2575 "Inputs picklable representation of the message." },
2576 { "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
2577 "Outputs a unicode representation of the message." },
2578 { "ByteSize", (PyCFunction)ByteSize, METH_NOARGS,
2579 "Returns the size of the message in bytes." },
2580 { "Clear", (PyCFunction)Clear, METH_NOARGS,
2581 "Clears the message." },
2582 { "ClearExtension", (PyCFunction)ClearExtension, METH_O,
2583 "Clears a message field." },
2584 { "ClearField", (PyCFunction)ClearField, METH_O,
2585 "Clears a message field." },
2586 { "CopyFrom", (PyCFunction)CopyFrom, METH_O,
2587 "Copies a protocol message into the current message." },
2588 { "DiscardUnknownFields", (PyCFunction)DiscardUnknownFields, METH_NOARGS,
2589 "Discards the unknown fields." },
2590 { "FindInitializationErrors", (PyCFunction)FindInitializationErrors,
2591 METH_NOARGS,
2592 "Finds unset required fields." },
2593 { "FromString", (PyCFunction)FromString, METH_O | METH_CLASS,
2594 "Creates new method instance from given serialized data." },
2595 { "HasExtension", (PyCFunction)HasExtension, METH_O,
2596 "Checks if a message field is set." },
2597 { "HasField", (PyCFunction)HasField, METH_O,
2598 "Checks if a message field is set." },
2599 { "IsInitialized", (PyCFunction)IsInitialized, METH_VARARGS,
2600 "Checks if all required fields of a protocol message are set." },
2601 { "ListFields", (PyCFunction)ListFields, METH_NOARGS,
2602 "Lists all set fields of a message." },
2603 { "MergeFrom", (PyCFunction)MergeFrom, METH_O,
2604 "Merges a protocol message into the current message." },
2605 { "MergeFromString", (PyCFunction)MergeFromString, METH_O,
2606 "Merges a serialized message into the current message." },
2607 { "ParseFromString", (PyCFunction)ParseFromString, METH_O,
2608 "Parses a serialized message into the current message." },
2609 { "RegisterExtension", (PyCFunction)RegisterExtension, METH_O | METH_CLASS,
2610 "Registers an extension with the current message." },
2611 { "SerializePartialToString", (PyCFunction)SerializePartialToString,
2612 METH_NOARGS,
2613 "Serializes the message to a string, even if it isn't initialized." },
2614 { "SerializeToString", (PyCFunction)SerializeToString, METH_NOARGS,
2615 "Serializes the message to a string, only for initialized messages." },
2616 { "SetInParent", (PyCFunction)SetInParent, METH_NOARGS,
2617 "Sets the has bit of the given field in its parent message." },
2618 { "WhichOneof", (PyCFunction)WhichOneof, METH_O,
2619 "Returns the name of the field set inside a oneof, "
2620 "or None if no field is set." },
2621
2622 // Static Methods.
2623 { "_CheckCalledFromGeneratedFile", (PyCFunction)_CheckCalledFromGeneratedFile,
2624 METH_NOARGS | METH_STATIC,
2625 "Raises TypeError if the caller is not in a _pb2.py file."},
2626 { NULL, NULL}
2627 };
2628
2629 static bool SetCompositeField(
2630 CMessage* self, PyObject* name, PyObject* value) {
2631 if (self->composite_fields == NULL) {
2632 self->composite_fields = PyDict_New();
2633 if (self->composite_fields == NULL) {
2634 return false;
2635 }
2636 }
2637 return PyDict_SetItem(self->composite_fields, name, value) == 0;
2638 }
2639
2640 PyObject* GetAttr(CMessage* self, PyObject* name) {
2641 PyObject* value = self->composite_fields ?
2642 PyDict_GetItem(self->composite_fields, name) : NULL;
2643 if (value != NULL) {
2644 Py_INCREF(value);
2645 return value;
2646 }
2647
2648 const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
2649 if (field_descriptor == NULL) {
2650 return CMessage_Type.tp_base->tp_getattro(
2651 reinterpret_cast<PyObject*>(self), name);
2652 }
2653
2654 if (field_descriptor->is_map()) {
2655 PyObject* py_container = NULL;
2656 const Descriptor* entry_type = field_descriptor->message_type();
2657 const FieldDescriptor* value_type = entry_type->FindFieldByName("value");
2658 if (value_type->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2659 CMessageClass* value_class = cdescriptor_pool::GetMessageClass(
2660 GetDescriptorPoolForMessage(self), value_type->message_type());
2661 if (value_class == NULL) {
2662 return NULL;
2663 }
2664 py_container =
2665 NewMessageMapContainer(self, field_descriptor, value_class);
2666 } else {
2667 py_container = NewScalarMapContainer(self, field_descriptor);
2668 }
2669 if (py_container == NULL) {
2670 return NULL;
2671 }
2672 if (!SetCompositeField(self, name, py_container)) {
2673 Py_DECREF(py_container);
2674 return NULL;
2675 }
2676 return py_container;
2677 }
2678
2679 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
2680 PyObject* py_container = NULL;
2681 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2682 CMessageClass* message_class = cdescriptor_pool::GetMessageClass(
2683 GetDescriptorPoolForMessage(self), field_descriptor->message_type());
2684 if (message_class == NULL) {
2685 return NULL;
2686 }
2687 py_container = repeated_composite_container::NewContainer(
2688 self, field_descriptor, message_class);
2689 } else {
2690 py_container = repeated_scalar_container::NewContainer(
2691 self, field_descriptor);
2692 }
2693 if (py_container == NULL) {
2694 return NULL;
2695 }
2696 if (!SetCompositeField(self, name, py_container)) {
2697 Py_DECREF(py_container);
2698 return NULL;
2699 }
2700 return py_container;
2701 }
2702
2703 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2704 PyObject* sub_message = InternalGetSubMessage(self, field_descriptor);
2705 if (sub_message == NULL) {
2706 return NULL;
2707 }
2708 if (!SetCompositeField(self, name, sub_message)) {
2709 Py_DECREF(sub_message);
2710 return NULL;
2711 }
2712 return sub_message;
2713 }
2714
2715 return InternalGetScalar(self->message, field_descriptor);
2716 }
2717
2718 int SetAttr(CMessage* self, PyObject* name, PyObject* value) {
2719 if (self->composite_fields && PyDict_Contains(self->composite_fields, name)) {
2720 PyErr_SetString(PyExc_TypeError, "Can't set composite field");
2721 return -1;
2722 }
2723
2724 const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
2725 if (field_descriptor != NULL) {
2726 AssureWritable(self);
2727 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
2728 PyErr_Format(PyExc_AttributeError, "Assignment not allowed to repeated "
2729 "field \"%s\" in protocol message object.",
2730 field_descriptor->name().c_str());
2731 return -1;
2732 } else {
2733 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2734 PyErr_Format(PyExc_AttributeError, "Assignment not allowed to "
2735 "field \"%s\" in protocol message object.",
2736 field_descriptor->name().c_str());
2737 return -1;
2738 } else {
2739 return InternalSetScalar(self, field_descriptor, value);
2740 }
2741 }
2742 }
2743
2744 PyErr_Format(PyExc_AttributeError,
2745 "Assignment not allowed "
2746 "(no field \"%s\" in protocol message object).",
2747 PyString_AsString(name));
2748 return -1;
2749 }
2750
2751 } // namespace cmessage
2752
2753 PyTypeObject CMessage_Type = {
2754 PyVarObject_HEAD_INIT(&CMessageClass_Type, 0)
2755 FULL_MODULE_NAME ".CMessage", // tp_name
2756 sizeof(CMessage), // tp_basicsize
2757 0, // tp_itemsize
2758 (destructor)cmessage::Dealloc, // tp_dealloc
2759 0, // tp_print
2760 0, // tp_getattr
2761 0, // tp_setattr
2762 0, // tp_compare
2763 (reprfunc)cmessage::ToStr, // tp_repr
2764 0, // tp_as_number
2765 0, // tp_as_sequence
2766 0, // tp_as_mapping
2767 PyObject_HashNotImplemented, // tp_hash
2768 0, // tp_call
2769 (reprfunc)cmessage::ToStr, // tp_str
2770 (getattrofunc)cmessage::GetAttr, // tp_getattro
2771 (setattrofunc)cmessage::SetAttr, // tp_setattro
2772 0, // tp_as_buffer
2773 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
2774 "A ProtocolMessage", // tp_doc
2775 0, // tp_traverse
2776 0, // tp_clear
2777 (richcmpfunc)cmessage::RichCompare, // tp_richcompare
2778 0, // tp_weaklistoffset
2779 0, // tp_iter
2780 0, // tp_iternext
2781 cmessage::Methods, // tp_methods
2782 0, // tp_members
2783 cmessage::Getters, // tp_getset
2784 0, // tp_base
2785 0, // tp_dict
2786 0, // tp_descr_get
2787 0, // tp_descr_set
2788 0, // tp_dictoffset
2789 (initproc)cmessage::Init, // tp_init
2790 0, // tp_alloc
2791 cmessage::New, // tp_new
2792 };
2793
2794 // --- Exposing the C proto living inside Python proto to C code:
2795
2796 const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg);
2797 Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg);
2798
2799 static const Message* GetCProtoInsidePyProtoImpl(PyObject* msg) {
2800 if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
2801 return NULL;
2802 }
2803 CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
2804 return cmsg->message;
2805 }
2806
2807 static Message* MutableCProtoInsidePyProtoImpl(PyObject* msg) {
2808 if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
2809 return NULL;
2810 }
2811 CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
2812 if ((cmsg->composite_fields && PyDict_Size(cmsg->composite_fields) != 0) ||
2813 (cmsg->extensions != NULL &&
2814 PyDict_Size(cmsg->extensions->values) != 0)) {
2815 // There is currently no way of accurately syncing arbitrary changes to
2816 // the underlying C++ message back to the CMessage (e.g. removed repeated
2817 // composite containers). We only allow direct mutation of the underlying
2818 // C++ message if there is no child data in the CMessage.
2819 return NULL;
2820 }
2821 cmessage::AssureWritable(cmsg);
2822 return cmsg->message;
2823 }
2824
// Human-readable description of this extension module.
// NOTE(review): presumably installed as the module's __doc__ by the module
// init code, which is outside this part of the file -- confirm.
static const char module_docstring[] =
    "python-proto2 is a module that can be used to enhance proto2 Python API\n"
    "performance.\n\n"
    "It provides access to the protocol buffers C++ reflection API that\n"
    "implements the basic protocol buffer functions.";
2831
2832 void InitGlobals() {
2833 // TODO(gps): Check all return values in this function for NULL and propagate
2834 // the error (MemoryError) on up to result in an import failure. These should
2835 // also be freed and reset to NULL during finalization.
2836 kPythonZero = PyInt_FromLong(0);
2837 kint32min_py = PyInt_FromLong(kint32min);
2838 kint32max_py = PyInt_FromLong(kint32max);
2839 kuint32max_py = PyLong_FromLongLong(kuint32max);
2840 kint64min_py = PyLong_FromLongLong(kint64min);
2841 kint64max_py = PyLong_FromLongLong(kint64max);
2842 kuint64max_py = PyLong_FromUnsignedLongLong(kuint64max);
2843
2844 kDESCRIPTOR = PyString_FromString("DESCRIPTOR");
2845 k_cdescriptor = PyString_FromString("_cdescriptor");
2846 kfull_name = PyString_FromString("full_name");
2847 k_extensions_by_name = PyString_FromString("_extensions_by_name");
2848 k_extensions_by_number = PyString_FromString("_extensions_by_number");
2849
2850 PyObject *dummy_obj = PySet_New(NULL);
2851 kEmptyWeakref = PyWeakref_NewRef(dummy_obj, NULL);
2852 Py_DECREF(dummy_obj);
2853 }
2854
2855 bool InitProto2MessageModule(PyObject *m) {
2856 // Initialize types and globals in descriptor.cc
2857 if (!InitDescriptor()) {
2858 return false;
2859 }
2860
2861 // Initialize types and globals in descriptor_pool.cc
2862 if (!InitDescriptorPool()) {
2863 return false;
2864 }
2865
2866 // Initialize constants defined in this file.
2867 InitGlobals();
2868
2869 CMessageClass_Type.tp_base = &PyType_Type;
2870 if (PyType_Ready(&CMessageClass_Type) < 0) {
2871 return false;
2872 }
2873 PyModule_AddObject(m, "MessageMeta",
2874 reinterpret_cast<PyObject*>(&CMessageClass_Type));
2875
2876 if (PyType_Ready(&CMessage_Type) < 0) {
2877 return false;
2878 }
2879
2880 // DESCRIPTOR is set on each protocol buffer message class elsewhere, but set
2881 // it here as well to document that subclasses need to set it.
2882 PyDict_SetItem(CMessage_Type.tp_dict, kDESCRIPTOR, Py_None);
2883 // Subclasses with message extensions will override _extensions_by_name and
2884 // _extensions_by_number with fresh mutable dictionaries in AddDescriptors.
2885 // All other classes can share this same immutable mapping.
2886 ScopedPyObjectPtr empty_dict(PyDict_New());
2887 if (empty_dict == NULL) {
2888 return false;
2889 }
2890 ScopedPyObjectPtr immutable_dict(PyDictProxy_New(empty_dict.get()));
2891 if (immutable_dict == NULL) {
2892 return false;
2893 }
2894 if (PyDict_SetItem(CMessage_Type.tp_dict,
2895 k_extensions_by_name, immutable_dict.get()) < 0) {
2896 return false;
2897 }
2898 if (PyDict_SetItem(CMessage_Type.tp_dict,
2899 k_extensions_by_number, immutable_dict.get()) < 0) {
2900 return false;
2901 }
2902
2903 PyModule_AddObject(m, "Message", reinterpret_cast<PyObject*>(&CMessage_Type));
2904
2905 // Initialize Repeated container types.
2906 {
2907 if (PyType_Ready(&RepeatedScalarContainer_Type) < 0) {
2908 return false;
2909 }
2910
2911 PyModule_AddObject(m, "RepeatedScalarContainer",
2912 reinterpret_cast<PyObject*>(
2913 &RepeatedScalarContainer_Type));
2914
2915 if (PyType_Ready(&RepeatedCompositeContainer_Type) < 0) {
2916 return false;
2917 }
2918
2919 PyModule_AddObject(
2920 m, "RepeatedCompositeContainer",
2921 reinterpret_cast<PyObject*>(
2922 &RepeatedCompositeContainer_Type));
2923
2924 // Register them as collections.Sequence
2925 ScopedPyObjectPtr collections(PyImport_ImportModule("collections"));
2926 if (collections == NULL) {
2927 return false;
2928 }
2929 ScopedPyObjectPtr mutable_sequence(
2930 PyObject_GetAttrString(collections.get(), "MutableSequence"));
2931 if (mutable_sequence == NULL) {
2932 return false;
2933 }
2934 if (ScopedPyObjectPtr(
2935 PyObject_CallMethod(mutable_sequence.get(), "register", "O",
2936 &RepeatedScalarContainer_Type)) == NULL) {
2937 return false;
2938 }
2939 if (ScopedPyObjectPtr(
2940 PyObject_CallMethod(mutable_sequence.get(), "register", "O",
2941 &RepeatedCompositeContainer_Type)) == NULL) {
2942 return false;
2943 }
2944 }
2945
2946 // Initialize Map container types.
2947 {
2948 // ScalarMapContainer_Type derives from our MutableMapping type.
2949 ScopedPyObjectPtr containers(PyImport_ImportModule(
2950 "google.protobuf.internal.containers"));
2951 if (containers == NULL) {
2952 return false;
2953 }
2954
2955 ScopedPyObjectPtr mutable_mapping(
2956 PyObject_GetAttrString(containers.get(), "MutableMapping"));
2957 if (mutable_mapping == NULL) {
2958 return false;
2959 }
2960
2961 if (!PyObject_TypeCheck(mutable_mapping.get(), &PyType_Type)) {
2962 return false;
2963 }
2964
2965 Py_INCREF(mutable_mapping.get());
2966 #if PY_MAJOR_VERSION >= 3
2967 PyObject* bases = PyTuple_New(1);
2968 PyTuple_SET_ITEM(bases, 0, mutable_mapping.get());
2969
2970 ScalarMapContainer_Type =
2971 PyType_FromSpecWithBases(&ScalarMapContainer_Type_spec, bases);
2972 PyModule_AddObject(m, "ScalarMapContainer", ScalarMapContainer_Type);
2973 #else
2974 ScalarMapContainer_Type.tp_base =
2975 reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
2976
2977 if (PyType_Ready(&ScalarMapContainer_Type) < 0) {
2978 return false;
2979 }
2980
2981 PyModule_AddObject(m, "ScalarMapContainer",
2982 reinterpret_cast<PyObject*>(&ScalarMapContainer_Type));
2983 #endif
2984
2985 if (PyType_Ready(&MapIterator_Type) < 0) {
2986 return false;
2987 }
2988
2989 PyModule_AddObject(m, "MapIterator",
2990 reinterpret_cast<PyObject*>(&MapIterator_Type));
2991
2992
2993 #if PY_MAJOR_VERSION >= 3
2994 MessageMapContainer_Type =
2995 PyType_FromSpecWithBases(&MessageMapContainer_Type_spec, bases);
2996 PyModule_AddObject(m, "MessageMapContainer", MessageMapContainer_Type);
2997 #else
2998 Py_INCREF(mutable_mapping.get());
2999 MessageMapContainer_Type.tp_base =
3000 reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
3001
3002 if (PyType_Ready(&MessageMapContainer_Type) < 0) {
3003 return false;
3004 }
3005
3006 PyModule_AddObject(m, "MessageMapContainer",
3007 reinterpret_cast<PyObject*>(&MessageMapContainer_Type));
3008 #endif
3009 }
3010
3011 if (PyType_Ready(&ExtensionDict_Type) < 0) {
3012 return false;
3013 }
3014 PyModule_AddObject(
3015 m, "ExtensionDict",
3016 reinterpret_cast<PyObject*>(&ExtensionDict_Type));
3017
3018 // Expose the DescriptorPool used to hold all descriptors added from generated
3019 // pb2.py files.
3020 // PyModule_AddObject steals a reference.
3021 Py_INCREF(GetDefaultDescriptorPool());
3022 PyModule_AddObject(m, "default_pool",
3023 reinterpret_cast<PyObject*>(GetDefaultDescriptorPool()));
3024
3025 PyModule_AddObject(m, "DescriptorPool", reinterpret_cast<PyObject*>(
3026 &PyDescriptorPool_Type));
3027
3028 // This implementation provides full Descriptor types, we advertise it so that
3029 // descriptor.py can use them in replacement of the Python classes.
3030 PyModule_AddIntConstant(m, "_USE_C_DESCRIPTORS", 1);
3031
3032 PyModule_AddObject(m, "Descriptor", reinterpret_cast<PyObject*>(
3033 &PyMessageDescriptor_Type));
3034 PyModule_AddObject(m, "FieldDescriptor", reinterpret_cast<PyObject*>(
3035 &PyFieldDescriptor_Type));
3036 PyModule_AddObject(m, "EnumDescriptor", reinterpret_cast<PyObject*>(
3037 &PyEnumDescriptor_Type));
3038 PyModule_AddObject(m, "EnumValueDescriptor", reinterpret_cast<PyObject*>(
3039 &PyEnumValueDescriptor_Type));
3040 PyModule_AddObject(m, "FileDescriptor", reinterpret_cast<PyObject*>(
3041 &PyFileDescriptor_Type));
3042 PyModule_AddObject(m, "OneofDescriptor", reinterpret_cast<PyObject*>(
3043 &PyOneofDescriptor_Type));
3044 PyModule_AddObject(m, "ServiceDescriptor", reinterpret_cast<PyObject*>(
3045 &PyServiceDescriptor_Type));
3046 PyModule_AddObject(m, "MethodDescriptor", reinterpret_cast<PyObject*>(
3047 &PyMethodDescriptor_Type));
3048
3049 PyObject* enum_type_wrapper = PyImport_ImportModule(
3050 "google.protobuf.internal.enum_type_wrapper");
3051 if (enum_type_wrapper == NULL) {
3052 return false;
3053 }
3054 EnumTypeWrapper_class =
3055 PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
3056 Py_DECREF(enum_type_wrapper);
3057
3058 PyObject* message_module = PyImport_ImportModule(
3059 "google.protobuf.message");
3060 if (message_module == NULL) {
3061 return false;
3062 }
3063 EncodeError_class = PyObject_GetAttrString(message_module, "EncodeError");
3064 DecodeError_class = PyObject_GetAttrString(message_module, "DecodeError");
3065 PythonMessage_class = PyObject_GetAttrString(message_module, "Message");
3066 Py_DECREF(message_module);
3067
3068 PyObject* pickle_module = PyImport_ImportModule("pickle");
3069 if (pickle_module == NULL) {
3070 return false;
3071 }
3072 PickleError_class = PyObject_GetAttrString(pickle_module, "PickleError");
3073 Py_DECREF(pickle_module);
3074
3075 // Override {Get,Mutable}CProtoInsidePyProto.
3076 GetCProtoInsidePyProtoPtr = GetCProtoInsidePyProtoImpl;
3077 MutableCProtoInsidePyProtoPtr = MutableCProtoInsidePyProtoImpl;
3078
3079 return true;
3080 }
3081
3082 } // namespace python
3083 } // namespace protobuf
3084
3085 } // namespace google
3086