1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 // Author: anuraag@google.com (Anuraag Agrawal) 9 // Author: tibell@google.com (Johan Tibell) 10 11 #ifndef GOOGLE_PROTOBUF_PYTHON_CPP_MESSAGE_H__ 12 #define GOOGLE_PROTOBUF_PYTHON_CPP_MESSAGE_H__ 13 14 #define PY_SSIZE_T_CLEAN 15 #include <Python.h> 16 17 #include <cstdint> 18 #include <memory> 19 #include <string> 20 #include <unordered_map> 21 22 #include "absl/strings/string_view.h" 23 24 namespace google { 25 namespace protobuf { 26 27 class Message; 28 class Reflection; 29 class FieldDescriptor; 30 class Descriptor; 31 class DescriptorPool; 32 class MessageFactory; 33 34 namespace python { 35 36 struct ExtensionDict; 37 struct PyMessageFactory; 38 struct CMessageClass; 39 40 // Most of the complexity of the Message class comes from the "Release" 41 // behavior: 42 // 43 // When a field is cleared, it is only detached from its message. Existing 44 // references to submessages, to repeated container etc. won't see any change, 45 // as if the data was effectively managed by these containers. 46 // 47 // ExtensionDicts and UnknownFields containers do NOT follow this rule. They 48 // don't store any data, and always refer to their parent message. 49 50 struct ContainerBase { 51 PyObject_HEAD; 52 53 // Strong reference to a parent message object. For a CMessage there are three 54 // cases: 55 // - For a top-level message, this pointer is NULL. 56 // - For a sub-message, this points to the parent message. 57 // - For a message managed externally, this is a owned reference to Py_None. 58 // 59 // For all other types: repeated containers, maps, it always point to a 60 // valid parent CMessage. 61 struct CMessage* parent; 62 63 // If this object belongs to a parent message, describes which field it comes 64 // from. 65 // The pointer is owned by the DescriptorPool (which is kept alive 66 // through the message's Python class) 67 const FieldDescriptor* parent_field_descriptor; 68 AsPyObjectContainerBase69 PyObject* AsPyObject() { return reinterpret_cast<PyObject*>(this); } 70 71 // The Three methods below are only used by Repeated containers, and Maps. 72 73 // This implementation works for all containers which have a parent. 74 PyObject* DeepCopy(); 75 // Delete this container object from its parent. Does not work for messages. 76 void RemoveFromParentCache(); 77 }; 78 79 typedef struct CMessage : public ContainerBase { 80 // Pointer to the C++ Message object for this CMessage. 81 // - If this object has no parent, we own this pointer. 82 // - If this object has a parent message, the parent owns this pointer. 83 Message* message; 84 85 // Indicates this submessage is pointing to a default instance of a message. 86 // Submessages are always first created as read only messages and are then 87 // made writable, at which point this field is set to false. 88 bool read_only; 89 90 // A mapping indexed by field, containing weak references to contained objects 91 // which need to implement the "Release" mechanism: 92 // direct submessages, RepeatedCompositeContainer, RepeatedScalarContainer 93 // and MapContainer. 94 typedef std::unordered_map<const FieldDescriptor*, ContainerBase*> 95 CompositeFieldsMap; 96 CompositeFieldsMap* composite_fields; 97 98 // A mapping containing weak references to indirect child messages, accessed 99 // through containers: repeated messages, and values of message maps. 100 // This avoid the creation of similar maps in each of those containers. 101 typedef std::unordered_map<const Message*, CMessage*> SubMessagesMap; 102 SubMessagesMap* child_submessages; 103 104 // Implements the "weakref" protocol for this object. 105 PyObject* weakreflist; 106 107 // Return a *borrowed* reference to the message class. GetMessageClassCMessage108 CMessageClass* GetMessageClass() { 109 return reinterpret_cast<CMessageClass*>(Py_TYPE(this)); 110 } 111 112 // For container containing messages, return a Python object for the given 113 // pointer to a message. 114 CMessage* BuildSubMessageFromPointer(const FieldDescriptor* field_descriptor, 115 Message* sub_message, 116 CMessageClass* message_class); 117 CMessage* MaybeReleaseSubMessage(Message* sub_message); 118 } CMessage; 119 120 // The (meta) type of all Messages classes. 121 // It allows us to cache some C++ pointers in the class object itself, they are 122 // faster to extract than from the type's dictionary. 123 124 struct CMessageClass { 125 // This is how CPython subclasses C structures: the base structure must be 126 // the first member of the object. 127 PyHeapTypeObject super; 128 129 // C++ descriptor of this message. 130 const Descriptor* message_descriptor; 131 132 // Owned reference, used to keep the pointer above alive. 133 // This reference must stay alive until all message pointers are destructed. 134 PyObject* py_message_descriptor; 135 136 // The Python MessageFactory used to create the class. It is needed to resolve 137 // fields descriptors, including extensions fields; its C++ MessageFactory is 138 // used to instantiate submessages. 139 // This reference must stay alive until all message pointers are destructed. 140 PyMessageFactory* py_message_factory; 141 AsPyObjectCMessageClass142 PyObject* AsPyObject() { 143 return reinterpret_cast<PyObject*>(this); 144 } 145 }; 146 147 extern PyTypeObject* CMessageClass_Type; 148 extern PyTypeObject* CMessage_Type; 149 150 namespace cmessage { 151 152 // Internal function to create a new empty Message Python object, but with empty 153 // pointers to the C++ objects. 154 // The caller must fill self->message, self->owner and eventually self->parent. 155 CMessage* NewEmptyMessage(CMessageClass* type); 156 157 // Retrieves the C++ descriptor of a Python Extension descriptor. 158 // On error, return NULL with an exception set. 159 const FieldDescriptor* GetExtensionDescriptor(PyObject* extension); 160 161 // Initializes a new CMessage instance for a submessage. Only called once per 162 // submessage as the result is cached in composite_fields. 163 // 164 // Corresponds to reflection api method GetMessage. 165 CMessage* InternalGetSubMessage( 166 CMessage* self, const FieldDescriptor* field_descriptor); 167 168 // Deletes a range of items in a repeated field (following a 169 // removal in a RepeatedCompositeContainer). 170 // 171 // Corresponds to reflection api method RemoveLast. 172 int DeleteRepeatedField(CMessage* self, 173 const FieldDescriptor* field_descriptor, 174 PyObject* slice); 175 176 // Sets the specified scalar value to the message. 177 int InternalSetScalar(CMessage* self, 178 const FieldDescriptor* field_descriptor, 179 PyObject* value); 180 181 // Sets the specified scalar value to the message. Requires it is not a Oneof. 182 int InternalSetNonOneofScalar(Message* message, 183 const FieldDescriptor* field_descriptor, 184 PyObject* arg); 185 186 // Retrieves the specified scalar value from the message. 187 // 188 // Returns a new python reference. 189 PyObject* InternalGetScalar(const Message* message, 190 const FieldDescriptor* field_descriptor); 191 192 bool SetCompositeField(CMessage* self, const FieldDescriptor* field, 193 ContainerBase* value); 194 195 bool SetSubmessage(CMessage* self, CMessage* submessage); 196 197 // Clears the message, removing all contained data. Extension dictionary and 198 // submessages are released first if there are remaining external references. 199 // 200 // Corresponds to message api method Clear. 201 PyObject* Clear(CMessage* self); 202 203 // Clears the data described by the given descriptor. 204 // Returns -1 on error. 205 // 206 // Corresponds to reflection api method ClearField. 207 int ClearFieldByDescriptor(CMessage* self, const FieldDescriptor* descriptor); 208 209 // Checks if the message has the field described by the descriptor. Used for 210 // extensions (which have no name). 211 // Returns 1 if true, 0 if false, and -1 on error. 212 // 213 // Corresponds to reflection api method HasField 214 int HasFieldByDescriptor(CMessage* self, 215 const FieldDescriptor* field_descriptor); 216 217 // Checks if the message has the named field. 218 // 219 // Corresponds to reflection api method HasField. 220 PyObject* HasField(CMessage* self, PyObject* arg); 221 222 // Initializes values of fields on a newly constructed message. 223 // Note that positional arguments are disallowed: 'args' must be NULL or the 224 // empty tuple. 225 int InitAttributes(CMessage* self, PyObject* args, PyObject* kwargs); 226 227 PyObject* MergeFrom(CMessage* self, PyObject* arg); 228 229 // Get a field from a message. 230 PyObject* GetFieldValue(CMessage* self, 231 const FieldDescriptor* field_descriptor); 232 // Sets the value of a scalar field in a message. 233 // On error, return -1 with an extension set. 234 int SetFieldValue(CMessage* self, const FieldDescriptor* field_descriptor, 235 PyObject* value); 236 237 PyObject* FindInitializationErrors(CMessage* self); 238 239 int AssureWritable(CMessage* self); 240 241 // Returns the message factory for the given message. 242 // This is equivalent to message.MESSAGE_FACTORY 243 // 244 // The returned factory is suitable for finding fields and building submessages, 245 // even in the case of extensions. 246 // Returns a *borrowed* reference, and never fails because we pass a CMessage. 247 PyMessageFactory* GetFactoryForMessage(CMessage* message); 248 249 PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg); 250 251 } // namespace cmessage 252 253 254 /* Is 64bit */ 255 #define IS_64BIT (SIZEOF_LONG == 8) 256 257 #define FIELD_IS_REPEATED(field_descriptor) \ 258 ((field_descriptor)->label() == FieldDescriptor::LABEL_REPEATED) 259 260 #define PROTOBUF_CHECK_GET_INT32(arg, value, err) \ 261 int32_t value; \ 262 if (!CheckAndGetInteger(arg, &value)) { \ 263 return err; \ 264 } 265 266 #define PROTOBUF_CHECK_GET_INT64(arg, value, err) \ 267 int64_t value; \ 268 if (!CheckAndGetInteger(arg, &value)) { \ 269 return err; \ 270 } 271 272 #define PROTOBUF_CHECK_GET_UINT32(arg, value, err) \ 273 uint32_t value; \ 274 if (!CheckAndGetInteger(arg, &value)) { \ 275 return err; \ 276 } 277 278 #define PROTOBUF_CHECK_GET_UINT64(arg, value, err) \ 279 uint64_t value; \ 280 if (!CheckAndGetInteger(arg, &value)) { \ 281 return err; \ 282 } 283 284 #define PROTOBUF_CHECK_GET_FLOAT(arg, value, err) \ 285 float value; \ 286 if (!CheckAndGetFloat(arg, &value)) { \ 287 return err; \ 288 } 289 290 #define PROTOBUF_CHECK_GET_DOUBLE(arg, value, err) \ 291 double value; \ 292 if (!CheckAndGetDouble(arg, &value)) { \ 293 return err; \ 294 } 295 296 #define PROTOBUF_CHECK_GET_BOOL(arg, value, err) \ 297 bool value; \ 298 if (!CheckAndGetBool(arg, &value)) { \ 299 return err; \ 300 } 301 302 #define FULL_MODULE_NAME "google.protobuf.pyext._message" 303 304 void FormatTypeError(PyObject* arg, const char* expected_types); 305 template<class T> 306 bool CheckAndGetInteger(PyObject* arg, T* value); 307 bool CheckAndGetDouble(PyObject* arg, double* value); 308 bool CheckAndGetFloat(PyObject* arg, float* value); 309 bool CheckAndGetBool(PyObject* arg, bool* value); 310 PyObject* CheckString(PyObject* arg, const FieldDescriptor* descriptor); 311 bool CheckAndSetString( 312 PyObject* arg, Message* message, 313 const FieldDescriptor* descriptor, 314 const Reflection* reflection, 315 bool append, 316 int index); 317 PyObject* ToStringObject(const FieldDescriptor* descriptor, 318 absl::string_view value); 319 320 // Check if the passed field descriptor belongs to the given message. 321 // If not, return false and set a Python exception (a KeyError) 322 bool CheckFieldBelongsToMessage(const FieldDescriptor* field_descriptor, 323 const Message* message); 324 325 extern PyObject* PickleError_class; 326 327 PyObject* PyMessage_New(const Descriptor* descriptor, 328 PyObject* py_message_factory); 329 const Message* PyMessage_GetMessagePointer(PyObject* msg); 330 Message* PyMessage_GetMutableMessagePointer(PyObject* msg); 331 PyObject* PyMessage_NewMessageOwnedExternally(Message* message, 332 PyObject* py_message_factory); 333 334 bool InitProto2MessageModule(PyObject *m); 335 336 // These are referenced by repeated_scalar_container, and must 337 // be explicitly instantiated. 338 extern template bool CheckAndGetInteger<int32_t>(PyObject*, int32_t*); 339 extern template bool CheckAndGetInteger<int64_t>(PyObject*, int64_t*); 340 extern template bool CheckAndGetInteger<uint32_t>(PyObject*, uint32_t*); 341 extern template bool CheckAndGetInteger<uint64_t>(PyObject*, uint64_t*); 342 343 } // namespace python 344 } // namespace protobuf 345 } // namespace google 346 347 #endif // GOOGLE_PROTOBUF_PYTHON_CPP_MESSAGE_H__ 348