# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.  All rights reserved.
# https://developers.google.com/protocol-buffers/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Provides type checking routines.

This module defines type checking utilities in the form of dictionaries:

_VALUE_CHECKERS: A dictionary of scalar C++ field types and a value
  validation object.
TYPE_TO_BYTE_SIZE_FN: A dictionary of field types and a size computing
  function.
TYPE_TO_ENCODER: A dictionary of field types and an encoder constructor.
TYPE_TO_SIZER: A dictionary of field types and a sizer constructor.
TYPE_TO_DECODER: A dictionary of field types and a decoder constructor.
FIELD_TYPE_TO_WIRE_TYPE: A dictionary of field types and their
  corresponding wire types.
"""

__author__ = 'robinson@google.com (Will Robinson)'

import numbers
import six

if six.PY3:
  long = int

from google.protobuf.internal import api_implementation
from google.protobuf.internal import decoder
from google.protobuf.internal import encoder
from google.protobuf.internal import wire_format
from google.protobuf import descriptor

_FieldDescriptor = descriptor.FieldDescriptor


def SupportsOpenEnums(field_descriptor):
  """Tells whether the field's containing message uses proto3 syntax.

  Args:
    field_descriptor: FieldDescriptor object for the field.

  Returns:
    True if the `syntax` of the field's containing type is "proto3".
  """
  owning_message = field_descriptor.containing_type
  return owning_message.syntax == "proto3"


def GetTypeChecker(field):
  """Returns a type checker for a message field of the specified types.

  Args:
    field: FieldDescriptor object for this field.

  Returns:
    A checker object whose CheckValue() can be used to verify the types
    of values assigned to a field of the specified type.
  """
  cpp_type = field.cpp_type

  if cpp_type == _FieldDescriptor.CPPTYPE_ENUM:
    if not SupportsOpenEnums(field):
      return EnumValueChecker(field.enum_type)
    # When open enums are supported, any int32 can be assigned.
    return _VALUE_CHECKERS[_FieldDescriptor.CPPTYPE_INT32]

  # CPPTYPE_STRING covers several field types; only TYPE_STRING fields get
  # the unicode checker, the rest fall through to the table lookup below.
  if (cpp_type == _FieldDescriptor.CPPTYPE_STRING and
      field.type == _FieldDescriptor.TYPE_STRING):
    return UnicodeValueChecker()

  return _VALUE_CHECKERS[cpp_type]


# None of the typecheckers below make any attempt to guard against people
# subclassing builtin types and doing weird things.  We're not trying to
# protect against malicious clients here, just people accidentally shooting
# themselves in the foot in obvious ways.
class TypeChecker(object):

  """Type checker used to catch type errors as early as possible
  when the client is setting scalar fields in protocol messages.
  """

  def __init__(self, *acceptable_types):
    """Stores the types accepted by CheckValue().

    Args:
      *acceptable_types: Types a proposed value may be an instance of. When
        the first one is bool or float, accepted values are converted to it.
    """
    self._acceptable_types = acceptable_types

  def CheckValue(self, proposed_value):
    """Type check the provided value and return it.

    The returned value might have been normalized to another type.

    Raises:
      TypeError: If proposed_value is not an instance of an accepted type.
    """
    accepted = self._acceptable_types
    if not isinstance(proposed_value, accepted):
      # '%.1024r' caps the repr of the offending value at 1024 characters.
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value), accepted))
    # Some field types(float, double and bool) accept other types, must
    # convert to the correct type in such cases.
    if accepted and accepted[0] in (bool, float):
      return accepted[0](proposed_value)
    return proposed_value


class TypeCheckerWithDefault(TypeChecker):

  """TypeChecker that additionally carries a default value."""

  def __init__(self, default_value, *acceptable_types):
    """Stores the default value alongside the accepted types.

    Args:
      default_value: Value handed back by DefaultValue().
      *acceptable_types: Forwarded to TypeChecker.
    """
    super(TypeCheckerWithDefault, self).__init__(*acceptable_types)
    self._default_value = default_value

  def DefaultValue(self):
    """Returns the default value supplied at construction time."""
    return self._default_value


# IntValueChecker and its subclasses perform integer type-checks
# and bounds-checks.
class IntValueChecker(object):

  """Checker used for integer fields.  Performs type-check and range check.

  Subclasses provide the _MIN and _MAX bounds (inclusive) and the _TYPE
  that accepted values are converted to.
  """

  def CheckValue(self, proposed_value):
    """Validates proposed_value and returns it converted to self._TYPE.

    Raises:
      TypeError: If proposed_value is not a numbers.Integral instance.
      ValueError: If proposed_value lies outside [_MIN, _MAX].
    """
    if not isinstance(proposed_value, numbers.Integral):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value),
                       six.integer_types))
    as_int = int(proposed_value)
    if as_int < self._MIN or as_int > self._MAX:
      raise ValueError('Value out of range: %d' % proposed_value)
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler.
    return self._TYPE(proposed_value)

  def DefaultValue(self):
    """Returns 0, the default for integer fields."""
    return 0


class EnumValueChecker(object):

  """Checker used for enum fields.  Performs type-check and range check."""

  def __init__(self, enum_type):
    """Stores the enum descriptor whose numbers are considered valid.

    Args:
      enum_type: Object exposing `values_by_number` and `values`.
    """
    self._enum_type = enum_type

  def CheckValue(self, proposed_value):
    """Validates proposed_value against the enum and returns it unchanged.

    Raises:
      TypeError: If proposed_value is not a numbers.Integral instance.
      ValueError: If the number is not present in the enum's
        values_by_number.
    """
    if not isinstance(proposed_value, numbers.Integral):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value),
                       six.integer_types))
    known_numbers = self._enum_type.values_by_number
    if int(proposed_value) not in known_numbers:
      raise ValueError('Unknown enum value: %d' % proposed_value)
    return proposed_value

  def DefaultValue(self):
    """Returns the number of the first value listed in the enum."""
    first_value = self._enum_type.values[0]
    return first_value.number


class UnicodeValueChecker(object):

  """Checker used for string fields.

  Always returns a unicode value, even if the input is of type str.
  """

  def CheckValue(self, proposed_value):
    """Validates proposed_value and returns it as text.

    Bytes input is decoded as UTF-8; text input is returned as-is once it
    has been confirmed to be encodable as UTF-8.

    Raises:
      TypeError: If proposed_value is neither bytes nor six.text_type.
      ValueError: If bytes input is not valid UTF-8, or text input cannot
        be encoded as UTF-8.
    """
    accepted = (bytes, six.text_type)
    if not isinstance(proposed_value, accepted):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value), accepted))

    # If the value is of type 'bytes' make sure that it is valid UTF-8 data.
    if isinstance(proposed_value, bytes):
      try:
        return proposed_value.decode('utf-8')
      except UnicodeDecodeError:
        raise ValueError('%.1024r has type bytes, but isn\'t valid UTF-8 '
                         'encoding. Non-UTF-8 strings must be converted to '
                         'unicode objects before being added.' %
                         (proposed_value))

    # Text input: the encode result is discarded, only encodability matters.
    try:
      proposed_value.encode('utf8')
    except UnicodeEncodeError:
      raise ValueError('%.1024r isn\'t a valid unicode string and '
                       'can\'t be encoded in UTF-8.' %
                       (proposed_value))
    return proposed_value

  def DefaultValue(self):
    """Returns the empty unicode string."""
    return u""


class Int32ValueChecker(IntValueChecker):
  """Integer checker bounded to the signed 32-bit range."""
  # We're sure to use ints instead of longs here since comparison may be more
  # efficient.
  _MIN = -2147483648
  _MAX = 2147483647
  _TYPE = int


class Uint32ValueChecker(IntValueChecker):
  """Integer checker bounded to the unsigned 32-bit range."""
  _MIN = 0
  _MAX = (1 << 32) - 1
  _TYPE = int


class Int64ValueChecker(IntValueChecker):
  """Integer checker bounded to the signed 64-bit range."""
  _MIN = -(1 << 63)
  _MAX = (1 << 63) - 1
  _TYPE = long


class Uint64ValueChecker(IntValueChecker):
  """Integer checker bounded to the unsigned 64-bit range."""
  _MIN = 0
  _MAX = (1 << 64) - 1
  _TYPE = long


# The max 4 bytes float is about 3.4028234663852886e+38
_FLOAT_MAX = float.fromhex('0x1.fffffep+127')
_FLOAT_MIN = -_FLOAT_MAX
_INF = float('inf')
_NEG_INF = float('-inf')


class FloatValueChecker(object):

  """Checker used for float fields.  Performs type-check and range check.

  Values exceeding a 32-bit float will be converted to inf/-inf.
  """

  def CheckValue(self, proposed_value):
    """Check and convert proposed_value to float.

    Raises:
      TypeError: If proposed_value is not a numbers.Real instance.
    """
    if not isinstance(proposed_value, numbers.Real):
      raise TypeError(
          '%.1024r has type %s, but expected one of: numbers.Real' %
          (proposed_value, type(proposed_value)))
    as_float = float(proposed_value)
    # This inf rounding matches the C++ proto SafeDoubleToFloat logic.
    if as_float > _FLOAT_MAX:
      return _INF
    if as_float < _FLOAT_MIN:
      return _NEG_INF
    # TODO(jieluo): convert to 4 bytes float (c style float) at setters:
    # return struct.unpack('f', struct.pack('f', converted_value))
    return as_float

  def DefaultValue(self):
    """Returns 0.0, the default for float fields."""
    return 0.0


# Type-checkers for all scalar CPPTYPEs.
_VALUE_CHECKERS = {
    _FieldDescriptor.CPPTYPE_INT32: Int32ValueChecker(),
    _FieldDescriptor.CPPTYPE_INT64: Int64ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
    _FieldDescriptor.CPPTYPE_DOUBLE:
        TypeCheckerWithDefault(0.0, float, numbers.Real),
    _FieldDescriptor.CPPTYPE_FLOAT: FloatValueChecker(),
    _FieldDescriptor.CPPTYPE_BOOL:
        TypeCheckerWithDefault(False, bool, numbers.Integral),
    _FieldDescriptor.CPPTYPE_STRING: TypeCheckerWithDefault(b'', bytes),
}


# Map from field type to a function F, such that F(field_num, value)
# gives the total byte size for a value of the given type.  This
# byte size includes tag information and any other additional space
# associated with serializing "value".
TYPE_TO_BYTE_SIZE_FN = {
    _FieldDescriptor.TYPE_DOUBLE:
        wire_format.DoubleByteSize,
    _FieldDescriptor.TYPE_FLOAT:
        wire_format.FloatByteSize,
    _FieldDescriptor.TYPE_INT64:
        wire_format.Int64ByteSize,
    _FieldDescriptor.TYPE_UINT64:
        wire_format.UInt64ByteSize,
    _FieldDescriptor.TYPE_INT32:
        wire_format.Int32ByteSize,
    _FieldDescriptor.TYPE_FIXED64:
        wire_format.Fixed64ByteSize,
    _FieldDescriptor.TYPE_FIXED32:
        wire_format.Fixed32ByteSize,
    _FieldDescriptor.TYPE_BOOL:
        wire_format.BoolByteSize,
    _FieldDescriptor.TYPE_STRING:
        wire_format.StringByteSize,
    _FieldDescriptor.TYPE_GROUP:
        wire_format.GroupByteSize,
    _FieldDescriptor.TYPE_MESSAGE:
        wire_format.MessageByteSize,
    _FieldDescriptor.TYPE_BYTES:
        wire_format.BytesByteSize,
    _FieldDescriptor.TYPE_UINT32:
        wire_format.UInt32ByteSize,
    _FieldDescriptor.TYPE_ENUM:
        wire_format.EnumByteSize,
    _FieldDescriptor.TYPE_SFIXED32:
        wire_format.SFixed32ByteSize,
    _FieldDescriptor.TYPE_SFIXED64:
        wire_format.SFixed64ByteSize,
    _FieldDescriptor.TYPE_SINT32:
        wire_format.SInt32ByteSize,
    _FieldDescriptor.TYPE_SINT64:
        wire_format.SInt64ByteSize,
}


# Maps from field types to encoder constructors.
TYPE_TO_ENCODER = {
    _FieldDescriptor.TYPE_DOUBLE:
        encoder.DoubleEncoder,
    _FieldDescriptor.TYPE_FLOAT:
        encoder.FloatEncoder,
    _FieldDescriptor.TYPE_INT64:
        encoder.Int64Encoder,
    _FieldDescriptor.TYPE_UINT64:
        encoder.UInt64Encoder,
    _FieldDescriptor.TYPE_INT32:
        encoder.Int32Encoder,
    _FieldDescriptor.TYPE_FIXED64:
        encoder.Fixed64Encoder,
    _FieldDescriptor.TYPE_FIXED32:
        encoder.Fixed32Encoder,
    _FieldDescriptor.TYPE_BOOL:
        encoder.BoolEncoder,
    _FieldDescriptor.TYPE_STRING:
        encoder.StringEncoder,
    _FieldDescriptor.TYPE_GROUP:
        encoder.GroupEncoder,
    _FieldDescriptor.TYPE_MESSAGE:
        encoder.MessageEncoder,
    _FieldDescriptor.TYPE_BYTES:
        encoder.BytesEncoder,
    _FieldDescriptor.TYPE_UINT32:
        encoder.UInt32Encoder,
    _FieldDescriptor.TYPE_ENUM:
        encoder.EnumEncoder,
    _FieldDescriptor.TYPE_SFIXED32:
        encoder.SFixed32Encoder,
    _FieldDescriptor.TYPE_SFIXED64:
        encoder.SFixed64Encoder,
    _FieldDescriptor.TYPE_SINT32:
        encoder.SInt32Encoder,
    _FieldDescriptor.TYPE_SINT64:
        encoder.SInt64Encoder,
}


# Maps from field types to sizer constructors.
TYPE_TO_SIZER = {
    _FieldDescriptor.TYPE_DOUBLE:
        encoder.DoubleSizer,
    _FieldDescriptor.TYPE_FLOAT:
        encoder.FloatSizer,
    _FieldDescriptor.TYPE_INT64:
        encoder.Int64Sizer,
    _FieldDescriptor.TYPE_UINT64:
        encoder.UInt64Sizer,
    _FieldDescriptor.TYPE_INT32:
        encoder.Int32Sizer,
    _FieldDescriptor.TYPE_FIXED64:
        encoder.Fixed64Sizer,
    _FieldDescriptor.TYPE_FIXED32:
        encoder.Fixed32Sizer,
    _FieldDescriptor.TYPE_BOOL:
        encoder.BoolSizer,
    _FieldDescriptor.TYPE_STRING:
        encoder.StringSizer,
    _FieldDescriptor.TYPE_GROUP:
        encoder.GroupSizer,
    _FieldDescriptor.TYPE_MESSAGE:
        encoder.MessageSizer,
    _FieldDescriptor.TYPE_BYTES:
        encoder.BytesSizer,
    _FieldDescriptor.TYPE_UINT32:
        encoder.UInt32Sizer,
    _FieldDescriptor.TYPE_ENUM:
        encoder.EnumSizer,
    _FieldDescriptor.TYPE_SFIXED32:
        encoder.SFixed32Sizer,
    _FieldDescriptor.TYPE_SFIXED64:
        encoder.SFixed64Sizer,
    _FieldDescriptor.TYPE_SINT32:
        encoder.SInt32Sizer,
    _FieldDescriptor.TYPE_SINT64:
        encoder.SInt64Sizer,
}


# Maps from field type to a decoder constructor.
TYPE_TO_DECODER = {
    _FieldDescriptor.TYPE_DOUBLE:
        decoder.DoubleDecoder,
    _FieldDescriptor.TYPE_FLOAT:
        decoder.FloatDecoder,
    _FieldDescriptor.TYPE_INT64:
        decoder.Int64Decoder,
    _FieldDescriptor.TYPE_UINT64:
        decoder.UInt64Decoder,
    _FieldDescriptor.TYPE_INT32:
        decoder.Int32Decoder,
    _FieldDescriptor.TYPE_FIXED64:
        decoder.Fixed64Decoder,
    _FieldDescriptor.TYPE_FIXED32:
        decoder.Fixed32Decoder,
    _FieldDescriptor.TYPE_BOOL:
        decoder.BoolDecoder,
    _FieldDescriptor.TYPE_STRING:
        decoder.StringDecoder,
    _FieldDescriptor.TYPE_GROUP:
        decoder.GroupDecoder,
    _FieldDescriptor.TYPE_MESSAGE:
        decoder.MessageDecoder,
    _FieldDescriptor.TYPE_BYTES:
        decoder.BytesDecoder,
    _FieldDescriptor.TYPE_UINT32:
        decoder.UInt32Decoder,
    _FieldDescriptor.TYPE_ENUM:
        decoder.EnumDecoder,
    _FieldDescriptor.TYPE_SFIXED32:
        decoder.SFixed32Decoder,
    _FieldDescriptor.TYPE_SFIXED64:
        decoder.SFixed64Decoder,
    _FieldDescriptor.TYPE_SINT32:
        decoder.SInt32Decoder,
    _FieldDescriptor.TYPE_SINT64:
        decoder.SInt64Decoder,
}

# Maps from field type to expected wiretype.
FIELD_TYPE_TO_WIRE_TYPE = {
    _FieldDescriptor.TYPE_DOUBLE:
        wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FLOAT:
        wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_INT64:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_UINT64:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_INT32:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_FIXED64:
        wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FIXED32:
        wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_BOOL:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_STRING:
        wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_GROUP:
        wire_format.WIRETYPE_START_GROUP,
    _FieldDescriptor.TYPE_MESSAGE:
        wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_BYTES:
        wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_UINT32:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_ENUM:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SFIXED32:
        wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_SFIXED64:
        wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_SINT32:
        wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SINT64:
        wire_format.WIRETYPE_VARINT,
}