# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.  All rights reserved.
# https://developers.google.com/protocol-buffers/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Provides type checking routines.

This module defines value checkers for scalar fields, plus lookup tables
keyed by field type:

GetTypeChecker: Returns the value validation object for a field descriptor.
TYPE_TO_BYTE_SIZE_FN: A dictionary with field types and a size computing
  function.
TYPE_TO_ENCODER: A dictionary with field types and encoder constructors.
TYPE_TO_SIZER: A dictionary with field types and sizer constructors.
TYPE_TO_DECODER: A dictionary with field types and decoder constructors.
FIELD_TYPE_TO_WIRE_TYPE: A dictionary with field types and their
  corresponding wire types.
"""

__author__ = 'robinson@google.com (Will Robinson)'

try:
  import ctypes
except Exception:  # pylint: disable=broad-except
  ctypes = None
  # struct is only needed as the fallback when ctypes is unavailable.
  import struct
import numbers
import six

if six.PY3:
  long = int

from google.protobuf.internal import api_implementation
from google.protobuf.internal import decoder
from google.protobuf.internal import encoder
from google.protobuf.internal import wire_format
from google.protobuf import descriptor

_FieldDescriptor = descriptor.FieldDescriptor


def TruncateToFourByteFloat(original):
  """Round-trips a number through a 4-byte float.

  Args:
    original: A real number.

  Returns:
    The Python float obtained after storing `original` in a 4-byte float.
    Magnitudes too large for a 4-byte float come back as inf/-inf.
  """
  if ctypes:
    return ctypes.c_float(original).value
  try:
    return struct.unpack('<f', struct.pack('<f', original))[0]
  except OverflowError:
    # struct refuses to pack a magnitude beyond single precision, while the
    # ctypes branch above yields +/-inf for the same input. Return infinity
    # here too so both branches agree and ToShortestFloat() can still fall
    # back to returning its un-truncated input instead of crashing.
    return float('inf') if original > 0 else float('-inf')


def ToShortestFloat(original):
  """Returns the shortest float that has same value in wire.

  Args:
    original: A float, normally one that is already exactly representable
      as a 4-byte float.

  Returns:
    `original` unchanged if it is not exactly representable as a 4-byte
    float; otherwise the float parsed from the fewest significant digits
    (starting at 6) that still truncates back to `original`.
  """
  # Return the original value if it is not truncated. This may happen
  # if someone mixes this code with an old protobuf runtime.
  # TODO(jieluo): Remove it after maybe 2022.
  if TruncateToFourByteFloat(original) != original:
    return original
  # All 4 byte floats have between 6 and 9 significant digits, so we
  # start with 6 as the lower bound.
  # It has to be iterative because use '.9g' directly can not get rid
  # of the noises for most values. For example if set a float_field=0.9
  # use '.9g' will print 0.899999976.
  precision = 6
  rounded = float('{0:.{1}g}'.format(original, precision))
  while TruncateToFourByteFloat(rounded) != original:
    precision += 1
    rounded = float('{0:.{1}g}'.format(original, precision))
  return rounded


def SupportsOpenEnums(field_descriptor):
  """Returns True if the field's containing message uses proto3 syntax."""
  return field_descriptor.containing_type.syntax == "proto3"

def GetTypeChecker(field):
  """Returns a type checker for a message field of the specified types.

  Args:
    field: FieldDescriptor object for this field.

  Returns:
    An instance of TypeChecker which can be used to verify the types
    of values assigned to a field of the specified type.
  """
  # TYPE_STRING shares CPPTYPE_STRING with other field types, so it is singled
  # out here to get the unicode-aware checker instead of the bytes one.
  if (field.cpp_type == _FieldDescriptor.CPPTYPE_STRING and
      field.type == _FieldDescriptor.TYPE_STRING):
    return UnicodeValueChecker()
  if field.cpp_type == _FieldDescriptor.CPPTYPE_ENUM:
    if SupportsOpenEnums(field):
      # When open enums are supported, any int32 can be assigned.
      return _VALUE_CHECKERS[_FieldDescriptor.CPPTYPE_INT32]
    else:
      return EnumValueChecker(field.enum_type)
  return _VALUE_CHECKERS[field.cpp_type]


# None of the typecheckers below make any attempt to guard against people
# subclassing builtin types and doing weird things.  We're not trying to
# protect against malicious clients here, just people accidentally shooting
# themselves in the foot in obvious ways.

class TypeChecker(object):

  """Type checker used to catch type errors as early as possible
  when the client is setting scalar fields in protocol messages.
  """

  def __init__(self, *acceptable_types):
    """Stores the types accepted by CheckValue().

    Args:
      *acceptable_types: Types passed to isinstance(). When the first one is
        bool or float, accepted values are converted to it.
    """
    self._acceptable_types = acceptable_types

  def CheckValue(self, proposed_value):
    """Type check the provided value and return it.

    The returned value might have been normalized to another type.

    Args:
      proposed_value: The value about to be assigned to a field.

    Returns:
      The value, converted to the first acceptable type when that type is
      bool or float; otherwise the value unchanged.

    Raises:
      TypeError: If the value is not an instance of any acceptable type.
    """
    if not isinstance(proposed_value, self._acceptable_types):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), self._acceptable_types))
      raise TypeError(message)
    # Some field types(float, double and bool) accept other types, must
    # convert to the correct type in such cases.
    if self._acceptable_types:
      if self._acceptable_types[0] in (bool, float):
        return self._acceptable_types[0](proposed_value)
    return proposed_value


class TypeCheckerWithDefault(TypeChecker):

  """TypeChecker that also carries a default value for the field."""

  def __init__(self, default_value, *acceptable_types):
    """Stores the default value and the accepted types.

    Args:
      default_value: Value returned by DefaultValue().
      *acceptable_types: Forwarded to TypeChecker.__init__().
    """
    TypeChecker.__init__(self, *acceptable_types)
    self._default_value = default_value

  def DefaultValue(self):
    """Returns the default value given at construction."""
    return self._default_value


# IntValueChecker and its subclasses perform integer type-checks
# and bounds-checks.
class IntValueChecker(object):

  """Checker used for integer fields.  Performs type-check and range check.

  Subclasses supply the inclusive bounds _MIN and _MAX and the result type
  _TYPE; this base class defines none of them.
  """

  def CheckValue(self, proposed_value):
    """Validates an integer value and converts it to self._TYPE.

    Args:
      proposed_value: The value about to be assigned to a field.

    Returns:
      The value converted with self._TYPE.

    Raises:
      TypeError: If the value is not a numbers.Integral instance.
      ValueError: If the value lies outside [self._MIN, self._MAX].
    """
    if not isinstance(proposed_value, numbers.Integral):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), six.integer_types))
      raise TypeError(message)
    if not self._MIN <= int(proposed_value) <= self._MAX:
      raise ValueError('Value out of range: %d' % proposed_value)
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler.
    proposed_value = self._TYPE(proposed_value)
    return proposed_value

  def DefaultValue(self):
    """Returns 0, the default for integer fields."""
    return 0


class EnumValueChecker(object):

  """Checker used for enum fields.  Performs type-check and range check."""

  def __init__(self, enum_type):
    """Stores the enum type whose values are accepted.

    Args:
      enum_type: Object exposing `values_by_number` (used for membership
        tests) and `values` (whose first entry supplies the default).
    """
    self._enum_type = enum_type

  def CheckValue(self, proposed_value):
    """Validates that the value is a known number of the enum.

    Args:
      proposed_value: The value about to be assigned to a field.

    Returns:
      The value unchanged.

    Raises:
      TypeError: If the value is not a numbers.Integral instance.
      ValueError: If the number is not in the enum's values_by_number.
    """
    if not isinstance(proposed_value, numbers.Integral):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), six.integer_types))
      raise TypeError(message)
    if int(proposed_value) not in self._enum_type.values_by_number:
      raise ValueError('Unknown enum value: %d' % proposed_value)
    return proposed_value

  def DefaultValue(self):
    """Returns the number of the first value listed in the enum type."""
    return self._enum_type.values[0].number


class UnicodeValueChecker(object):

  """Checker used for string fields.

  Always returns a unicode value, even if the input is of type str.
  """

  def CheckValue(self, proposed_value):
    """Validates a string value and returns it as unicode text.

    Args:
      proposed_value: A bytes or text value about to be assigned to a field.

    Returns:
      The text value; bytes input is decoded as UTF-8 first.

    Raises:
      TypeError: If the value is neither bytes nor text.
      ValueError: If bytes input is not valid UTF-8, or text input cannot be
        encoded as UTF-8.
    """
    if not isinstance(proposed_value, (bytes, six.text_type)):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), (bytes, six.text_type)))
      raise TypeError(message)

    # If the value is of type 'bytes' make sure that it is valid UTF-8 data.
    if isinstance(proposed_value, bytes):
      try:
        proposed_value = proposed_value.decode('utf-8')
      except UnicodeDecodeError:
        raise ValueError('%.1024r has type bytes, but isn\'t valid UTF-8 '
                         'encoding. Non-UTF-8 strings must be converted to '
                         'unicode objects before being added.' %
                         (proposed_value))
    else:
      # Text input is only test-encoded; the result is discarded and the
      # original text object is returned.
      try:
        proposed_value.encode('utf8')
      except UnicodeEncodeError:
        raise ValueError('%.1024r isn\'t a valid unicode string and '
                         'can\'t be encoded in UTF-8.'%
                         (proposed_value))

    return proposed_value

  def DefaultValue(self):
    """Returns the empty unicode string."""
    return u""


class Int32ValueChecker(IntValueChecker):
  # We're sure to use ints instead of longs here since comparison may be more
  # efficient.
  _MIN = -2147483648
  _MAX = 2147483647
  _TYPE = int


class Uint32ValueChecker(IntValueChecker):
  _MIN = 0
  _MAX = (1 << 32) - 1
  _TYPE = int


class Int64ValueChecker(IntValueChecker):
  _MIN = -(1 << 63)
  _MAX = (1 << 63) - 1
  _TYPE = long


class Uint64ValueChecker(IntValueChecker):
  _MIN = 0
  _MAX = (1 << 64) - 1
  _TYPE = long


# The max 4 bytes float is about 3.4028234663852886e+38
_FLOAT_MAX = float.fromhex('0x1.fffffep+127')
_FLOAT_MIN = -_FLOAT_MAX
_INF = float('inf')
_NEG_INF = float('-inf')


class FloatValueChecker(object):

  """Checker used for float fields.  Performs type-check and range check.

  Values exceeding a 32-bit float will be converted to inf/-inf.
  """

  def CheckValue(self, proposed_value):
    """Check and convert proposed_value to float.

    Args:
      proposed_value: The value about to be assigned to a field.

    Returns:
      inf or -inf when the value lies beyond +/-_FLOAT_MAX; otherwise the
      value truncated to 4-byte float precision.

    Raises:
      TypeError: If the value is not a numbers.Real instance.
    """
    if not isinstance(proposed_value, numbers.Real):
      message = ('%.1024r has type %s, but expected one of: numbers.Real' %
                 (proposed_value, type(proposed_value)))
      raise TypeError(message)
    converted_value = float(proposed_value)
    # This inf rounding matches the C++ proto SafeDoubleToFloat logic.
    if converted_value > _FLOAT_MAX:
      return _INF
    if converted_value < _FLOAT_MIN:
      return _NEG_INF

    return TruncateToFourByteFloat(converted_value)

  def DefaultValue(self):
    """Returns 0.0, the default for float fields."""
    return 0.0


# Type-checkers for all scalar CPPTYPEs.
# Keyed by FieldDescriptor.CPPTYPE_* constants. Each value is one checker
# instance created here; GetTypeChecker() returns these same instances.
# There is no CPPTYPE_ENUM entry: GetTypeChecker() handles enum fields itself.
_VALUE_CHECKERS = {
    _FieldDescriptor.CPPTYPE_INT32: Int32ValueChecker(),
    _FieldDescriptor.CPPTYPE_INT64: Int64ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
    _FieldDescriptor.CPPTYPE_DOUBLE: TypeCheckerWithDefault(
        0.0, float, numbers.Real),
    _FieldDescriptor.CPPTYPE_FLOAT: FloatValueChecker(),
    _FieldDescriptor.CPPTYPE_BOOL: TypeCheckerWithDefault(
        False, bool, numbers.Integral),
    # TYPE_STRING fields are routed to UnicodeValueChecker by GetTypeChecker();
    # this bytes-only checker covers the remaining CPPTYPE_STRING fields.
    _FieldDescriptor.CPPTYPE_STRING: TypeCheckerWithDefault(b'', bytes),
}


# Map from field type to a function F, such that F(field_num, value)
# gives the total byte size for a value of the given type.  This
# byte size includes tag information and any other additional space
# associated with serializing "value".
TYPE_TO_BYTE_SIZE_FN = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.DoubleByteSize,
    _FieldDescriptor.TYPE_FLOAT: wire_format.FloatByteSize,
    _FieldDescriptor.TYPE_INT64: wire_format.Int64ByteSize,
    _FieldDescriptor.TYPE_UINT64: wire_format.UInt64ByteSize,
    _FieldDescriptor.TYPE_INT32: wire_format.Int32ByteSize,
    _FieldDescriptor.TYPE_FIXED64: wire_format.Fixed64ByteSize,
    _FieldDescriptor.TYPE_FIXED32: wire_format.Fixed32ByteSize,
    _FieldDescriptor.TYPE_BOOL: wire_format.BoolByteSize,
    _FieldDescriptor.TYPE_STRING: wire_format.StringByteSize,
    _FieldDescriptor.TYPE_GROUP: wire_format.GroupByteSize,
    _FieldDescriptor.TYPE_MESSAGE: wire_format.MessageByteSize,
    _FieldDescriptor.TYPE_BYTES: wire_format.BytesByteSize,
    _FieldDescriptor.TYPE_UINT32: wire_format.UInt32ByteSize,
    _FieldDescriptor.TYPE_ENUM: wire_format.EnumByteSize,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.SFixed32ByteSize,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.SFixed64ByteSize,
    _FieldDescriptor.TYPE_SINT32: wire_format.SInt32ByteSize,
    _FieldDescriptor.TYPE_SINT64: wire_format.SInt64ByteSize
    }


# Maps from field types to encoder constructors.
# Keys are FieldDescriptor.TYPE_* constants; values are attributes of the
# `encoder` module, stored uncalled.
TYPE_TO_ENCODER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleEncoder,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatEncoder,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Encoder,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Encoder,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Encoder,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Encoder,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Encoder,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolEncoder,
    _FieldDescriptor.TYPE_STRING: encoder.StringEncoder,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupEncoder,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageEncoder,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesEncoder,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Encoder,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumEncoder,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Encoder,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Encoder,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Encoder,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Encoder,
    }


# Maps from field types to sizer constructors.
# Keys are FieldDescriptor.TYPE_* constants; values are attributes of the
# `encoder` module, stored uncalled.
# NOTE(review): presumably each one builds a per-field size function when
# called -- confirm against encoder.py.
TYPE_TO_SIZER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleSizer,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatSizer,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Sizer,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Sizer,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Sizer,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Sizer,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Sizer,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolSizer,
    _FieldDescriptor.TYPE_STRING: encoder.StringSizer,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupSizer,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageSizer,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesSizer,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Sizer,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumSizer,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Sizer,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Sizer,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Sizer,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Sizer,
    }


# Maps from field type to a decoder constructor.
# Keys are FieldDescriptor.TYPE_* constants; values are attributes of the
# `decoder` module, stored uncalled.
TYPE_TO_DECODER = {
    _FieldDescriptor.TYPE_DOUBLE: decoder.DoubleDecoder,
    _FieldDescriptor.TYPE_FLOAT: decoder.FloatDecoder,
    _FieldDescriptor.TYPE_INT64: decoder.Int64Decoder,
    _FieldDescriptor.TYPE_UINT64: decoder.UInt64Decoder,
    _FieldDescriptor.TYPE_INT32: decoder.Int32Decoder,
    _FieldDescriptor.TYPE_FIXED64: decoder.Fixed64Decoder,
    _FieldDescriptor.TYPE_FIXED32: decoder.Fixed32Decoder,
    _FieldDescriptor.TYPE_BOOL: decoder.BoolDecoder,
    _FieldDescriptor.TYPE_STRING: decoder.StringDecoder,
    _FieldDescriptor.TYPE_GROUP: decoder.GroupDecoder,
    _FieldDescriptor.TYPE_MESSAGE: decoder.MessageDecoder,
    _FieldDescriptor.TYPE_BYTES: decoder.BytesDecoder,
    _FieldDescriptor.TYPE_UINT32: decoder.UInt32Decoder,
    _FieldDescriptor.TYPE_ENUM: decoder.EnumDecoder,
    _FieldDescriptor.TYPE_SFIXED32: decoder.SFixed32Decoder,
    _FieldDescriptor.TYPE_SFIXED64: decoder.SFixed64Decoder,
    _FieldDescriptor.TYPE_SINT32: decoder.SInt32Decoder,
    _FieldDescriptor.TYPE_SINT64: decoder.SInt64Decoder,
    }

# Maps from field type to expected wiretype.
# Keys are FieldDescriptor.TYPE_* constants; values are
# wire_format.WIRETYPE_* constants.
FIELD_TYPE_TO_WIRE_TYPE = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FLOAT: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_INT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_UINT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_INT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_FIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_BOOL: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_STRING:
      wire_format.WIRETYPE_LENGTH_DELIMITED,
    # Groups are keyed to their start-group wire type only.
    _FieldDescriptor.TYPE_GROUP: wire_format.WIRETYPE_START_GROUP,
    _FieldDescriptor.TYPE_MESSAGE:
      wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_BYTES:
      wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_UINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_ENUM: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_SINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SINT64: wire_format.WIRETYPE_VARINT,
    }