• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# https://developers.google.com/protocol-buffers/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Provides type checking routines.
32
33This module defines type checking utilities in the forms of dictionaries:
34
35VALUE_CHECKERS: A dictionary of field types and a value validation object.
36TYPE_TO_BYTE_SIZE_FN: A dictionary with field types and a size computing
37  function.
38TYPE_TO_SERIALIZE_METHOD: A dictionary with field types and serialization
39  function.
40FIELD_TYPE_TO_WIRE_TYPE: A dictionary with field types and their
41  corresponding wire types.
42TYPE_TO_DESERIALIZE_METHOD: A dictionary with field types and deserialization
43  function.
44"""
45
46__author__ = 'robinson@google.com (Will Robinson)'
47
48try:
49  import ctypes
50except Exception:  # pylint: disable=broad-except
51  ctypes = None
52  import struct
53import numbers
54import six
55
56if six.PY3:
57  long = int
58
59from google.protobuf.internal import api_implementation
60from google.protobuf.internal import decoder
61from google.protobuf.internal import encoder
62from google.protobuf.internal import wire_format
63from google.protobuf import descriptor
64
65_FieldDescriptor = descriptor.FieldDescriptor
66
67
def TruncateToFourByteFloat(original):
  """Returns `original` after a round trip through a 4-byte float.

  Args:
    original: The number to truncate.

  Returns:
    The value read back after storing `original` as a C float (via ctypes
    when it is available, otherwise via a struct pack/unpack round trip).
  """
  if not ctypes:
    # ctypes could not be imported; fall back to packing with struct.
    packed = struct.pack('<f', original)
    return struct.unpack('<f', packed)[0]
  return ctypes.c_float(original).value
73
74
def ToShortestFloat(original):
  """Returns the shortest float that maps to the same 4-byte float value.

  Args:
    original: A float, normally one already truncated to 4-byte precision.

  Returns:
    `original` unchanged if it does not survive truncation to a 4-byte float;
    otherwise the float parsed from the lowest-precision '%g' rendering
    (starting at 6 significant digits) that truncates back to `original`.
  """
  # A value that was never truncated is returned untouched. This may happen
  # if someone mixes this code with an old protobuf runtime.
  # TODO(jieluo): Remove it after maybe 2022.
  if TruncateToFourByteFloat(original) != original:
    return original
  # All 4 byte floats have between 6 and 9 significant digits, so the search
  # begins at 6. It must be iterative: formatting with '.9g' directly keeps
  # the noise for most values (a float_field set to 0.9 prints 0.899999976).
  precision = 6
  while True:
    rounded = float('{0:.{1}g}'.format(original, precision))
    if TruncateToFourByteFloat(rounded) == original:
      return rounded
    precision += 1
93
94
def SupportsOpenEnums(field_descriptor):
  """Returns True if the field's containing type declares "proto3" syntax.

  Args:
    field_descriptor: Object exposing `containing_type.syntax`.
  """
  syntax = field_descriptor.containing_type.syntax
  return syntax == "proto3"
97
def GetTypeChecker(field):
  """Picks the value checker matching a message field.

  Args:
    field: FieldDescriptor object for this field.

  Returns:
    A checker object whose CheckValue() validates values assigned to a
    field of this type.
  """
  cpp_type = field.cpp_type
  is_text_field = (cpp_type == _FieldDescriptor.CPPTYPE_STRING and
                   field.type == _FieldDescriptor.TYPE_STRING)
  if is_text_field:
    return UnicodeValueChecker()
  if cpp_type != _FieldDescriptor.CPPTYPE_ENUM:
    return _VALUE_CHECKERS[cpp_type]
  if SupportsOpenEnums(field):
    # With open enums any int32 may be assigned, so reuse the int32 checker.
    return _VALUE_CHECKERS[_FieldDescriptor.CPPTYPE_INT32]
  return EnumValueChecker(field.enum_type)
118
119
120# None of the typecheckers below make any attempt to guard against people
121# subclassing builtin types and doing weird things.  We're not trying to
122# protect against malicious clients here, just people accidentally shooting
123# themselves in the foot in obvious ways.
124
class TypeChecker(object):

  """Validates scalar values against a fixed set of acceptable types.

  Used to surface type errors as early as possible, at the moment a client
  sets a scalar field in a protocol message.
  """

  def __init__(self, *acceptable_types):
    """Stores the types that CheckValue() will accept."""
    self._acceptable_types = acceptable_types

  def CheckValue(self, proposed_value):
    """Type checks `proposed_value` and returns it, possibly normalized.

    Args:
      proposed_value: The value about to be assigned.

    Returns:
      The value converted to the first acceptable type when that type is
      bool or float; otherwise the value unchanged.

    Raises:
      TypeError: If the value is not an instance of any acceptable type.
    """
    allowed = self._acceptable_types
    if isinstance(proposed_value, allowed):
      # Some field types (float, double and bool) accept other types, so the
      # value is converted to the canonical type in those cases.
      if allowed and allowed[0] in (bool, float):
        return allowed[0](proposed_value)
      return proposed_value
    raise TypeError('%.1024r has type %s, but expected one of: %s' %
                    (proposed_value, type(proposed_value), allowed))
149
150
class TypeCheckerWithDefault(TypeChecker):

  """A TypeChecker that also carries the default value for its field type."""

  def __init__(self, default_value, *acceptable_types):
    """Records `default_value` and the types CheckValue() will accept."""
    self._default_value = default_value
    TypeChecker.__init__(self, *acceptable_types)

  def DefaultValue(self):
    """Returns the default value supplied at construction time."""
    return self._default_value
159
160
161# IntValueChecker and its subclasses perform integer type-checks
162# and bounds-checks.
class IntValueChecker(object):

  """Base checker for integer fields: type check plus range check.

  Subclasses supply the `_MIN` and `_MAX` bounds (inclusive) and the `_TYPE`
  callable used to normalize accepted values.
  """

  def CheckValue(self, proposed_value):
    """Validates `proposed_value` and returns it converted with `_TYPE`.

    Raises:
      TypeError: If the value is not a numbers.Integral instance.
      ValueError: If the value lies outside [_MIN, _MAX].
    """
    if isinstance(proposed_value, numbers.Integral):
      if self._MIN <= int(proposed_value) <= self._MAX:
        # 32-bit values are forced to int and 64-bit values to long so that
        # alternate implementations where the distinction matters more
        # (e.g. the C++ implementation) stay simpler.
        return self._TYPE(proposed_value)
      raise ValueError('Value out of range: %d' % proposed_value)
    raise TypeError('%.1024r has type %s, but expected one of: %s' %
                    (proposed_value, type(proposed_value), six.integer_types))

  def DefaultValue(self):
    """Returns 0, the default for integer fields."""
    return 0
182
183
class EnumValueChecker(object):

  """Checker for enum fields: type check plus membership check."""

  def __init__(self, enum_type):
    """Stores the enum type whose numbers are considered valid."""
    self._enum_type = enum_type

  def CheckValue(self, proposed_value):
    """Validates `proposed_value` and returns it unchanged.

    Raises:
      TypeError: If the value is not a numbers.Integral instance.
      ValueError: If the number is not in the enum's `values_by_number`.
    """
    if isinstance(proposed_value, numbers.Integral):
      if int(proposed_value) in self._enum_type.values_by_number:
        return proposed_value
      raise ValueError('Unknown enum value: %d' % proposed_value)
    raise TypeError('%.1024r has type %s, but expected one of: %s' %
                    (proposed_value, type(proposed_value), six.integer_types))

  def DefaultValue(self):
    """Returns the number of the first value listed in the enum type."""
    first_value = self._enum_type.values[0]
    return first_value.number
202
203
class UnicodeValueChecker(object):

  """Checker for string fields.

  The returned value is always unicode text, even when bytes were supplied.
  """

  def CheckValue(self, proposed_value):
    """Validates `proposed_value` and returns it as unicode text.

    Raises:
      TypeError: If the value is neither bytes nor unicode text.
      ValueError: If bytes are not valid UTF-8, or if the text cannot be
        encoded as UTF-8.
    """
    accepted = (bytes, six.text_type)
    if not isinstance(proposed_value, accepted):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value), accepted))

    if not isinstance(proposed_value, bytes):
      # Text is only verified to be encodable; it is returned as given.
      try:
        proposed_value.encode('utf8')
      except UnicodeEncodeError:
        raise ValueError('%.1024r isn\'t a valid unicode string and '
                         'can\'t be encoded in UTF-8.'%
                         (proposed_value))
      return proposed_value

    # Bytes must hold valid UTF-8 data and are decoded to text.
    try:
      return proposed_value.decode('utf-8')
    except UnicodeDecodeError:
      raise ValueError('%.1024r has type bytes, but isn\'t valid UTF-8 '
                       'encoding. Non-UTF-8 strings must be converted to '
                       'unicode objects before being added.' %
                       (proposed_value))

  def DefaultValue(self):
    """Returns the empty unicode string."""
    return u""
238
239
class Int32ValueChecker(IntValueChecker):
  """Integer checker bounded to the signed 32-bit range."""
  # Plain int bounds (rather than longs) are used on purpose, since
  # comparison may be more efficient.
  _MAX = 0x7FFFFFFF
  _MIN = -_MAX - 1
  _TYPE = int
246
247
class Uint32ValueChecker(IntValueChecker):
  """Integer checker bounded to the unsigned 32-bit range."""
  _MIN = 0
  _MAX = 2 ** 32 - 1
  _TYPE = int
252
253
class Int64ValueChecker(IntValueChecker):
  """Integer checker bounded to the signed 64-bit range."""
  _MAX = 2 ** 63 - 1
  _MIN = -_MAX - 1
  _TYPE = long
258
259
class Uint64ValueChecker(IntValueChecker):
  """Integer checker bounded to the unsigned 64-bit range."""
  _MIN = 0
  _MAX = 2 ** 64 - 1
  _TYPE = long
264
265
# Largest finite 4-byte float, roughly 3.4028234663852886e+38, and its
# negation; values beyond these limits are mapped to the infinities below.
_FLOAT_MAX = float.fromhex('0x1.fffffep+127')
_FLOAT_MIN = -_FLOAT_MAX
_INF = float('inf')
_NEG_INF = float('-inf')


class FloatValueChecker(object):

  """Checker for float fields: type check plus range handling.

  Values too large in magnitude for a 32-bit float become inf/-inf.
  """

  def CheckValue(self, proposed_value):
    """Checks `proposed_value` and converts it to a 4-byte-precision float.

    Raises:
      TypeError: If the value is not a numbers.Real instance.
    """
    if isinstance(proposed_value, numbers.Real):
      as_float = float(proposed_value)
      # This inf rounding matches the C++ proto SafeDoubleToFloat logic.
      if as_float > _FLOAT_MAX:
        return _INF
      if as_float < _FLOAT_MIN:
        return _NEG_INF
      return TruncateToFourByteFloat(as_float)
    raise TypeError('%.1024r has type %s, but expected one of: numbers.Real' %
                    (proposed_value, type(proposed_value)))

  def DefaultValue(self):
    """Returns 0.0, the default for float fields."""
    return 0.0
297
298
# Value checkers for every scalar CPPTYPE, keyed by the CPPTYPE constant.
_VALUE_CHECKERS = {
    _FieldDescriptor.CPPTYPE_INT32: Int32ValueChecker(),
    _FieldDescriptor.CPPTYPE_INT64: Int64ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
    # Any numbers.Real value is accepted and converted to float.
    _FieldDescriptor.CPPTYPE_DOUBLE:
        TypeCheckerWithDefault(0.0, float, numbers.Real),
    _FieldDescriptor.CPPTYPE_FLOAT: FloatValueChecker(),
    # Any numbers.Integral value is accepted and converted to bool.
    _FieldDescriptor.CPPTYPE_BOOL:
        TypeCheckerWithDefault(False, bool, numbers.Integral),
    # TYPE_STRING fields are special-cased in GetTypeChecker; this entry
    # only accepts bytes.
    _FieldDescriptor.CPPTYPE_STRING: TypeCheckerWithDefault(b'', bytes),
}
312
313
# Field type -> function F where F(field_num, value) is the total number of
# bytes needed for a value of that type. The size covers the tag information
# and any other space associated with serializing "value".
TYPE_TO_BYTE_SIZE_FN = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.DoubleByteSize,
    _FieldDescriptor.TYPE_FLOAT: wire_format.FloatByteSize,
    _FieldDescriptor.TYPE_INT64: wire_format.Int64ByteSize,
    _FieldDescriptor.TYPE_UINT64: wire_format.UInt64ByteSize,
    _FieldDescriptor.TYPE_INT32: wire_format.Int32ByteSize,
    _FieldDescriptor.TYPE_FIXED64: wire_format.Fixed64ByteSize,
    _FieldDescriptor.TYPE_FIXED32: wire_format.Fixed32ByteSize,
    _FieldDescriptor.TYPE_BOOL: wire_format.BoolByteSize,
    _FieldDescriptor.TYPE_STRING: wire_format.StringByteSize,
    _FieldDescriptor.TYPE_GROUP: wire_format.GroupByteSize,
    _FieldDescriptor.TYPE_MESSAGE: wire_format.MessageByteSize,
    _FieldDescriptor.TYPE_BYTES: wire_format.BytesByteSize,
    _FieldDescriptor.TYPE_UINT32: wire_format.UInt32ByteSize,
    _FieldDescriptor.TYPE_ENUM: wire_format.EnumByteSize,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.SFixed32ByteSize,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.SFixed64ByteSize,
    _FieldDescriptor.TYPE_SINT32: wire_format.SInt32ByteSize,
    _FieldDescriptor.TYPE_SINT64: wire_format.SInt64ByteSize,
}
338
339
# Field type -> encoder constructor from the encoder module.
TYPE_TO_ENCODER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleEncoder,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatEncoder,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Encoder,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Encoder,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Encoder,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Encoder,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Encoder,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolEncoder,
    _FieldDescriptor.TYPE_STRING: encoder.StringEncoder,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupEncoder,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageEncoder,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesEncoder,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Encoder,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumEncoder,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Encoder,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Encoder,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Encoder,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Encoder,
}
361
362
# Field type -> sizer constructor from the encoder module.
TYPE_TO_SIZER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleSizer,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatSizer,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Sizer,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Sizer,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Sizer,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Sizer,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Sizer,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolSizer,
    _FieldDescriptor.TYPE_STRING: encoder.StringSizer,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupSizer,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageSizer,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesSizer,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Sizer,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumSizer,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Sizer,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Sizer,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Sizer,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Sizer,
}
384
385
# Field type -> decoder constructor from the decoder module.
TYPE_TO_DECODER = {
    _FieldDescriptor.TYPE_DOUBLE: decoder.DoubleDecoder,
    _FieldDescriptor.TYPE_FLOAT: decoder.FloatDecoder,
    _FieldDescriptor.TYPE_INT64: decoder.Int64Decoder,
    _FieldDescriptor.TYPE_UINT64: decoder.UInt64Decoder,
    _FieldDescriptor.TYPE_INT32: decoder.Int32Decoder,
    _FieldDescriptor.TYPE_FIXED64: decoder.Fixed64Decoder,
    _FieldDescriptor.TYPE_FIXED32: decoder.Fixed32Decoder,
    _FieldDescriptor.TYPE_BOOL: decoder.BoolDecoder,
    _FieldDescriptor.TYPE_STRING: decoder.StringDecoder,
    _FieldDescriptor.TYPE_GROUP: decoder.GroupDecoder,
    _FieldDescriptor.TYPE_MESSAGE: decoder.MessageDecoder,
    _FieldDescriptor.TYPE_BYTES: decoder.BytesDecoder,
    _FieldDescriptor.TYPE_UINT32: decoder.UInt32Decoder,
    _FieldDescriptor.TYPE_ENUM: decoder.EnumDecoder,
    _FieldDescriptor.TYPE_SFIXED32: decoder.SFixed32Decoder,
    _FieldDescriptor.TYPE_SFIXED64: decoder.SFixed64Decoder,
    _FieldDescriptor.TYPE_SINT32: decoder.SInt32Decoder,
    _FieldDescriptor.TYPE_SINT64: decoder.SInt64Decoder,
}
407
# Field type -> the wire type expected for that field on the wire.
FIELD_TYPE_TO_WIRE_TYPE = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FLOAT: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_INT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_UINT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_INT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_FIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_BOOL: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_STRING: wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_GROUP: wire_format.WIRETYPE_START_GROUP,
    _FieldDescriptor.TYPE_MESSAGE: wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_BYTES: wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_UINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_ENUM: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_SINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SINT64: wire_format.WIRETYPE_VARINT,
}
432