• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# http://code.google.com/p/protobuf/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Code for encoding protocol message primitives.
32
33Contains the logic for encoding every logical protocol field type
34into one of the 5 physical wire types.
35
36This code is designed to push the Python interpreter's performance to the
37limits.
38
39The basic idea is that at startup time, for every field (i.e. every
40FieldDescriptor) we construct two functions:  a "sizer" and an "encoder".  The
41sizer takes a value of this field's type and computes its byte size.  The
42encoder takes a writer function and a value.  It encodes the value into byte
43strings and invokes the writer function to write those strings.  Typically the
44writer function is the write() method of a cStringIO.
45
46We try to do as much work as possible when constructing the writer and the
47sizer rather than when calling them.  In particular:
48* We copy any needed global functions to local variables, so that we do not need
49  to do costly global table lookups at runtime.
50* Similarly, we try to do any attribute lookups at startup time if possible.
51* Every field's tag is encoded to bytes at startup, since it can't change at
52  runtime.
53* Whatever component of the field size we can compute at startup, we do.
54* We *avoid* sharing code if doing so would make the code slower and not sharing
55  does not burden us too much.  For example, encoders for repeated fields do
56  not just call the encoders for singular fields in a loop because this would
57  add an extra function call overhead for every loop iteration; instead, we
58  manually inline the single-value encoder into the loop.
59* If a Python function lacks a return statement, Python actually generates
60  instructions to pop the result of the last statement off the stack, push
61  None onto the stack, and then return that.  If we really don't care what
62  value is returned, then we can save two instructions by returning the
63  result of the last statement.  It looks funny but it helps.
64* We assume that type and bounds checking has happened at a higher level.
65"""
66
67__author__ = 'kenton@google.com (Kenton Varda)'
68
69import struct
70from google.protobuf.internal import wire_format
71
72
73def _VarintSize(value):
74  """Compute the size of a varint value."""
75  if value <= 0x7f: return 1
76  if value <= 0x3fff: return 2
77  if value <= 0x1fffff: return 3
78  if value <= 0xfffffff: return 4
79  if value <= 0x7ffffffff: return 5
80  if value <= 0x3ffffffffff: return 6
81  if value <= 0x1ffffffffffff: return 7
82  if value <= 0xffffffffffffff: return 8
83  if value <= 0x7fffffffffffffff: return 9
84  return 10
85
86
87def _SignedVarintSize(value):
88  """Compute the size of a signed varint value."""
89  if value < 0: return 10
90  if value <= 0x7f: return 1
91  if value <= 0x3fff: return 2
92  if value <= 0x1fffff: return 3
93  if value <= 0xfffffff: return 4
94  if value <= 0x7ffffffff: return 5
95  if value <= 0x3ffffffffff: return 6
96  if value <= 0x1ffffffffffff: return 7
97  if value <= 0xffffffffffffff: return 8
98  if value <= 0x7fffffffffffffff: return 9
99  return 10
100
101
102def _TagSize(field_number):
103  """Returns the number of bytes required to serialize a tag with this field
104  number."""
105  # Just pass in type 0, since the type won't affect the tag+type size.
106  return _VarintSize(wire_format.PackTag(field_number, 0))
107
108
109# --------------------------------------------------------------------
110# In this section we define some generic sizers.  Each of these functions
111# takes parameters specific to a particular field type, e.g. int32 or fixed64.
112# It returns another function which in turn takes parameters specific to a
113# particular field, e.g. the field number and whether it is repeated or packed.
114# Look at the next section to see how these are used.
115
116
117def _SimpleSizer(compute_value_size):
118  """A sizer which uses the function compute_value_size to compute the size of
119  each value.  Typically compute_value_size is _VarintSize."""
120
121  def SpecificSizer(field_number, is_repeated, is_packed):
122    tag_size = _TagSize(field_number)
123    if is_packed:
124      local_VarintSize = _VarintSize
125      def PackedFieldSize(value):
126        result = 0
127        for element in value:
128          result += compute_value_size(element)
129        return result + local_VarintSize(result) + tag_size
130      return PackedFieldSize
131    elif is_repeated:
132      def RepeatedFieldSize(value):
133        result = tag_size * len(value)
134        for element in value:
135          result += compute_value_size(element)
136        return result
137      return RepeatedFieldSize
138    else:
139      def FieldSize(value):
140        return tag_size + compute_value_size(value)
141      return FieldSize
142
143  return SpecificSizer
144
145
146def _ModifiedSizer(compute_value_size, modify_value):
147  """Like SimpleSizer, but modify_value is invoked on each value before it is
148  passed to compute_value_size.  modify_value is typically ZigZagEncode."""
149
150  def SpecificSizer(field_number, is_repeated, is_packed):
151    tag_size = _TagSize(field_number)
152    if is_packed:
153      local_VarintSize = _VarintSize
154      def PackedFieldSize(value):
155        result = 0
156        for element in value:
157          result += compute_value_size(modify_value(element))
158        return result + local_VarintSize(result) + tag_size
159      return PackedFieldSize
160    elif is_repeated:
161      def RepeatedFieldSize(value):
162        result = tag_size * len(value)
163        for element in value:
164          result += compute_value_size(modify_value(element))
165        return result
166      return RepeatedFieldSize
167    else:
168      def FieldSize(value):
169        return tag_size + compute_value_size(modify_value(value))
170      return FieldSize
171
172  return SpecificSizer
173
174
175def _FixedSizer(value_size):
176  """Like _SimpleSizer except for a fixed-size field.  The input is the size
177  of one value."""
178
179  def SpecificSizer(field_number, is_repeated, is_packed):
180    tag_size = _TagSize(field_number)
181    if is_packed:
182      local_VarintSize = _VarintSize
183      def PackedFieldSize(value):
184        result = len(value) * value_size
185        return result + local_VarintSize(result) + tag_size
186      return PackedFieldSize
187    elif is_repeated:
188      element_size = value_size + tag_size
189      def RepeatedFieldSize(value):
190        return len(value) * element_size
191      return RepeatedFieldSize
192    else:
193      field_size = value_size + tag_size
194      def FieldSize(value):
195        return field_size
196      return FieldSize
197
198  return SpecificSizer
199
200
201# ====================================================================
202# Here we declare a sizer constructor for each field type.  Each "sizer
203# constructor" is a function that takes (field_number, is_repeated, is_packed)
204# as parameters and returns a sizer, which in turn takes a field value as
205# a parameter and returns its encoded size.
206
207
208Int32Sizer = Int64Sizer = EnumSizer = _SimpleSizer(_SignedVarintSize)
209
210UInt32Sizer = UInt64Sizer = _SimpleSizer(_VarintSize)
211
212SInt32Sizer = SInt64Sizer = _ModifiedSizer(
213    _SignedVarintSize, wire_format.ZigZagEncode)
214
215Fixed32Sizer = SFixed32Sizer = FloatSizer  = _FixedSizer(4)
216Fixed64Sizer = SFixed64Sizer = DoubleSizer = _FixedSizer(8)
217
218BoolSizer = _FixedSizer(1)
219
220
221def StringSizer(field_number, is_repeated, is_packed):
222  """Returns a sizer for a string field."""
223
224  tag_size = _TagSize(field_number)
225  local_VarintSize = _VarintSize
226  local_len = len
227  assert not is_packed
228  if is_repeated:
229    def RepeatedFieldSize(value):
230      result = tag_size * len(value)
231      for element in value:
232        l = local_len(element.encode('utf-8'))
233        result += local_VarintSize(l) + l
234      return result
235    return RepeatedFieldSize
236  else:
237    def FieldSize(value):
238      l = local_len(value.encode('utf-8'))
239      return tag_size + local_VarintSize(l) + l
240    return FieldSize
241
242
243def BytesSizer(field_number, is_repeated, is_packed):
244  """Returns a sizer for a bytes field."""
245
246  tag_size = _TagSize(field_number)
247  local_VarintSize = _VarintSize
248  local_len = len
249  assert not is_packed
250  if is_repeated:
251    def RepeatedFieldSize(value):
252      result = tag_size * len(value)
253      for element in value:
254        l = local_len(element)
255        result += local_VarintSize(l) + l
256      return result
257    return RepeatedFieldSize
258  else:
259    def FieldSize(value):
260      l = local_len(value)
261      return tag_size + local_VarintSize(l) + l
262    return FieldSize
263
264
265def GroupSizer(field_number, is_repeated, is_packed):
266  """Returns a sizer for a group field."""
267
268  tag_size = _TagSize(field_number) * 2
269  assert not is_packed
270  if is_repeated:
271    def RepeatedFieldSize(value):
272      result = tag_size * len(value)
273      for element in value:
274        result += element.ByteSize()
275      return result
276    return RepeatedFieldSize
277  else:
278    def FieldSize(value):
279      return tag_size + value.ByteSize()
280    return FieldSize
281
282
283def MessageSizer(field_number, is_repeated, is_packed):
284  """Returns a sizer for a message field."""
285
286  tag_size = _TagSize(field_number)
287  local_VarintSize = _VarintSize
288  assert not is_packed
289  if is_repeated:
290    def RepeatedFieldSize(value):
291      result = tag_size * len(value)
292      for element in value:
293        l = element.ByteSize()
294        result += local_VarintSize(l) + l
295      return result
296    return RepeatedFieldSize
297  else:
298    def FieldSize(value):
299      l = value.ByteSize()
300      return tag_size + local_VarintSize(l) + l
301    return FieldSize
302
303
304# --------------------------------------------------------------------
305# MessageSet is special.
306
307
308def MessageSetItemSizer(field_number):
309  """Returns a sizer for extensions of MessageSet.
310
311  The message set message looks like this:
312    message MessageSet {
313      repeated group Item = 1 {
314        required int32 type_id = 2;
315        required string message = 3;
316      }
317    }
318  """
319  static_size = (_TagSize(1) * 2 + _TagSize(2) + _VarintSize(field_number) +
320                 _TagSize(3))
321  local_VarintSize = _VarintSize
322
323  def FieldSize(value):
324    l = value.ByteSize()
325    return static_size + local_VarintSize(l) + l
326
327  return FieldSize
328
329
330# ====================================================================
331# Encoders!
332
333
334def _VarintEncoder():
335  """Return an encoder for a basic varint value (does not include tag)."""
336
337  local_chr = chr
338  def EncodeVarint(write, value):
339    bits = value & 0x7f
340    value >>= 7
341    while value:
342      write(local_chr(0x80|bits))
343      bits = value & 0x7f
344      value >>= 7
345    return write(local_chr(bits))
346
347  return EncodeVarint
348
349
350def _SignedVarintEncoder():
351  """Return an encoder for a basic signed varint value (does not include
352  tag)."""
353
354  local_chr = chr
355  def EncodeSignedVarint(write, value):
356    if value < 0:
357      value += (1 << 64)
358    bits = value & 0x7f
359    value >>= 7
360    while value:
361      write(local_chr(0x80|bits))
362      bits = value & 0x7f
363      value >>= 7
364    return write(local_chr(bits))
365
366  return EncodeSignedVarint
367
368
369_EncodeVarint = _VarintEncoder()
370_EncodeSignedVarint = _SignedVarintEncoder()
371
372
373def _VarintBytes(value):
374  """Encode the given integer as a varint and return the bytes.  This is only
375  called at startup time so it doesn't need to be fast."""
376
377  pieces = []
378  _EncodeVarint(pieces.append, value)
379  return "".join(pieces)
380
381
382def TagBytes(field_number, wire_type):
383  """Encode the given tag and return the bytes.  Only called at startup."""
384
385  return _VarintBytes(wire_format.PackTag(field_number, wire_type))
386
387# --------------------------------------------------------------------
388# As with sizers (see above), we have a number of common encoder
389# implementations.
390
391
392def _SimpleEncoder(wire_type, encode_value, compute_value_size):
393  """Return a constructor for an encoder for fields of a particular type.
394
395  Args:
396      wire_type:  The field's wire type, for encoding tags.
397      encode_value:  A function which encodes an individual value, e.g.
398        _EncodeVarint().
399      compute_value_size:  A function which computes the size of an individual
400        value, e.g. _VarintSize().
401  """
402
403  def SpecificEncoder(field_number, is_repeated, is_packed):
404    if is_packed:
405      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
406      local_EncodeVarint = _EncodeVarint
407      def EncodePackedField(write, value):
408        write(tag_bytes)
409        size = 0
410        for element in value:
411          size += compute_value_size(element)
412        local_EncodeVarint(write, size)
413        for element in value:
414          encode_value(write, element)
415      return EncodePackedField
416    elif is_repeated:
417      tag_bytes = TagBytes(field_number, wire_type)
418      def EncodeRepeatedField(write, value):
419        for element in value:
420          write(tag_bytes)
421          encode_value(write, element)
422      return EncodeRepeatedField
423    else:
424      tag_bytes = TagBytes(field_number, wire_type)
425      def EncodeField(write, value):
426        write(tag_bytes)
427        return encode_value(write, value)
428      return EncodeField
429
430  return SpecificEncoder
431
432
433def _ModifiedEncoder(wire_type, encode_value, compute_value_size, modify_value):
434  """Like SimpleEncoder but additionally invokes modify_value on every value
435  before passing it to encode_value.  Usually modify_value is ZigZagEncode."""
436
437  def SpecificEncoder(field_number, is_repeated, is_packed):
438    if is_packed:
439      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
440      local_EncodeVarint = _EncodeVarint
441      def EncodePackedField(write, value):
442        write(tag_bytes)
443        size = 0
444        for element in value:
445          size += compute_value_size(modify_value(element))
446        local_EncodeVarint(write, size)
447        for element in value:
448          encode_value(write, modify_value(element))
449      return EncodePackedField
450    elif is_repeated:
451      tag_bytes = TagBytes(field_number, wire_type)
452      def EncodeRepeatedField(write, value):
453        for element in value:
454          write(tag_bytes)
455          encode_value(write, modify_value(element))
456      return EncodeRepeatedField
457    else:
458      tag_bytes = TagBytes(field_number, wire_type)
459      def EncodeField(write, value):
460        write(tag_bytes)
461        return encode_value(write, modify_value(value))
462      return EncodeField
463
464  return SpecificEncoder
465
466
467def _StructPackEncoder(wire_type, format):
468  """Return a constructor for an encoder for a fixed-width field.
469
470  Args:
471      wire_type:  The field's wire type, for encoding tags.
472      format:  The format string to pass to struct.pack().
473  """
474
475  value_size = struct.calcsize(format)
476
477  def SpecificEncoder(field_number, is_repeated, is_packed):
478    local_struct_pack = struct.pack
479    if is_packed:
480      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
481      local_EncodeVarint = _EncodeVarint
482      def EncodePackedField(write, value):
483        write(tag_bytes)
484        local_EncodeVarint(write, len(value) * value_size)
485        for element in value:
486          write(local_struct_pack(format, element))
487      return EncodePackedField
488    elif is_repeated:
489      tag_bytes = TagBytes(field_number, wire_type)
490      def EncodeRepeatedField(write, value):
491        for element in value:
492          write(tag_bytes)
493          write(local_struct_pack(format, element))
494      return EncodeRepeatedField
495    else:
496      tag_bytes = TagBytes(field_number, wire_type)
497      def EncodeField(write, value):
498        write(tag_bytes)
499        return write(local_struct_pack(format, value))
500      return EncodeField
501
502  return SpecificEncoder
503
504
505# ====================================================================
506# Here we declare an encoder constructor for each field type.  These work
507# very similarly to sizer constructors, described earlier.
508
509
510Int32Encoder = Int64Encoder = EnumEncoder = _SimpleEncoder(
511    wire_format.WIRETYPE_VARINT, _EncodeSignedVarint, _SignedVarintSize)
512
513UInt32Encoder = UInt64Encoder = _SimpleEncoder(
514    wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize)
515
516SInt32Encoder = SInt64Encoder = _ModifiedEncoder(
517    wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize,
518    wire_format.ZigZagEncode)
519
520# Note that Python conveniently guarantees that when using the '<' prefix on
521# formats, they will also have the same size across all platforms (as opposed
522# to without the prefix, where their sizes depend on the C compiler's basic
523# type sizes).
524Fixed32Encoder  = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I')
525Fixed64Encoder  = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q')
526SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i')
527SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q')
528FloatEncoder    = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<f')
529DoubleEncoder   = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<d')
530
531
532def BoolEncoder(field_number, is_repeated, is_packed):
533  """Returns an encoder for a boolean field."""
534
535  false_byte = chr(0)
536  true_byte = chr(1)
537  if is_packed:
538    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
539    local_EncodeVarint = _EncodeVarint
540    def EncodePackedField(write, value):
541      write(tag_bytes)
542      local_EncodeVarint(write, len(value))
543      for element in value:
544        if element:
545          write(true_byte)
546        else:
547          write(false_byte)
548    return EncodePackedField
549  elif is_repeated:
550    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
551    def EncodeRepeatedField(write, value):
552      for element in value:
553        write(tag_bytes)
554        if element:
555          write(true_byte)
556        else:
557          write(false_byte)
558    return EncodeRepeatedField
559  else:
560    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
561    def EncodeField(write, value):
562      write(tag_bytes)
563      if value:
564        return write(true_byte)
565      return write(false_byte)
566    return EncodeField
567
568
569def StringEncoder(field_number, is_repeated, is_packed):
570  """Returns an encoder for a string field."""
571
572  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
573  local_EncodeVarint = _EncodeVarint
574  local_len = len
575  assert not is_packed
576  if is_repeated:
577    def EncodeRepeatedField(write, value):
578      for element in value:
579        encoded = element.encode('utf-8')
580        write(tag)
581        local_EncodeVarint(write, local_len(encoded))
582        write(encoded)
583    return EncodeRepeatedField
584  else:
585    def EncodeField(write, value):
586      encoded = value.encode('utf-8')
587      write(tag)
588      local_EncodeVarint(write, local_len(encoded))
589      return write(encoded)
590    return EncodeField
591
592
593def BytesEncoder(field_number, is_repeated, is_packed):
594  """Returns an encoder for a bytes field."""
595
596  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
597  local_EncodeVarint = _EncodeVarint
598  local_len = len
599  assert not is_packed
600  if is_repeated:
601    def EncodeRepeatedField(write, value):
602      for element in value:
603        write(tag)
604        local_EncodeVarint(write, local_len(element))
605        write(element)
606    return EncodeRepeatedField
607  else:
608    def EncodeField(write, value):
609      write(tag)
610      local_EncodeVarint(write, local_len(value))
611      return write(value)
612    return EncodeField
613
614
615def GroupEncoder(field_number, is_repeated, is_packed):
616  """Returns an encoder for a group field."""
617
618  start_tag = TagBytes(field_number, wire_format.WIRETYPE_START_GROUP)
619  end_tag = TagBytes(field_number, wire_format.WIRETYPE_END_GROUP)
620  assert not is_packed
621  if is_repeated:
622    def EncodeRepeatedField(write, value):
623      for element in value:
624        write(start_tag)
625        element._InternalSerialize(write)
626        write(end_tag)
627    return EncodeRepeatedField
628  else:
629    def EncodeField(write, value):
630      write(start_tag)
631      value._InternalSerialize(write)
632      return write(end_tag)
633    return EncodeField
634
635
636def MessageEncoder(field_number, is_repeated, is_packed):
637  """Returns an encoder for a message field."""
638
639  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
640  local_EncodeVarint = _EncodeVarint
641  assert not is_packed
642  if is_repeated:
643    def EncodeRepeatedField(write, value):
644      for element in value:
645        write(tag)
646        local_EncodeVarint(write, element.ByteSize())
647        element._InternalSerialize(write)
648    return EncodeRepeatedField
649  else:
650    def EncodeField(write, value):
651      write(tag)
652      local_EncodeVarint(write, value.ByteSize())
653      return value._InternalSerialize(write)
654    return EncodeField
655
656
657# --------------------------------------------------------------------
658# As before, MessageSet is special.
659
660
661def MessageSetItemEncoder(field_number):
662  """Encoder for extensions of MessageSet.
663
664  The message set message looks like this:
665    message MessageSet {
666      repeated group Item = 1 {
667        required int32 type_id = 2;
668        required string message = 3;
669      }
670    }
671  """
672  start_bytes = "".join([
673      TagBytes(1, wire_format.WIRETYPE_START_GROUP),
674      TagBytes(2, wire_format.WIRETYPE_VARINT),
675      _VarintBytes(field_number),
676      TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)])
677  end_bytes = TagBytes(1, wire_format.WIRETYPE_END_GROUP)
678  local_EncodeVarint = _EncodeVarint
679
680  def EncodeField(write, value):
681    write(start_bytes)
682    local_EncodeVarint(write, value.ByteSize())
683    value._InternalSerialize(write)
684    return write(end_bytes)
685
686  return EncodeField
687