1# Protocol Buffers - Google's data interchange format 2# Copyright 2008 Google Inc. All rights reserved. 3# http://code.google.com/p/protobuf/ 4# 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions are 7# met: 8# 9# * Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# * Redistributions in binary form must reproduce the above 12# copyright notice, this list of conditions and the following disclaimer 13# in the documentation and/or other materials provided with the 14# distribution. 15# * Neither the name of Google Inc. nor the names of its 16# contributors may be used to endorse or promote products derived from 17# this software without specific prior written permission. 18# 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31"""Code for encoding protocol message primitives. 32 33Contains the logic for encoding every logical protocol field type 34into one of the 5 physical wire types. 35 36This code is designed to push the Python interpreter's performance to the 37limits. 38 39The basic idea is that at startup time, for every field (i.e. every 40FieldDescriptor) we construct two functions: a "sizer" and an "encoder". The 41sizer takes a value of this field's type and computes its byte size. The 42encoder takes a writer function and a value. It encodes the value into byte 43strings and invokes the writer function to write those strings. Typically the 44writer function is the write() method of a cStringIO. 45 46We try to do as much work as possible when constructing the writer and the 47sizer rather than when calling them. In particular: 48* We copy any needed global functions to local variables, so that we do not need 49 to do costly global table lookups at runtime. 50* Similarly, we try to do any attribute lookups at startup time if possible. 51* Every field's tag is encoded to bytes at startup, since it can't change at 52 runtime. 53* Whatever component of the field size we can compute at startup, we do. 54* We *avoid* sharing code if doing so would make the code slower and not sharing 55 does not burden us too much. For example, encoders for repeated fields do 56 not just call the encoders for singular fields in a loop because this would 57 add an extra function call overhead for every loop iteration; instead, we 58 manually inline the single-value encoder into the loop. 59* If a Python function lacks a return statement, Python actually generates 60 instructions to pop the result of the last statement off the stack, push 61 None onto the stack, and then return that. If we really don't care what 62 value is returned, then we can save two instructions by returning the 63 result of the last statement. It looks funny but it helps. 64* We assume that type and bounds checking has happened at a higher level. 65""" 66 67__author__ = 'kenton@google.com (Kenton Varda)' 68 69import struct 70from google.protobuf.internal import wire_format 71 72 73def _VarintSize(value): 74 """Compute the size of a varint value.""" 75 if value <= 0x7f: return 1 76 if value <= 0x3fff: return 2 77 if value <= 0x1fffff: return 3 78 if value <= 0xfffffff: return 4 79 if value <= 0x7ffffffff: return 5 80 if value <= 0x3ffffffffff: return 6 81 if value <= 0x1ffffffffffff: return 7 82 if value <= 0xffffffffffffff: return 8 83 if value <= 0x7fffffffffffffff: return 9 84 return 10 85 86 87def _SignedVarintSize(value): 88 """Compute the size of a signed varint value.""" 89 if value < 0: return 10 90 if value <= 0x7f: return 1 91 if value <= 0x3fff: return 2 92 if value <= 0x1fffff: return 3 93 if value <= 0xfffffff: return 4 94 if value <= 0x7ffffffff: return 5 95 if value <= 0x3ffffffffff: return 6 96 if value <= 0x1ffffffffffff: return 7 97 if value <= 0xffffffffffffff: return 8 98 if value <= 0x7fffffffffffffff: return 9 99 return 10 100 101 102def _TagSize(field_number): 103 """Returns the number of bytes required to serialize a tag with this field 104 number.""" 105 # Just pass in type 0, since the type won't affect the tag+type size. 106 return _VarintSize(wire_format.PackTag(field_number, 0)) 107 108 109# -------------------------------------------------------------------- 110# In this section we define some generic sizers. Each of these functions 111# takes parameters specific to a particular field type, e.g. int32 or fixed64. 112# It returns another function which in turn takes parameters specific to a 113# particular field, e.g. the field number and whether it is repeated or packed. 114# Look at the next section to see how these are used. 115 116 117def _SimpleSizer(compute_value_size): 118 """A sizer which uses the function compute_value_size to compute the size of 119 each value. Typically compute_value_size is _VarintSize.""" 120 121 def SpecificSizer(field_number, is_repeated, is_packed): 122 tag_size = _TagSize(field_number) 123 if is_packed: 124 local_VarintSize = _VarintSize 125 def PackedFieldSize(value): 126 result = 0 127 for element in value: 128 result += compute_value_size(element) 129 return result + local_VarintSize(result) + tag_size 130 return PackedFieldSize 131 elif is_repeated: 132 def RepeatedFieldSize(value): 133 result = tag_size * len(value) 134 for element in value: 135 result += compute_value_size(element) 136 return result 137 return RepeatedFieldSize 138 else: 139 def FieldSize(value): 140 return tag_size + compute_value_size(value) 141 return FieldSize 142 143 return SpecificSizer 144 145 146def _ModifiedSizer(compute_value_size, modify_value): 147 """Like SimpleSizer, but modify_value is invoked on each value before it is 148 passed to compute_value_size. modify_value is typically ZigZagEncode.""" 149 150 def SpecificSizer(field_number, is_repeated, is_packed): 151 tag_size = _TagSize(field_number) 152 if is_packed: 153 local_VarintSize = _VarintSize 154 def PackedFieldSize(value): 155 result = 0 156 for element in value: 157 result += compute_value_size(modify_value(element)) 158 return result + local_VarintSize(result) + tag_size 159 return PackedFieldSize 160 elif is_repeated: 161 def RepeatedFieldSize(value): 162 result = tag_size * len(value) 163 for element in value: 164 result += compute_value_size(modify_value(element)) 165 return result 166 return RepeatedFieldSize 167 else: 168 def FieldSize(value): 169 return tag_size + compute_value_size(modify_value(value)) 170 return FieldSize 171 172 return SpecificSizer 173 174 175def _FixedSizer(value_size): 176 """Like _SimpleSizer except for a fixed-size field. The input is the size 177 of one value.""" 178 179 def SpecificSizer(field_number, is_repeated, is_packed): 180 tag_size = _TagSize(field_number) 181 if is_packed: 182 local_VarintSize = _VarintSize 183 def PackedFieldSize(value): 184 result = len(value) * value_size 185 return result + local_VarintSize(result) + tag_size 186 return PackedFieldSize 187 elif is_repeated: 188 element_size = value_size + tag_size 189 def RepeatedFieldSize(value): 190 return len(value) * element_size 191 return RepeatedFieldSize 192 else: 193 field_size = value_size + tag_size 194 def FieldSize(value): 195 return field_size 196 return FieldSize 197 198 return SpecificSizer 199 200 201# ==================================================================== 202# Here we declare a sizer constructor for each field type. Each "sizer 203# constructor" is a function that takes (field_number, is_repeated, is_packed) 204# as parameters and returns a sizer, which in turn takes a field value as 205# a parameter and returns its encoded size. 206 207 208Int32Sizer = Int64Sizer = EnumSizer = _SimpleSizer(_SignedVarintSize) 209 210UInt32Sizer = UInt64Sizer = _SimpleSizer(_VarintSize) 211 212SInt32Sizer = SInt64Sizer = _ModifiedSizer( 213 _SignedVarintSize, wire_format.ZigZagEncode) 214 215Fixed32Sizer = SFixed32Sizer = FloatSizer = _FixedSizer(4) 216Fixed64Sizer = SFixed64Sizer = DoubleSizer = _FixedSizer(8) 217 218BoolSizer = _FixedSizer(1) 219 220 221def StringSizer(field_number, is_repeated, is_packed): 222 """Returns a sizer for a string field.""" 223 224 tag_size = _TagSize(field_number) 225 local_VarintSize = _VarintSize 226 local_len = len 227 assert not is_packed 228 if is_repeated: 229 def RepeatedFieldSize(value): 230 result = tag_size * len(value) 231 for element in value: 232 l = local_len(element.encode('utf-8')) 233 result += local_VarintSize(l) + l 234 return result 235 return RepeatedFieldSize 236 else: 237 def FieldSize(value): 238 l = local_len(value.encode('utf-8')) 239 return tag_size + local_VarintSize(l) + l 240 return FieldSize 241 242 243def BytesSizer(field_number, is_repeated, is_packed): 244 """Returns a sizer for a bytes field.""" 245 246 tag_size = _TagSize(field_number) 247 local_VarintSize = _VarintSize 248 local_len = len 249 assert not is_packed 250 if is_repeated: 251 def RepeatedFieldSize(value): 252 result = tag_size * len(value) 253 for element in value: 254 l = local_len(element) 255 result += local_VarintSize(l) + l 256 return result 257 return RepeatedFieldSize 258 else: 259 def FieldSize(value): 260 l = local_len(value) 261 return tag_size + local_VarintSize(l) + l 262 return FieldSize 263 264 265def GroupSizer(field_number, is_repeated, is_packed): 266 """Returns a sizer for a group field.""" 267 268 tag_size = _TagSize(field_number) * 2 269 assert not is_packed 270 if is_repeated: 271 def RepeatedFieldSize(value): 272 result = tag_size * len(value) 273 for element in value: 274 result += element.ByteSize() 275 return result 276 return RepeatedFieldSize 277 else: 278 def FieldSize(value): 279 return tag_size + value.ByteSize() 280 return FieldSize 281 282 283def MessageSizer(field_number, is_repeated, is_packed): 284 """Returns a sizer for a message field.""" 285 286 tag_size = _TagSize(field_number) 287 local_VarintSize = _VarintSize 288 assert not is_packed 289 if is_repeated: 290 def RepeatedFieldSize(value): 291 result = tag_size * len(value) 292 for element in value: 293 l = element.ByteSize() 294 result += local_VarintSize(l) + l 295 return result 296 return RepeatedFieldSize 297 else: 298 def FieldSize(value): 299 l = value.ByteSize() 300 return tag_size + local_VarintSize(l) + l 301 return FieldSize 302 303 304# -------------------------------------------------------------------- 305# MessageSet is special. 306 307 308def MessageSetItemSizer(field_number): 309 """Returns a sizer for extensions of MessageSet. 310 311 The message set message looks like this: 312 message MessageSet { 313 repeated group Item = 1 { 314 required int32 type_id = 2; 315 required string message = 3; 316 } 317 } 318 """ 319 static_size = (_TagSize(1) * 2 + _TagSize(2) + _VarintSize(field_number) + 320 _TagSize(3)) 321 local_VarintSize = _VarintSize 322 323 def FieldSize(value): 324 l = value.ByteSize() 325 return static_size + local_VarintSize(l) + l 326 327 return FieldSize 328 329 330# ==================================================================== 331# Encoders! 332 333 334def _VarintEncoder(): 335 """Return an encoder for a basic varint value (does not include tag).""" 336 337 local_chr = chr 338 def EncodeVarint(write, value): 339 bits = value & 0x7f 340 value >>= 7 341 while value: 342 write(local_chr(0x80|bits)) 343 bits = value & 0x7f 344 value >>= 7 345 return write(local_chr(bits)) 346 347 return EncodeVarint 348 349 350def _SignedVarintEncoder(): 351 """Return an encoder for a basic signed varint value (does not include 352 tag).""" 353 354 local_chr = chr 355 def EncodeSignedVarint(write, value): 356 if value < 0: 357 value += (1 << 64) 358 bits = value & 0x7f 359 value >>= 7 360 while value: 361 write(local_chr(0x80|bits)) 362 bits = value & 0x7f 363 value >>= 7 364 return write(local_chr(bits)) 365 366 return EncodeSignedVarint 367 368 369_EncodeVarint = _VarintEncoder() 370_EncodeSignedVarint = _SignedVarintEncoder() 371 372 373def _VarintBytes(value): 374 """Encode the given integer as a varint and return the bytes. This is only 375 called at startup time so it doesn't need to be fast.""" 376 377 pieces = [] 378 _EncodeVarint(pieces.append, value) 379 return "".join(pieces) 380 381 382def TagBytes(field_number, wire_type): 383 """Encode the given tag and return the bytes. Only called at startup.""" 384 385 return _VarintBytes(wire_format.PackTag(field_number, wire_type)) 386 387# -------------------------------------------------------------------- 388# As with sizers (see above), we have a number of common encoder 389# implementations. 390 391 392def _SimpleEncoder(wire_type, encode_value, compute_value_size): 393 """Return a constructor for an encoder for fields of a particular type. 394 395 Args: 396 wire_type: The field's wire type, for encoding tags. 397 encode_value: A function which encodes an individual value, e.g. 398 _EncodeVarint(). 399 compute_value_size: A function which computes the size of an individual 400 value, e.g. _VarintSize(). 401 """ 402 403 def SpecificEncoder(field_number, is_repeated, is_packed): 404 if is_packed: 405 tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 406 local_EncodeVarint = _EncodeVarint 407 def EncodePackedField(write, value): 408 write(tag_bytes) 409 size = 0 410 for element in value: 411 size += compute_value_size(element) 412 local_EncodeVarint(write, size) 413 for element in value: 414 encode_value(write, element) 415 return EncodePackedField 416 elif is_repeated: 417 tag_bytes = TagBytes(field_number, wire_type) 418 def EncodeRepeatedField(write, value): 419 for element in value: 420 write(tag_bytes) 421 encode_value(write, element) 422 return EncodeRepeatedField 423 else: 424 tag_bytes = TagBytes(field_number, wire_type) 425 def EncodeField(write, value): 426 write(tag_bytes) 427 return encode_value(write, value) 428 return EncodeField 429 430 return SpecificEncoder 431 432 433def _ModifiedEncoder(wire_type, encode_value, compute_value_size, modify_value): 434 """Like SimpleEncoder but additionally invokes modify_value on every value 435 before passing it to encode_value. Usually modify_value is ZigZagEncode.""" 436 437 def SpecificEncoder(field_number, is_repeated, is_packed): 438 if is_packed: 439 tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 440 local_EncodeVarint = _EncodeVarint 441 def EncodePackedField(write, value): 442 write(tag_bytes) 443 size = 0 444 for element in value: 445 size += compute_value_size(modify_value(element)) 446 local_EncodeVarint(write, size) 447 for element in value: 448 encode_value(write, modify_value(element)) 449 return EncodePackedField 450 elif is_repeated: 451 tag_bytes = TagBytes(field_number, wire_type) 452 def EncodeRepeatedField(write, value): 453 for element in value: 454 write(tag_bytes) 455 encode_value(write, modify_value(element)) 456 return EncodeRepeatedField 457 else: 458 tag_bytes = TagBytes(field_number, wire_type) 459 def EncodeField(write, value): 460 write(tag_bytes) 461 return encode_value(write, modify_value(value)) 462 return EncodeField 463 464 return SpecificEncoder 465 466 467def _StructPackEncoder(wire_type, format): 468 """Return a constructor for an encoder for a fixed-width field. 469 470 Args: 471 wire_type: The field's wire type, for encoding tags. 472 format: The format string to pass to struct.pack(). 473 """ 474 475 value_size = struct.calcsize(format) 476 477 def SpecificEncoder(field_number, is_repeated, is_packed): 478 local_struct_pack = struct.pack 479 if is_packed: 480 tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 481 local_EncodeVarint = _EncodeVarint 482 def EncodePackedField(write, value): 483 write(tag_bytes) 484 local_EncodeVarint(write, len(value) * value_size) 485 for element in value: 486 write(local_struct_pack(format, element)) 487 return EncodePackedField 488 elif is_repeated: 489 tag_bytes = TagBytes(field_number, wire_type) 490 def EncodeRepeatedField(write, value): 491 for element in value: 492 write(tag_bytes) 493 write(local_struct_pack(format, element)) 494 return EncodeRepeatedField 495 else: 496 tag_bytes = TagBytes(field_number, wire_type) 497 def EncodeField(write, value): 498 write(tag_bytes) 499 return write(local_struct_pack(format, value)) 500 return EncodeField 501 502 return SpecificEncoder 503 504 505# ==================================================================== 506# Here we declare an encoder constructor for each field type. These work 507# very similarly to sizer constructors, described earlier. 508 509 510Int32Encoder = Int64Encoder = EnumEncoder = _SimpleEncoder( 511 wire_format.WIRETYPE_VARINT, _EncodeSignedVarint, _SignedVarintSize) 512 513UInt32Encoder = UInt64Encoder = _SimpleEncoder( 514 wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize) 515 516SInt32Encoder = SInt64Encoder = _ModifiedEncoder( 517 wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize, 518 wire_format.ZigZagEncode) 519 520# Note that Python conveniently guarantees that when using the '<' prefix on 521# formats, they will also have the same size across all platforms (as opposed 522# to without the prefix, where their sizes depend on the C compiler's basic 523# type sizes). 524Fixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I') 525Fixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q') 526SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i') 527SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q') 528FloatEncoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<f') 529DoubleEncoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<d') 530 531 532def BoolEncoder(field_number, is_repeated, is_packed): 533 """Returns an encoder for a boolean field.""" 534 535 false_byte = chr(0) 536 true_byte = chr(1) 537 if is_packed: 538 tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 539 local_EncodeVarint = _EncodeVarint 540 def EncodePackedField(write, value): 541 write(tag_bytes) 542 local_EncodeVarint(write, len(value)) 543 for element in value: 544 if element: 545 write(true_byte) 546 else: 547 write(false_byte) 548 return EncodePackedField 549 elif is_repeated: 550 tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT) 551 def EncodeRepeatedField(write, value): 552 for element in value: 553 write(tag_bytes) 554 if element: 555 write(true_byte) 556 else: 557 write(false_byte) 558 return EncodeRepeatedField 559 else: 560 tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT) 561 def EncodeField(write, value): 562 write(tag_bytes) 563 if value: 564 return write(true_byte) 565 return write(false_byte) 566 return EncodeField 567 568 569def StringEncoder(field_number, is_repeated, is_packed): 570 """Returns an encoder for a string field.""" 571 572 tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 573 local_EncodeVarint = _EncodeVarint 574 local_len = len 575 assert not is_packed 576 if is_repeated: 577 def EncodeRepeatedField(write, value): 578 for element in value: 579 encoded = element.encode('utf-8') 580 write(tag) 581 local_EncodeVarint(write, local_len(encoded)) 582 write(encoded) 583 return EncodeRepeatedField 584 else: 585 def EncodeField(write, value): 586 encoded = value.encode('utf-8') 587 write(tag) 588 local_EncodeVarint(write, local_len(encoded)) 589 return write(encoded) 590 return EncodeField 591 592 593def BytesEncoder(field_number, is_repeated, is_packed): 594 """Returns an encoder for a bytes field.""" 595 596 tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 597 local_EncodeVarint = _EncodeVarint 598 local_len = len 599 assert not is_packed 600 if is_repeated: 601 def EncodeRepeatedField(write, value): 602 for element in value: 603 write(tag) 604 local_EncodeVarint(write, local_len(element)) 605 write(element) 606 return EncodeRepeatedField 607 else: 608 def EncodeField(write, value): 609 write(tag) 610 local_EncodeVarint(write, local_len(value)) 611 return write(value) 612 return EncodeField 613 614 615def GroupEncoder(field_number, is_repeated, is_packed): 616 """Returns an encoder for a group field.""" 617 618 start_tag = TagBytes(field_number, wire_format.WIRETYPE_START_GROUP) 619 end_tag = TagBytes(field_number, wire_format.WIRETYPE_END_GROUP) 620 assert not is_packed 621 if is_repeated: 622 def EncodeRepeatedField(write, value): 623 for element in value: 624 write(start_tag) 625 element._InternalSerialize(write) 626 write(end_tag) 627 return EncodeRepeatedField 628 else: 629 def EncodeField(write, value): 630 write(start_tag) 631 value._InternalSerialize(write) 632 return write(end_tag) 633 return EncodeField 634 635 636def MessageEncoder(field_number, is_repeated, is_packed): 637 """Returns an encoder for a message field.""" 638 639 tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) 640 local_EncodeVarint = _EncodeVarint 641 assert not is_packed 642 if is_repeated: 643 def EncodeRepeatedField(write, value): 644 for element in value: 645 write(tag) 646 local_EncodeVarint(write, element.ByteSize()) 647 element._InternalSerialize(write) 648 return EncodeRepeatedField 649 else: 650 def EncodeField(write, value): 651 write(tag) 652 local_EncodeVarint(write, value.ByteSize()) 653 return value._InternalSerialize(write) 654 return EncodeField 655 656 657# -------------------------------------------------------------------- 658# As before, MessageSet is special. 659 660 661def MessageSetItemEncoder(field_number): 662 """Encoder for extensions of MessageSet. 663 664 The message set message looks like this: 665 message MessageSet { 666 repeated group Item = 1 { 667 required int32 type_id = 2; 668 required string message = 3; 669 } 670 } 671 """ 672 start_bytes = "".join([ 673 TagBytes(1, wire_format.WIRETYPE_START_GROUP), 674 TagBytes(2, wire_format.WIRETYPE_VARINT), 675 _VarintBytes(field_number), 676 TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)]) 677 end_bytes = TagBytes(1, wire_format.WIRETYPE_END_GROUP) 678 local_EncodeVarint = _EncodeVarint 679 680 def EncodeField(write, value): 681 write(start_bytes) 682 local_EncodeVarint(write, value.ByteSize()) 683 value._InternalSerialize(write) 684 return write(end_bytes) 685 686 return EncodeField 687