1# Protocol Buffers - Google's data interchange format 2# Copyright 2008 Google Inc. All rights reserved. 3# https://developers.google.com/protocol-buffers/ 4# 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions are 7# met: 8# 9# * Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# * Redistributions in binary form must reproduce the above 12# copyright notice, this list of conditions and the following disclaimer 13# in the documentation and/or other materials provided with the 14# distribution. 15# * Neither the name of Google Inc. nor the names of its 16# contributors may be used to endorse or promote products derived from 17# this software without specific prior written permission. 18# 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31#PY25 compatible for GAE. 32# 33# Copyright 2007 Google Inc. All Rights Reserved. 
"""Contains routines for printing protocol messages in text format."""

__author__ = 'kenton@google.com (Kenton Varda)'

import cStringIO
import re

from google.protobuf.internal import type_checkers
from google.protobuf import descriptor
from google.protobuf import text_encoding

__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Value checkers indexed by 2 * int(is_long) + int(is_signed); see
# ParseInteger for the corresponding lookup.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Alternative spellings of special float values accepted by ParseFloat,
# e.g. "inf", "-Infinity", "nanf".
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])


class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of ASCII parsing error."""


def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Convert protobuf message to text format.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  out = cStringIO.StringIO()
  PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  result = out.getvalue()
  out.close()
  if as_one_line:
    # One-line output ends with a trailing separator space; drop it.
    return result.rstrip()
  return result


def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Print all set fields of a message to the given writer.

  Args:
    message: The protocol buffers message.
    out: A file-like object to write to.
    indent: Number of spaces of leading indentation.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces.
    use_index_order: If True, print fields in source-code (index) order
      instead of field-number order.
    float_format: Optional format spec for floating point values.
  """
  fields = message.ListFields()
  if use_index_order:
    fields.sort(key=lambda x: x[0].index)
  for field, value in fields:
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      # Repeated fields are printed as one name/value pair per element.
      for element in value:
        PrintField(field, element, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
    else:
      PrintField(field, value, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 float_format=float_format)


def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, float_format=None):
  """Print a single field name/value pair.  For repeated fields, the value
  should be a single element."""

  out.write(' ' * indent)
  if field.is_extension:
    out.write('[')
    # A MessageSet extension is printed using the full name of its message
    # type rather than the extension field's own full name.
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # For groups, use the capitalized name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  float_format=float_format)
  if as_one_line:
    out.write(' ')
  else:
    out.write('\n')


def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    float_format=None):
  """Print a single field value (not including name).  For repeated fields,
  the value should be a single element."""

  if pointy_brackets:
    openb = '<'
    closeb = '>'
  else:
    openb = '{'
    closeb = '}'

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(closeb)
    else:
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(' ' * indent + closeb)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    # Prefer the symbolic name; fall back to the raw number for values not
    # defined in the enum type.
    enum_value = field.enum_type.values_by_number.get(value, None)
    if enum_value is not None:
      out.write(enum_value.name)
    else:
      out.write(str(value))
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    out.write('\"')
    if isinstance(value, unicode):
      out_value = value.encode('utf-8')
    else:
      out_value = value
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # We need to escape non-UTF8 chars in TYPE_BYTES field.
      out_as_utf8 = False
    else:
      out_as_utf8 = as_utf8
    out.write(text_encoding.CEscape(out_value, out_as_utf8))
    out.write('\"')
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    if value:
      out.write('true')
    else:
      out.write('false')
  elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))


def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Converts an ASCII representation of a protocol message into a message.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tokenizer = _Tokenizer(lines)
  while not tokenizer.AtEnd():
    _MergeField(tokenizer, message, allow_multiple_scalars)


def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  if not isinstance(text, str): text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message)


def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  return MergeLines(text.split('\n'), message)


def ParseLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(lines, message, False)
  return message


def MergeLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like ParseLines(), but allows repeated values for a non-repeated field.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(lines, message, True)
  return message


def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # An extension field, written as [qualified.extension.name].
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Sub-messages may be written with either <...> or {...}; the colon
    # before the opening bracket is optional.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the field as present even if the sub-message stays empty.
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')


def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  value = None

  if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                    descriptor.FieldDescriptor.TYPE_SINT32,
                    descriptor.FieldDescriptor.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                      descriptor.FieldDescriptor.TYPE_SINT64,
                      descriptor.FieldDescriptor.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                      descriptor.FieldDescriptor.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                      descriptor.FieldDescriptor.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                      descriptor.FieldDescriptor.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  else:
    if field.is_extension:
      if not allow_multiple_scalars and message.HasExtension(field):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" extensions.' %
            (message.DESCRIPTOR.full_name, field.full_name))
      else:
        message.Extensions[field] = value
    else:
      if not allow_multiple_scalars and message.HasField(field.name):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" fields.' %
            (message.DESCRIPTOR.full_name, field.name))
      else:
        setattr(message, field.name, value)


class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Whitespace and "#" comments (to end of line) are skipped between tokens.
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
      '[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    self._position = 0
    self._line = -1           # 0-based index of the current line.
    self._column = 0          # 0-based column within the current line.
    self._token_start = None
    self.token = ''           # The current token; '' means end of input.
    self._lines = iter(lines)
    self._current_line = ''
    # Position of the previously read token, for ParseErrorPreviousToken.
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._SkipWhitespace()
    self.NextToken()

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advance to the next line whenever the current one is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = self._lines.next()
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return unicode(the_bytes, 'utf-8')
    except UnicodeDecodeError, e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, as in C and Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return ''.encode('latin1').join(the_list)  ##PY25
##!PY25    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.

    Returns:
      The unescaped bytes of the string token.

    Raises:
      ParseError: If the token is not a correctly quoted string.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (by name or number) for the given field.

    Args:
      field: Enum field descriptor.

    Returns:
      Enum value number.

    Raises:
      ParseError: If an enum value couldn't be consumed.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the token we are leaving started, so errors can point
    # at the previous token (see ParseErrorPreviousToken).
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No token pattern matched; consume a single character (e.g. ':', '{').
      self.token = self._current_line[self._column]


def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown iff the text is not a valid integer.
  """
  # Do the actual parsing. Exception handling is propagated to caller.
  try:
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler.
    if is_long:
      result = long(text, 0)
    else:
      result = int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Check if the integer is sane. Exceptions handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result


def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax.
    return float(text)
  except ValueError:
    # Check alternative spellings.
    if _FLOAT_INFINITY.match(text):
      if text[0] == '-':
        return float('-inf')
      else:
        return float('inf')
    elif _FLOAT_NAN.match(text):
      return float('nan')
    else:
      # assume '1.0f' format
      try:
        return float(text.rstrip('f'))
      except ValueError:
        raise ValueError('Couldn\'t parse float: %s' % text)


def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  elif text in ('false', 'f', '0'):
    return False
  else:
    raise ValueError('Expected "true" or "false".')


def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Identifier.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
  else:
    # Numeric value.
    enum_value = enum_descriptor.values_by_number.get(number, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value with number %d.' % (
              enum_descriptor.full_name, number))
  return enum_value.number