• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# https://developers.google.com/protocol-buffers/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#PY25 compatible for GAE.
32#
33# Copyright 2007 Google Inc. All Rights Reserved.
34
35"""Contains routines for printing protocol messages in text format."""
36
37__author__ = 'kenton@google.com (Kenton Varda)'
38
39import cStringIO
40import re
41
42from google.protobuf.internal import type_checkers
43from google.protobuf import descriptor
44from google.protobuf import text_encoding
45
46__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
47           'PrintFieldValue', 'Merge']
48
49
# Value checkers used by ParseInteger for range validation, indexed as
# _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)].
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Alternative spellings of infinity/NaN accepted by ParseFloat
# (e.g. 'inf', '-Infinity', 'nanf').
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# cpp_type values to which the float_format option of MessageToString /
# PrintFieldValue applies.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
58
59
class Error(Exception):
  """Base exception for all errors raised by the text_format module."""
63
class ParseError(Error):
  """Raised when the ASCII text being parsed is malformed."""
67
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Renders a protobuf message as a text-format string.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  buf = cStringIO.StringIO()
  PrintMessage(message, buf, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = buf.getvalue()
  buf.close()
  # Every field is followed by a separator; in one-line mode that leaves a
  # trailing space which we strip off here.
  return text.rstrip() if as_one_line else text
102
103
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Writes the text-format rendering of every set field of |message| to the
  stream |out|."""
  fields = message.ListFields()
  if use_index_order:
    # Sort by declaration order in the .proto file rather than field number.
    fields.sort(key=lambda kv: kv[0].index)
  for field, value in fields:
    # Normalize to a list of elements so repeated and singular fields share
    # one code path: repeated fields print one name/value pair per element.
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      elements = value
    else:
      elements = [value]
    for element in elements:
      PrintField(field, element, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 float_format=float_format)
120
121
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, float_format=None):
  """Writes one field name/value pair to |out|.  For repeated fields the
  value must be a single element, not the whole list."""
  out.write(' ' * indent)
  if field.is_extension:
    # Extensions are written inside square brackets.  A MessageSet-style
    # extension is identified by the full name of its contained message
    # type rather than by the extension field's own name.
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
      name = field.message_type.full_name
    else:
      name = field.full_name
    out.write('[' + name + ']')
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # For groups, use the capitalized name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
156
157
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    float_format=None):
  """Writes just the value of a field (no name) to |out|.  For repeated
  fields the value must be a single element."""
  openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')

  cpp_type = field.cpp_type
  if cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(closeb)
    else:
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(' ' * indent + closeb)
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    # Prefer the symbolic enum name; fall back to the raw number for
    # values not defined in the enum.
    enum_value = field.enum_type.values_by_number.get(value, None)
    out.write(enum_value.name if enum_value is not None else str(value))
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    out_value = value.encode('utf-8') if isinstance(value, unicode) else value
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # We need to escape non-UTF8 chars in TYPE_BYTES field.
      out_as_utf8 = False
    else:
      out_as_utf8 = as_utf8
    out.write('"%s"' % text_encoding.CEscape(out_value, out_as_utf8))
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
  elif cpp_type in _FLOAT_TYPES and float_format is not None:
    # Apply the caller-supplied format spec to float/double values.
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
212
213
def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Tokenizes the given lines and merges each top-level field into |message|.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(lines)
  while not tok.AtEnd():
    _MergeField(tok, message, allow_multiple_scalars)
230
231
def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Duplicate values for non-repeated fields are an error (contrast Merge()).

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Normalize non-str input to a unicode string before splitting.
  if not isinstance(text, str):
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message)
247
248
def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  lines = text.split('\n')
  return MergeLines(lines, message)
266
267
def ParseLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Strict mode: repeated values for a non-repeated field are an error.
  _ParseOrMerge(lines, message, allow_multiple_scalars=False)
  return message
283
284
def MergeLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Merge mode: a later value for a non-repeated field overwrites the
  # earlier one instead of raising.
  _ParseOrMerge(lines, message, allow_multiple_scalars=True)
  return message
300
301
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension field, written as "[qualified.extension.name]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    # Reject a group field referenced by anything other than its exact
    # capitalized type name.
    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Sub-message: the colon is optional, and the body may be delimited
    # either by '<...>' or by '{...}'.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message as present even if its body is empty.
      sub_message.SetInParent()

    # Recursively merge fields until the matching close delimiter.
    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
390
391
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')

  # Dispatch on the wire type to the matching tokenizer consumer.
  fd = descriptor.FieldDescriptor
  if field.type in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == fd.LABEL_REPEATED:
    # Repeated fields accumulate values.
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  elif field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
  else:
    if not allow_multiple_scalars and message.HasField(field.name):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" fields.' %
          (message.DESCRIPTOR.full_name, field.name))
    setattr(message, field.name, value)
458
459
class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Runs of whitespace and '#' comments (to end of line) skipped between
  # tokens.
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
      '[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    self._position = 0
    self._line = -1  # Index of the current line; -1 until the first line.
    self._column = 0  # Column of the next character to scan.
    self._token_start = None
    self.token = ''  # Text of the current token; '' signals end of input.
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0  # Location where the previous token started;
    self._previous_column = 0  # used by ParseErrorPreviousToken.
    self._more_lines = True
    self._SkipWhitespace()
    self.NextToken()

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Advances to the next input line once the current one is exhausted."""
    while len(self._current_line) <= self._column:
      try:
        self._current_line = self._lines.next()
      except StopIteration:
        # No more input; AtEnd() will become true once the token is consumed.
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Skips whitespace and '#' comments, pulling new lines as needed."""
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return unicode(the_bytes, 'utf-8')
    except UnicodeDecodeError, e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, as in C and Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return ''.encode('latin1').join(the_list)  ##PY25
##!PY25    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (symbolic name or number) for |field|.

    Args:
      field: The descriptor of the enum field being parsed.

    Returns:
      The numeric value of the enum.

    Raises:
      ParseError: If the current token is not a valid value of the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    """Wraps a UnicodeDecodeError in a ParseError for the current token."""
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the token we are leaving started, for
    # ParseErrorPreviousToken.
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Not a recognized token class; return the single character so the
      # caller can report a precise error.
      self.token = self._current_line[self._column]
754
755
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer token.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  try:
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler.  Base 0 accepts decimal, hex
    # and octal spellings.
    result = long(text, 0) if is_long else int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range-check with the checker matching (is_long, is_signed); the index
  # formula mirrors the ordering of _INTEGER_CHECKERS.  Exceptions are
  # handled by callers.
  _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)].CheckValue(result)
  return result
786
787
def ParseFloat(text):
  """Parse a floating point number.

  Accepts Python float syntax plus the protobuf text-format spellings of
  infinity and NaN ('inf', '-Infinity', 'nanf', ...) and a C-style 'f'
  suffix ('1.0f').

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax.
    return float(text)
  except ValueError:
    pass
  # Not Python syntax; check the alternative protobuf spellings.
  if re.match('-?inf(?:inity)?f?', text, re.IGNORECASE):
    return float('-inf') if text[0] == '-' else float('inf')
  if re.match('nanf?', text, re.IGNORECASE):
    return float('nan')
  # Assume a suffixed literal in the '1.0f' format.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
818
819
def ParseBool(text):
  """Parse a boolean token.

  Args:
    text: Text to parse.

  Returns:
    True for 'true'/'t'/'1', False for 'false'/'f'/'0'.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('false', 'f', '0'):
    return False
  if text in ('true', 't', '1'):
    return True
  raise ValueError('Expected "true" or "false".')
838
839
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric, so look the value up as an enum identifier.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
    return enum_value.number
  # Numeric value: it must still name a declared enum value.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError(
        'Enum type "%s" has no value with number %d.' % (
            enum_descriptor.full_name, number))
  return enum_value.number
874