• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright 2007 Neal Norwitz
4# Portions Copyright 2007 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Generate an Abstract Syntax Tree (AST) for C++."""
19
20# FIXME:
21#  * Tokens should never be exported, need to convert to Nodes
22#    (return types, parameters, etc.)
23#  * Handle static class data for templatized classes
24#  * Handle casts (both C++ and C-style)
25#  * Handle conditions and loops (if/else, switch, for, while/do)
26#
27# TODO much, much later:
28#  * Handle #define
29#  * exceptions
30
31
32try:
33  # Python 3.x
34  import builtins
35except ImportError:
36  # Python 2.x
37  import __builtin__ as builtins
38
39import collections
40import sys
41import traceback
42
43from cpp import keywords
44from cpp import tokenize
45from cpp import utils
46
47
if not hasattr(builtins, 'reversed'):
  # Support Python 2.3 and earlier.
  def reversed(seq):
    # Yield seq's elements from last to first without copying the sequence.
    for i in range(len(seq)-1, -1, -1):
      yield seq[i]

if not hasattr(builtins, 'next'):
  # Support Python 2.5 and earlier.
  def next(obj):
    # Fall back to the Python 2 iterator-protocol method name.
    return obj.next()
58
59
# C++ member-access levels, in declaration order.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing a parsed function/method; OR'd together into
# Function.modifiers (see the '0x%02x' rendering in Function.__str__).
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name used to signal namespace exits in the token queue.
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
97
98
99# TODO(nnorwitz): use this as a singleton for templated_types, etc
100# where we don't want to create a new empty dict each time.  It is also const.
101class _NullDict(object):
102  __contains__ = lambda self: False
103  keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
104
105
# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
  """Base AST node.

  Every node records the token positions (start, end) it was parsed from
  and answers three classification queries used by consumers of the AST.
  """

  def __init__(self, start, end):
    self.start = start
    self.end = end

  def IsDeclaration(self):
    """Returns bool if this node is a declaration."""
    return False

  def IsDefinition(self):
    """Returns bool if this node is a definition."""
    return False

  def IsExportable(self):
    """Returns bool if this node exportable from a header file."""
    return False

  def Requires(self, node):
    """Does this AST node require the definition of the node passed in?"""
    return False

  def XXX__str__(self):
    # Intentionally not named __str__; subclasses provide their own.
    return self._StringHelper(self.__class__.__name__, '')

  def _StringHelper(self, name, suffix):
    # Include positions only in debug mode to keep output compact.
    if utils.DEBUG:
      return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
    return '%s(%s)' % (name, suffix)

  def __repr__(self):
    return str(self)
140
141
class Define(Node):
  """A preprocessor #define: a macro name plus its replacement text."""

  def __init__(self, start, end, name, definition):
    Node.__init__(self, start, end)
    self.name = name
    self.definition = definition

  def __str__(self):
    return self._StringHelper(self.__class__.__name__,
                              '%s %s' % (self.name, self.definition))
151
152
class Include(Node):
  """A #include directive for a system (<...>) or project ("...") header."""

  def __init__(self, start, end, filename, system):
    Node.__init__(self, start, end)
    self.filename = filename
    self.system = system  # True for <...> includes, False for "..." ones.

  def __str__(self):
    fmt = '<%s>' if self.system else '"%s"'
    return self._StringHelper(self.__class__.__name__, fmt % self.filename)
164
165
class Goto(Node):
  """A goto statement referencing a label name."""

  def __init__(self, start, end, label):
    Node.__init__(self, start, end)
    self.label = label

  def __str__(self):
    return self._StringHelper(self.__class__.__name__, str(self.label))
173
174
class Expr(Node):
  """A generic expression, stored as the token sequence composing it."""

  def __init__(self, start, end, expr):
    Node.__init__(self, start, end)
    self.expr = expr

  def Requires(self, node):
    # TODO(nnorwitz): impl.
    return False

  def __str__(self):
    return self._StringHelper(self.__class__.__name__, str(self.expr))
186
187
class Return(Expr):
  """A return statement; the returned expression tokens are in self.expr."""
  pass
190
191
class Delete(Expr):
  """A delete expression; the deleted expression tokens are in self.expr."""
  pass
194
195
class Friend(Expr):
  """A friend declaration, recorded with the namespace it appears in."""

  def __init__(self, start, end, expr, namespace):
    Expr.__init__(self, start, end, expr)
    self.namespace = list(namespace)  # Copy; the caller's stack mutates.
200
201
class Using(Node):
  """A using declaration/directive, stored as its name tokens."""

  def __init__(self, start, end, names):
    Node.__init__(self, start, end)
    self.names = names

  def __str__(self):
    return self._StringHelper(self.__class__.__name__, str(self.names))
209
210
class Parameter(Node):
  """One function parameter: a name, a Type, and optional default tokens."""

  def __init__(self, start, end, name, parameter_type, default):
    Node.__init__(self, start, end)
    self.name = name
    self.type = parameter_type
    self.default = default

  def Requires(self, node):
    # TODO(nnorwitz): handle namespaces, etc.
    return self.type.name == node.name

  def __str__(self):
    suffix = '%s %s' % (self.type, self.name)
    if self.default:
      suffix += ' = ' + ''.join(d.name for d in self.default)
    return self._StringHelper(self.__class__.__name__, suffix)
228
229
class _GenericDeclaration(Node):
  """Common base for declarations carrying a name and a namespace path."""

  def __init__(self, start, end, name, namespace):
    Node.__init__(self, start, end)
    self.name = name
    self.namespace = list(namespace)  # Copy; the caller's stack mutates.

  def FullName(self):
    """Return the namespace-qualified name, e.g. 'ns::Foo'."""
    if self.namespace and self.namespace[-1]:
      return '::'.join(self.namespace) + '::' + self.name
    return self.name

  def _TypeStringHelper(self, suffix):
    # Append ' in ns::...' when a namespace is known; anonymous namespaces
    # (empty-string entries) render as '<anonymous>'.
    if self.namespace:
      names = [n or '<anonymous>' for n in self.namespace]
      suffix += ' in ' + '::'.join(names)
    return self._StringHelper(self.__class__.__name__, suffix)
247
248
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
  """A variable declaration: type, name, and optional initializer string."""

  def __init__(self, start, end, name, var_type, initial_value, namespace):
    _GenericDeclaration.__init__(self, start, end, name, namespace)
    self.type = var_type
    self.initial_value = initial_value

  def Requires(self, node):
    # TODO(nnorwitz): handle namespaces, etc.
    return self.type.name == node.name

  def ToString(self):
    """Return a string that tries to reconstitute the variable decl."""
    decl = '%s %s' % (self.type, self.name)
    if self.initial_value:
      decl += ' = ' + self.initial_value
    return decl

  def __str__(self):
    return self._StringHelper(self.__class__.__name__, self.ToString())
269
270
class Typedef(_GenericDeclaration):
  """A typedef mapping a new name onto an aliased token sequence."""

  def __init__(self, start, end, name, alias, namespace):
    _GenericDeclaration.__init__(self, start, end, name, namespace)
    self.alias = alias

  def IsDefinition(self):
    return True

  def IsExportable(self):
    return True

  def Requires(self, node):
    # TODO(nnorwitz): handle namespaces, etc.
    return any(token is not None and token.name == node.name
               for token in self.alias)

  def __str__(self):
    return self._TypeStringHelper('%s, %s' % (self.name, self.alias))
293
294
class _NestedType(_GenericDeclaration):
  """Base for field-bearing nested types (unions, enums)."""

  def __init__(self, start, end, name, fields, namespace):
    _GenericDeclaration.__init__(self, start, end, name, namespace)
    self.fields = fields

  def IsDefinition(self):
    return True

  def IsExportable(self):
    return True

  def __str__(self):
    return self._TypeStringHelper('%s, {%s}' % (self.name, self.fields))
309
310
class Union(_NestedType):
  """A union declaration/definition; fields are held by _NestedType."""
  pass
313
314
class Enum(_NestedType):
  """An enum (or 'enum class') declaration; fields held by _NestedType."""
  pass
317
318
class Class(_GenericDeclaration):
  """A class declaration or definition with bases, template info, and body."""

  def __init__(self, start, end, name, bases, templated_types, body, namespace):
    _GenericDeclaration.__init__(self, start, end, name, namespace)
    self.bases = bases
    self.body = body
    self.templated_types = templated_types

  def IsDeclaration(self):
    # A forward declaration carries neither a base list nor a body.
    return self.bases is None and self.body is None

  def IsDefinition(self):
    return not self.IsDeclaration()

  def IsExportable(self):
    return not self.IsDeclaration()

  def Requires(self, node):
    # TODO(nnorwitz): handle namespaces, etc.
    if self.bases:
      # TODO(nnorwitz): bases are tokens, do name comparison.
      for token_list in self.bases:
        if any(token.name == node.name for token in token_list):
          return True
    # TODO(nnorwitz): search in body too.
    return False

  def __str__(self):
    name = self.name
    if self.templated_types:
      name += '<%s>' % self.templated_types
    return self._TypeStringHelper('%s, %s, %s' % (name, self.bases, self.body))
352
353
class Struct(Class):
  """A struct; identical to Class apart from the introducing keyword."""
  pass
356
357
class Function(_GenericDeclaration):
  """A free function: return type, parameters, modifier flags, and body."""

  def __init__(self, start, end, name, return_type, parameters,
               modifiers, templated_types, body, namespace):
    _GenericDeclaration.__init__(self, start, end, name, namespace)
    # Normalize the raw token sequences into AST nodes up front.
    converter = TypeConverter(namespace)
    self.return_type = converter.CreateReturnType(return_type)
    self.parameters = converter.ToParameters(parameters)
    self.modifiers = modifiers  # Bitmask of FUNCTION_* flags.
    self.body = body
    self.templated_types = templated_types

  def IsDeclaration(self):
    return self.body is None

  def IsDefinition(self):
    return self.body is not None

  def IsExportable(self):
    # Static functions and anything inside an anonymous namespace
    # (a None entry in the namespace path) are not exportable.
    if self.return_type and 'static' in self.return_type.modifiers:
      return False
    return None not in self.namespace

  def Requires(self, node):
    if self.parameters:
      # TODO(nnorwitz): parameters are tokens, do name comparison.
      if any(p.name == node.name for p in self.parameters):
        return True
    # TODO(nnorwitz): search in body too.
    return False

  def __str__(self):
    # TODO(nnorwitz): add templated_types.
    suffix = ('%s %s(%s), 0x%02x, %s' %
              (self.return_type, self.name, self.parameters,
               self.modifiers, self.body))
    return self._TypeStringHelper(suffix)
395
396
class Method(Function):
  """A member function: a Function plus the class it is declared in."""

  def __init__(self, start, end, name, in_class, return_type, parameters,
               modifiers, templated_types, body, namespace):
    Function.__init__(self, start, end, name, return_type, parameters,
                      modifiers, templated_types, body, namespace)
    # TODO(nnorwitz): in_class could also be a namespace which can
    # mess up finding functions properly.
    self.in_class = in_class
405
406
class Type(_GenericDeclaration):
  """Type used for any variable (eg class, primitive, struct, etc).

  Args:
    name: str name of main type
    templated_types: [Class (Type?)] template type info between <>
    modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
    reference, pointer, array: bools
  """

  def __init__(self, start, end, name, templated_types, modifiers,
               reference, pointer, array):
    _GenericDeclaration.__init__(self, start, end, name, [])
    self.templated_types = templated_types
    if not name and modifiers:
      # Modifier-only declarations use the last modifier as the name.
      # NOTE: this pops from the caller's modifiers list.
      self.name = modifiers.pop()
    self.modifiers = modifiers
    self.reference = reference
    self.pointer = pointer
    self.array = array

  def __str__(self):
    pieces = []
    if self.modifiers:
      pieces.append(' '.join(self.modifiers))
      pieces.append(' ')
    pieces.append(str(self.name))
    if self.templated_types:
      pieces.append('<%s>' % self.templated_types)
    if self.reference:
      pieces.append('&')
    if self.pointer:
      pieces.append('*')
    if self.array:
      pieces.append('[]')
    return self._TypeStringHelper(''.join(pieces))

  # By definition, Is* are always False.  A Type can only exist in
  # some sort of variable declaration, parameter, or return value.
  def IsDeclaration(self):
    return False

  def IsDefinition(self):
    return False

  def IsExportable(self):
    return False
454
455
class TypeConverter(object):
  """Converts raw token sequences into Type/Parameter AST nodes."""

  def __init__(self, namespace_stack):
    # Shared (not copied) so the converter tracks namespace changes made
    # by its owner.
    self.namespace_stack = namespace_stack

  def _GetTemplateEnd(self, tokens, start):
    """Find the '>' matching the '<' just before tokens[start].

    Returns:
      (tokens strictly inside the <...>, index just past the '>').
    """
    count = 1
    end = start
    while 1:
      token = tokens[end]
      end += 1
      if token.name == '<':
        count += 1
      elif token.name == '>':
        count -= 1
        if count == 0:
          break
    return tokens[start:end-1], end

  def ToType(self, tokens):
    """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
    result = []
    name_tokens = []
    reference = pointer = array = False

    def AddType(templated_types):
      # Partition tokens into name and modifier tokens.
      names = []
      modifiers = []
      for t in name_tokens:
        if keywords.IsKeyword(t.name):
          modifiers.append(t.name)
        else:
          names.append(t.name)
      name = ''.join(names)
      if name_tokens:
        result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                           name, templated_types, modifiers,
                           reference, pointer, array))
      # Reset for the next type in a comma-separated list.
      del name_tokens[:]

    i = 0
    end = len(tokens)
    while i < end:
      token = tokens[i]
      if token.name == '<':
        # Recurse to build the template-argument types.
        new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
        AddType(self.ToType(new_tokens))
        # If there is a comma after the template, we need to consume
        # that here otherwise it becomes part of the name.
        i = new_end
        reference = pointer = array = False
      elif token.name == ',':
        AddType([])
        reference = pointer = array = False
      elif token.name == '*':
        pointer = True
      elif token.name == '&':
        reference = True
      elif token.name == '[':
        pointer = True
      elif token.name == ']':
        pass
      else:
        name_tokens.append(token)
      i += 1

    if name_tokens:
      # No '<' in the tokens, just a simple name and no template.
      AddType([])
    return result

  def DeclarationToParts(self, parts, needs_name_removed):
    """Split declaration tokens into their components.

    Args:
      parts: [Token] for one declaration.
      needs_name_removed: bool; True when the declared name is embedded
        in parts (e.g. a parameter) and must be stripped out first.

    Returns:
      (name, type_name, templated_types, modifiers, default, other_tokens)
    """
    name = None
    default = []
    if needs_name_removed:
      # Handle default (initial) values properly.
      for i, t in enumerate(parts):
        if t.name == '=':
          default = parts[i+1:]
          name = parts[i-1].name
          if name == ']' and parts[i-2].name == '[':
            # Array declarator: the real name precedes the brackets.
            name = parts[i-3].name
            i -= 1
          parts = parts[:i-1]
          break
      else:
        # No '=' found: the name is the trailing NAME token, if any.
        if parts[-1].token_type == tokenize.NAME:
          name = parts.pop().name
        else:
          # TODO(nnorwitz): this is a hack that happens for code like
          # Register(Foo<T>); where it thinks this is a function call
          # but it's actually a declaration.
          name = '???'
    modifiers = []
    type_name = []
    other_tokens = []
    templated_types = []
    i = 0
    end = len(parts)
    while i < end:
      p = parts[i]
      if keywords.IsKeyword(p.name):
        modifiers.append(p.name)
      elif p.name == '<':
        templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
        templated_types = self.ToType(templated_tokens)
        i = new_end - 1
        # Don't add a spurious :: to data members being initialized.
        next_index = i + 1
        if next_index < end and parts[next_index].name == '::':
          i += 1
      elif p.name in ('[', ']', '='):
        # These are handled elsewhere.
        other_tokens.append(p)
      elif p.name not in ('*', '&', '>'):
        # Ensure that names have a space between them.
        if (type_name and type_name[-1].token_type == tokenize.NAME and
                p.token_type == tokenize.NAME):
          type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
        type_name.append(p)
      else:
        other_tokens.append(p)
      i += 1
    type_name = ''.join([t.name for t in type_name])
    return name, type_name, templated_types, modifiers, default, other_tokens

  def ToParameters(self, tokens):
    """Convert a parameter-list token sequence into [Parameter, ...]."""
    if not tokens:
      return []

    result = []
    name = type_name = ''
    type_modifiers = []
    pointer = reference = array = False
    first_token = None
    default = []

    def AddParameter(end):
      # Emit one Parameter from the per-parameter state accumulated below.
      if default:
        del default[0]  # Remove flag.
      parts = self.DeclarationToParts(type_modifiers, True)
      (name, type_name, templated_types, modifiers,
       unused_default, unused_other_tokens) = parts
      parameter_type = Type(first_token.start, first_token.end,
                            type_name, templated_types, modifiers,
                            reference, pointer, array)
      p = Parameter(first_token.start, end, name,
                    parameter_type, default)
      result.append(p)

    template_count = 0
    brace_count = 0
    for s in tokens:
      if not first_token:
        first_token = s

      # Check for braces before templates, as we can have unmatched '<>'
      # inside default arguments.
      if s.name == '{':
        brace_count += 1
      elif s.name == '}':
        brace_count -= 1
      if brace_count > 0:
        type_modifiers.append(s)
        continue

      if s.name == '<':
        template_count += 1
      elif s.name == '>':
        template_count -= 1
      if template_count > 0:
        type_modifiers.append(s)
        continue

      if s.name == ',':
        # Parameter boundary: emit it and reset per-parameter state.
        AddParameter(s.start)
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []
      elif s.name == '*':
        pointer = True
      elif s.name == '&':
        reference = True
      elif s.name == '[':
        array = True
      elif s.name == ']':
        pass  # Just don't add to type_modifiers.
      elif s.name == '=':
        # Got a default value.  Add any value (None) as a flag.
        default.append(None)
      elif default:
        default.append(s)
      else:
        type_modifiers.append(s)
    AddParameter(tokens[-1].end)
    return result

  def CreateReturnType(self, return_type_seq):
    """Build a Type node from return-type tokens, or None if empty."""
    if not return_type_seq:
      return None
    start = return_type_seq[0].start
    end = return_type_seq[-1].end
    _, name, templated_types, modifiers, default, other_tokens = \
        self.DeclarationToParts(return_type_seq, False)
    # Reference/pointer/array markers end up among other_tokens.
    names = [n.name for n in other_tokens]
    reference = '&' in names
    pointer = '*' in names
    array = '[' in names
    return Type(start, end, name, templated_types, modifiers,
                reference, pointer, array)

  def GetTemplateIndices(self, names):
    """Return (start, end) slice bounds covering the '<...>' run."""
    # names is a list of strings.
    start = names.index('<')
    end = len(names) - 1
    while end > 0:
      if names[end] == '>':
        break
      end -= 1
    return start, end+1
685
686class AstBuilder(object):
687  def __init__(self, token_stream, filename, in_class='', visibility=None,
688               namespace_stack=[]):
689    self.tokens = token_stream
690    self.filename = filename
691    # TODO(nnorwitz): use a better data structure (deque) for the queue.
692    # Switching directions of the "queue" improved perf by about 25%.
693    # Using a deque should be even better since we access from both sides.
694    self.token_queue = []
695    self.namespace_stack = namespace_stack[:]
696    self.in_class = in_class
697    if in_class is None:
698      self.in_class_name_only = None
699    else:
700      self.in_class_name_only = in_class.split('::')[-1]
701    self.visibility = visibility
702    self.in_function = False
703    self.current_token = None
704    # Keep the state whether we are currently handling a typedef or not.
705    self._handling_typedef = False
706
707    self.converter = TypeConverter(self.namespace_stack)
708
709  def HandleError(self, msg, token):
710    printable_queue = list(reversed(self.token_queue[-20:]))
711    sys.stderr.write('Got %s in %s @ %s %s\n' %
712                     (msg, self.filename, token, printable_queue))
713
  def Generate(self):
    """Yield AST nodes parsed from the token stream until it is exhausted.

    Internal namespace-pop sentinel tokens are consumed here and never
    yielded.  Any exception is reported via HandleError, then re-raised.
    """
    while 1:
      token = self._GetNextToken()
      if not token:
        break

      # Get the next token.
      self.current_token = token

      # Dispatch on the next token type.
      if token.token_type == _INTERNAL_TOKEN:
        if token.name == _NAMESPACE_POP:
          self.namespace_stack.pop()
        continue

      try:
        result = self._GenerateOne(token)
        if result is not None:
          yield result
      except:
        # Deliberately broad: log context for any failure, then propagate.
        self.HandleError('exception', token)
        raise
736
737  def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
738                      ref_pointer_name_seq, templated_types, value=None):
739    reference = '&' in ref_pointer_name_seq
740    pointer = '*' in ref_pointer_name_seq
741    array = '[' in ref_pointer_name_seq
742    var_type = Type(pos_token.start, pos_token.end, type_name,
743                    templated_types, type_modifiers,
744                    reference, pointer, array)
745    return VariableDeclaration(pos_token.start, pos_token.end,
746                               name, var_type, value, self.namespace_stack)
747
  def _GenerateOne(self, token):
    """Parse one top-level construct starting at token.

    Returns an AST node (or None when the construct produces nothing,
    e.g. an unhandled preprocessor directive or unknown syntax).
    """
    if token.token_type == tokenize.NAME:
      if (keywords.IsKeyword(token.name) and
          not keywords.IsBuiltinType(token.name)):
        if token.name == 'enum':
          # Pop the next token and only put it back if it's not
          # 'class'.  This allows us to support the two-token
          # 'enum class' keyword as if it were simply 'enum'.
          next = self._GetNextToken()  # NOTE: shadows the builtin next().
          if next.name != 'class':
            self._AddBackToken(next)

        # Dispatch to a keyword-specific handler, e.g. handle_class.
        method = getattr(self, 'handle_' + token.name)
        return method()
      elif token.name == self.in_class_name_only:
        # The token name is the same as the class, must be a ctor if
        # there is a paren.  Otherwise, it's the return type.
        # Peek ahead to get the next token to figure out which.
        next = self._GetNextToken()
        self._AddBackToken(next)
        if next.token_type == tokenize.SYNTAX and next.name == '(':
          return self._GetMethod([token], FUNCTION_CTOR, None, True)
        # Fall through--handle like any other method.

      # Handle data or function declaration/definition.
      syntax = tokenize.SYNTAX
      temp_tokens, last_token = \
          self._GetVarTokensUpToIgnoringTemplates(syntax,
                                                  '(', ';', '{', '[')
      temp_tokens.insert(0, token)
      if last_token.name == '(':
        # If there is an assignment before the paren,
        # this is an expression, not a method.
        expr = bool([e for e in temp_tokens if e.name == '='])
        if expr:
          new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
          temp_tokens.append(last_token)
          temp_tokens.extend(new_temp)
          # Synthesize a ';' so the data-handling branch below runs.
          last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

      if last_token.name == '[':
        # Handle array, this isn't a method, unless it's an operator.
        # TODO(nnorwitz): keep the size somewhere.
        # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
        temp_tokens.append(last_token)
        if temp_tokens[-2].name == 'operator':
          temp_tokens.append(self._GetNextToken())
        else:
          temp_tokens2, last_token = \
              self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
          temp_tokens.extend(temp_tokens2)

      if last_token.name == ';':
        # Handle data, this isn't a method.
        parts = self.converter.DeclarationToParts(temp_tokens, True)
        (name, type_name, templated_types, modifiers, default,
         unused_other_tokens) = parts

        t0 = temp_tokens[0]
        names = [t.name for t in temp_tokens]
        if templated_types:
          # Drop the '<...>' run so it doesn't pollute the name sequence.
          start, end = self.converter.GetTemplateIndices(names)
          names = names[:start] + names[end:]
        default = ''.join([t.name for t in default])
        return self._CreateVariable(t0, name, type_name, modifiers,
                                    names, templated_types, default)
      if last_token.name == '{':
        # Push everything but the first token back and re-dispatch on it.
        self._AddBackTokens(temp_tokens[1:])
        self._AddBackToken(last_token)
        method_name = temp_tokens[0].name
        method = getattr(self, 'handle_' + method_name, None)
        if not method:
          # Must be declaring a variable.
          # TODO(nnorwitz): handle the declaration.
          return None
        return method()
      return self._GetMethod(temp_tokens, 0, None, False)
    elif token.token_type == tokenize.SYNTAX:
      if token.name == '~' and self.in_class:
        # Must be a dtor (probably not in method body).
        token = self._GetNextToken()
        # self.in_class can contain A::Name, but the dtor will only
        # be Name.  Make sure to compare against the right value.
        if (token.token_type == tokenize.NAME and
                token.name == self.in_class_name_only):
          return self._GetMethod([token], FUNCTION_DTOR, None, True)
      # TODO(nnorwitz): handle a lot more syntax.
    elif token.token_type == tokenize.PREPROCESSOR:
      # TODO(nnorwitz): handle more preprocessor directives.
      # token starts with a #, so remove it and strip whitespace.
      name = token.name[1:].lstrip()
      if name.startswith('include'):
        # Remove "include".
        name = name[7:].strip()
        assert name
        # Handle #include \<newline> "header-on-second-line.h".
        if name.startswith('\\'):
          name = name[1:].strip()
        assert name[0] in '<"', token
        assert name[-1] in '>"', token
        system = name[0] == '<'
        filename = name[1:-1]
        return Include(token.start, token.end, filename, system)
      if name.startswith('define'):
        # Remove "define".
        name = name[6:].strip()
        assert name
        # Split the macro name from its value at the first whitespace.
        value = ''
        for i, c in enumerate(name):
          if c.isspace():
            value = name[i:].lstrip()
            name = name[:i]
            break
        return Define(token.start, token.end, name, value)
      if name.startswith('if') and name[2:3].isspace():
        condition = name[3:].strip()
        if condition.startswith('0') or condition.startswith('(0)'):
          # Skip '#if 0' blocks entirely.
          self._SkipIf0Blocks()
    return None
867
868  def _GetTokensUpTo(self, expected_token_type, expected_token):
869    return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
870
871  def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
872    last_token = self._GetNextToken()
873    tokens = []
874    while (last_token.token_type != expected_token_type or
875           last_token.name not in expected_tokens):
876      tokens.append(last_token)
877      last_token = self._GetNextToken()
878    return tokens, last_token
879
  # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
  # expected token.
  def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type,
                                         *expected_tokens):
    """Collect tokens until one matches outside any '<...>' nesting.

    Returns (collected_tokens, terminating_token).
    """
    last_token = self._GetNextToken()
    tokens = []
    nesting = 0
    while (nesting > 0 or
           last_token.token_type != expected_token_type or
           last_token.name not in expected_tokens):
      tokens.append(last_token)
      last_token = self._GetNextToken()
      # Track angle-bracket depth so an expected token inside a template
      # argument list does not terminate the scan.
      if last_token.name == '<':
        nesting += 1
      elif last_token.name == '>':
        nesting -= 1
    return tokens, last_token
897
  # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
  def _IgnoreUpTo(self, token_type, token):
    # Consume and discard all tokens through the first matching one.
    unused_tokens = self._GetTokensUpTo(token_type, token)
901
902  def _SkipIf0Blocks(self):
903    count = 1
904    while 1:
905      token = self._GetNextToken()
906      if token.token_type != tokenize.PREPROCESSOR:
907        continue
908
909      name = token.name[1:].lstrip()
910      if name.startswith('endif'):
911        count -= 1
912        if count == 0:
913          break
914      elif name.startswith('if'):
915        count += 1
916
917  def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
918    if GetNextToken is None:
919      GetNextToken = self._GetNextToken
920    # Assumes the current token is open_paren and we will consume
921    # and return up to the close_paren.
922    count = 1
923    token = GetNextToken()
924    while 1:
925      if token.token_type == tokenize.SYNTAX:
926        if token.name == open_paren:
927          count += 1
928        elif token.name == close_paren:
929          count -= 1
930          if count == 0:
931            break
932      yield token
933      token = GetNextToken()
934    yield token
935
  def _GetParameters(self):
    # Iterator over the tokens of a balanced '(...)' group, ')' included.
    return self._GetMatchingChar('(', ')')
938
  def GetScope(self):
    """Return an iterator over the tokens of a balanced '{...}' block."""
    return self._GetMatchingChar('{', '}')
941
942  def _GetNextToken(self):
943    if self.token_queue:
944      return self.token_queue.pop()
945    try:
946      return next(self.tokens)
947    except StopIteration:
948      return
949
950  def _AddBackToken(self, token):
951    if token.whence == tokenize.WHENCE_STREAM:
952      token.whence = tokenize.WHENCE_QUEUE
953      self.token_queue.insert(0, token)
954    else:
955      assert token.whence == tokenize.WHENCE_QUEUE, token
956      self.token_queue.append(token)
957
958  def _AddBackTokens(self, tokens):
959    if tokens:
960      if tokens[-1].whence == tokenize.WHENCE_STREAM:
961        for token in tokens:
962          token.whence = tokenize.WHENCE_QUEUE
963        self.token_queue[:0] = reversed(tokens)
964      else:
965        assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
966        self.token_queue.extend(reversed(tokens))
967
  def GetName(self, seq=None):
    """Returns ([tokens], next_token_info).

    Consumes a (possibly qualified, possibly templated) name from the
    token stream, or from seq when provided, and also returns the first
    token that is not part of the name.
    """
    GetNextToken = self._GetNextToken
    if seq is not None:
      it = iter(seq)
      GetNextToken = lambda: next(it)
    next_token = GetNextToken()
    tokens = []
    last_token_was_name = False
    while (next_token.token_type == tokenize.NAME or
           (next_token.token_type == tokenize.SYNTAX and
            next_token.name in ('::', '<'))):
      # Two NAMEs in a row means the identifier should terminate.
      # It's probably some sort of variable declaration.
      if last_token_was_name and next_token.token_type == tokenize.NAME:
        break
      last_token_was_name = next_token.token_type == tokenize.NAME
      tokens.append(next_token)
      # Handle templated names.
      if next_token.name == '<':
        tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
        # The consumed '<...>' span counts as part of the preceding name.
        last_token_was_name = True
      next_token = GetNextToken()
    return tokens, next_token
992
993  def GetMethod(self, modifiers, templated_types):
994    return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
995    assert len(return_type_and_name) >= 1
996    return self._GetMethod(return_type_and_name, modifiers, templated_types,
997                           False)
998
  def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                 get_paren):
    """Parse a method/function declaration (or data that looks like one).

    Args:
      return_type_and_name: [tokens] for the return type and name; the
        name is the last token (already consumed up to the '(' unless
        get_paren is set).
      modifiers: int, FUNCTION_* bit flags accumulated so far.
      templated_types: template parameter info, or None.
      get_paren: bool, when True the '(' (and any templatized-dtor
        '<...>') still needs to be consumed from the stream.

    Returns:
      A Method, Function, or (for function-pointer data members) a
      variable node from self._CreateVariable().
    """
    template_portion = None
    if get_paren:
      token = self._GetNextToken()
      assert token.token_type == tokenize.SYNTAX, token
      if token.name == '<':
        # Handle templatized dtors.
        template_portion = [token]
        template_portion.extend(self._GetMatchingChar('<', '>'))
        token = self._GetNextToken()
      assert token.token_type == tokenize.SYNTAX, token
      assert token.name == '(', token

    name = return_type_and_name.pop()
    # Handle templatized ctors.
    if name.name == '>':
      index = 1
      while return_type_and_name[index].name != '<':
        index += 1
      template_portion = return_type_and_name[index:] + [name]
      del return_type_and_name[index:]
      name = return_type_and_name.pop()
    elif name.name == ']':
      # operator[] arrives split across tokens; fuse it back together.
      rt = return_type_and_name
      assert rt[-1].name == '[', return_type_and_name
      assert rt[-2].name == 'operator', return_type_and_name
      name_seq = return_type_and_name[-2:]
      del return_type_and_name[-2:]
      name = tokenize.Token(tokenize.NAME, 'operator[]',
                            name_seq[0].start, name.end)
      # Get the open paren so _GetParameters() below works.
      unused_open_paren = self._GetNextToken()

    # TODO(nnorwitz): store template_portion.
    return_type = return_type_and_name
    # 'indices' is the token used for start/end source positions.
    indices = name
    if return_type:
      indices = return_type[0]

    # Force ctor for templatized ctors.
    if name.name == self.in_class and not modifiers:
      modifiers |= FUNCTION_CTOR
    parameters = list(self._GetParameters())
    del parameters[-1]              # Remove trailing ')'.

    # Handling operator() is especially weird.
    if name.name == 'operator' and not parameters:
      token = self._GetNextToken()
      assert token.name == '(', token
      parameters = list(self._GetParameters())
      del parameters[-1]          # Remove trailing ')'.

    # Consume trailing modifiers (const, throw(), __attribute__, macros).
    token = self._GetNextToken()
    while token.token_type == tokenize.NAME:
      modifier_token = token
      token = self._GetNextToken()
      if modifier_token.name == 'const':
        modifiers |= FUNCTION_CONST
      elif modifier_token.name == '__attribute__':
        # TODO(nnorwitz): handle more __attribute__ details.
        modifiers |= FUNCTION_ATTRIBUTE
        assert token.name == '(', token
        # Consume everything between the (parens).
        unused_tokens = list(self._GetMatchingChar('(', ')'))
        token = self._GetNextToken()
      elif modifier_token.name == 'throw':
        modifiers |= FUNCTION_THROW
        assert token.name == '(', token
        # Consume everything between the (parens).
        unused_tokens = list(self._GetMatchingChar('(', ')'))
        token = self._GetNextToken()
      elif modifier_token.name == 'override':
        modifiers |= FUNCTION_OVERRIDE
      elif modifier_token.name == modifier_token.name.upper():
        # HACK(nnorwitz):  assume that all upper-case names
        # are some macro we aren't expanding.
        modifiers |= FUNCTION_UNKNOWN_ANNOTATION
      else:
        self.HandleError('unexpected token', modifier_token)

    assert token.token_type == tokenize.SYNTAX, token
    # Handle ctor initializers.
    if token.name == ':':
      # TODO(nnorwitz): anything else to handle for initializer list?
      while token.name != ';' and token.name != '{':
        token = self._GetNextToken()

    # Handle pointer to functions that are really data but look
    # like method declarations.
    if token.name == '(':
      if parameters[0].name == '*':
        # name contains the return type.
        name = parameters.pop()
        # parameters contains the name of the data.
        modifiers = [p.name for p in parameters]
        # Already at the ( to open the parameter list.
        function_parameters = list(self._GetMatchingChar('(', ')'))
        del function_parameters[-1]  # Remove trailing ')'.
        # TODO(nnorwitz): store the function_parameters.
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == ';', token
        return self._CreateVariable(indices, name.name, indices.name,
                                    modifiers, '', None)
      # At this point, we got something like:
      #  return_type (type::*name_)(params);
      # This is a data member called name_ that is a function pointer.
      # With this code: void (sq_type::*field_)(string&);
      # We get: name=void return_type=[] parameters=sq_type ... field_
      # TODO(nnorwitz): is return_type always empty?
      # TODO(nnorwitz): this isn't even close to being correct.
      # Just put in something so we don't crash and can move on.
      real_name = parameters[-1]
      modifiers = [p.name for p in self._GetParameters()]
      del modifiers[-1]           # Remove trailing ')'.
      return self._CreateVariable(indices, real_name.name, indices.name,
                                  modifiers, '', None)

    if token.name == '{':
      body = list(self.GetScope())
      del body[-1]                # Remove trailing '}'.
    else:
      body = None
      if token.name == '=':
        token = self._GetNextToken()

        if token.name == 'default' or token.name == 'delete':
          # Ignore explicitly defaulted and deleted special members
          # in C++11.
          token = self._GetNextToken()
        else:
          # Handle pure-virtual declarations.
          assert token.token_type == tokenize.CONSTANT, token
          assert token.name == '0', token
          modifiers |= FUNCTION_PURE_VIRTUAL
          token = self._GetNextToken()

      if token.name == '[':
        # TODO(nnorwitz): store tokens and improve parsing.
        # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
        tokens = list(self._GetMatchingChar('[', ']'))
        token = self._GetNextToken()

      assert token.name == ';', (token, return_type_and_name, parameters)

    # Looks like we got a method, not a function.
    if len(return_type) > 2 and return_type[-1].name == '::':
      return_type, in_class = \
          self._GetReturnTypeAndClassName(return_type)
      return Method(indices.start, indices.end, name.name, in_class,
                    return_type, parameters, modifiers, templated_types,
                    body, self.namespace_stack)
    return Function(indices.start, indices.end, name.name, return_type,
                    parameters, modifiers, templated_types, body,
                    self.namespace_stack)
1155
1156  def _GetReturnTypeAndClassName(self, token_seq):
1157    # Splitting the return type from the class name in a method
1158    # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
1159    # Where is the return type and where is the class name?
1160    # The heuristic used is to pull the last name as the class name.
1161    # This includes all the templated type info.
1162    # TODO(nnorwitz): if there is only One name like in the
1163    # example above, punt and assume the last bit is the class name.
1164
1165    # Ignore a :: prefix, if exists so we can find the first real name.
1166    i = 0
1167    if token_seq[0].name == '::':
1168      i = 1
1169    # Ignore a :: suffix, if exists.
1170    end = len(token_seq) - 1
1171    if token_seq[end-1].name == '::':
1172      end -= 1
1173
1174    # Make a copy of the sequence so we can append a sentinel
1175    # value. This is required for GetName will has to have some
1176    # terminating condition beyond the last name.
1177    seq_copy = token_seq[i:end]
1178    seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
1179    names = []
1180    while i < end:
1181      # Iterate through the sequence parsing out each name.
1182      new_name, next = self.GetName(seq_copy[i:])
1183      assert new_name, 'Got empty new_name, next=%s' % next
1184      # We got a pointer or ref.  Add it to the name.
1185      if next and next.token_type == tokenize.SYNTAX:
1186        new_name.append(next)
1187      names.append(new_name)
1188      i += len(new_name)
1189
1190    # Now that we have the names, it's time to undo what we did.
1191
1192    # Remove the sentinel value.
1193    names[-1].pop()
1194    # Flatten the token sequence for the return type.
1195    return_type = [e for seq in names[:-1] for e in seq]
1196    # The class name is the last name.
1197    class_name = names[-1]
1198    return return_type, class_name
1199
  # The primitive-type keyword handlers below intentionally do nothing:
  # these keywords are consumed as part of a surrounding declaration's
  # token stream rather than acted on as declarations themselves.
  def handle_bool(self):
    pass

  def handle_char(self):
    pass

  def handle_int(self):
    pass

  def handle_long(self):
    pass

  def handle_short(self):
    pass

  def handle_double(self):
    pass

  def handle_float(self):
    pass

  def handle_void(self):
    pass

  def handle_wchar_t(self):
    pass

  def handle_unsigned(self):
    pass

  def handle_signed(self):
    pass
1232
1233  def _GetNestedType(self, ctor):
1234    name = None
1235    name_tokens, token = self.GetName()
1236    if name_tokens:
1237      name = ''.join([t.name for t in name_tokens])
1238
1239    # Handle forward declarations.
1240    if token.token_type == tokenize.SYNTAX and token.name == ';':
1241      return ctor(token.start, token.end, name, None,
1242                  self.namespace_stack)
1243
1244    if token.token_type == tokenize.NAME and self._handling_typedef:
1245      self._AddBackToken(token)
1246      return ctor(token.start, token.end, name, None,
1247                  self.namespace_stack)
1248
1249    # Must be the type declaration.
1250    fields = list(self._GetMatchingChar('{', '}'))
1251    del fields[-1]                  # Remove trailing '}'.
1252    if token.token_type == tokenize.SYNTAX and token.name == '{':
1253      next = self._GetNextToken()
1254      new_type = ctor(token.start, token.end, name, fields,
1255                      self.namespace_stack)
1256      # A name means this is an anonymous type and the name
1257      # is the variable declaration.
1258      if next.token_type != tokenize.NAME:
1259        return new_type
1260      name = new_type
1261      token = next
1262
1263    # Must be variable declaration using the type prefixed with keyword.
1264    assert token.token_type == tokenize.NAME, token
1265    return self._CreateVariable(token, token.name, name, [], '', None)
1266
  def handle_struct(self):
    """Parse a 'struct' declaration, definition, variable, or method use."""
    # Special case the handling typedef/aliasing of structs here.
    # It would be a pain to handle in the class code.
    name_tokens, var_token = self.GetName()
    if name_tokens:
      next_token = self._GetNextToken()
      is_syntax = (var_token.token_type == tokenize.SYNTAX and
                   var_token.name[0] in '*&')
      is_variable = (var_token.token_type == tokenize.NAME and
                     next_token.name == ';')
      variable = var_token
      if is_syntax and not is_variable:
        variable = next_token
        temp = self._GetNextToken()
        if temp.token_type == tokenize.SYNTAX and temp.name == '(':
          # Handle methods declared to return a struct.
          t0 = name_tokens[0]
          # Synthesize a 'struct' token; offsets (-7/-2) place it where
          # the keyword appeared in the source ('struct ' is 7 chars).
          struct = tokenize.Token(tokenize.NAME, 'struct',
                                  t0.start-7, t0.start-2)
          type_and_name = [struct]
          type_and_name.extend(name_tokens)
          type_and_name.extend((var_token, next_token))
          return self._GetMethod(type_and_name, 0, None, False)
        assert temp.name == ';', (temp, name_tokens, var_token)
      if is_syntax or (is_variable and not self._handling_typedef):
        modifiers = ['struct']
        type_name = ''.join([t.name for t in name_tokens])
        position = name_tokens[0]
        return self._CreateVariable(position, variable.name, type_name,
                                    modifiers, var_token.name, None)
      name_tokens.extend((var_token, next_token))
      self._AddBackTokens(name_tokens)
    else:
      self._AddBackToken(var_token)
    return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1302
  def handle_union(self):
    # Unions are parsed with the generic nested-type machinery.
    return self._GetNestedType(Union)

  def handle_enum(self):
    # Enums are parsed with the generic nested-type machinery.
    return self._GetNestedType(Enum)
1308
  def handle_auto(self):
    # TODO(nnorwitz): warn about using auto?  Probably not since it
    # will be reclaimed and useful for C++0x.
    pass

  # The storage-class/qualifier keywords below are intentionally ignored;
  # they carry no information this AST currently models.
  def handle_register(self):
    pass

  def handle_const(self):
    pass

  def handle_inline(self):
    pass

  def handle_extern(self):
    pass

  def handle_static(self):
    pass
1328
  def handle_virtual(self):
    """Parse the declaration following the 'virtual' keyword.

    What follows must be a method (possibly a templatized dtor).
    """
    # What follows must be a method.
    token = token2 = self._GetNextToken()
    if token.name == 'inline':
      # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
      token2 = self._GetNextToken()
    if token2.token_type == tokenize.SYNTAX and token2.name == '~':
      return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
    assert token.token_type == tokenize.NAME or token.name == '::', token
    return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
        tokenize.SYNTAX, '(')  # )
    return_type_and_name.insert(0, token)
    if token2 is not token:
      # 'inline' was consumed above; restore it before the name.
      return_type_and_name.insert(1, token2)
    return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                           None, False)
1345
  # 'volatile' and 'mutable' qualifiers are intentionally ignored.
  def handle_volatile(self):
    pass

  def handle_mutable(self):
    pass
1351
  # Access-specifier labels update the builder's current visibility;
  # they are only legal inside a class body.
  def handle_public(self):
    assert self.in_class
    self.visibility = VISIBILITY_PUBLIC

  def handle_protected(self):
    assert self.in_class
    self.visibility = VISIBILITY_PROTECTED

  def handle_private(self):
    assert self.in_class
    self.visibility = VISIBILITY_PRIVATE
1363
1364  def handle_friend(self):
1365    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1366    assert tokens
1367    t0 = tokens[0]
1368    return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1369
  # Cast keywords and 'new' appear inside expressions; this declaration-
  # level parser has nothing to record for them.
  def handle_static_cast(self):
    pass

  def handle_const_cast(self):
    pass

  def handle_dynamic_cast(self):
    pass

  def handle_reinterpret_cast(self):
    pass

  def handle_new(self):
    pass
1384
1385  def handle_delete(self):
1386    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1387    assert tokens
1388    return Delete(tokens[0].start, tokens[0].end, tokens)
1389
  def handle_typedef(self):
    """Parse a typedef declaration and return a Typedef node.

    Delegates to the struct/enum/union/class handler when the typedef
    wraps one, then consumes the remainder up to the ';'.
    """
    token = self._GetNextToken()
    if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
      # Token must be struct/enum/union/class.
      method = getattr(self, 'handle_' + token.name)
      self._handling_typedef = True
      tokens = [method()]
      self._handling_typedef = False
    else:
      tokens = [token]

    # Get the remainder of the typedef up to the semi-colon.
    tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

    # TODO(nnorwitz): clean all this up.
    assert tokens
    name = tokens.pop()
    # 'indices' is the token used for start/end source positions.
    indices = name
    if tokens:
      indices = tokens[0]
    if not indices:
      indices = token
    if name.name == ')':
      # HACK(nnorwitz): Handle pointers to functions "properly".
      if (len(tokens) >= 4 and
              tokens[1].name == '(' and tokens[2].name == '*'):
        tokens.append(name)
        name = tokens[3]
    elif name.name == ']':
      # HACK(nnorwitz): Handle arrays properly.
      if len(tokens) >= 2:
        tokens.append(name)
        name = tokens[1]
    new_type = tokens
    if tokens and isinstance(tokens[0], tokenize.Token):
      new_type = self.converter.ToType(tokens)[0]
    return Typedef(indices.start, indices.end, name.name,
                   new_type, self.namespace_stack)
1429
  def handle_typeid(self):
    pass  # Not needed yet.

  def handle_typename(self):
    pass  # Not needed yet.
1435
  def _GetTemplatedTypes(self):
    """Parse '<...>' template parameters into an ordered mapping.

    Returns:
      OrderedDict mapping parameter name -> (type_name, default), where
      either element may be None.
    """
    result = collections.OrderedDict()
    tokens = list(self._GetMatchingChar('<', '>'))
    len_tokens = len(tokens) - 1    # Ignore trailing '>'.
    i = 0
    while i < len_tokens:
      key = tokens[i].name
      i += 1
      # Skip 'typename'/'class' keywords and separators.
      if keywords.IsKeyword(key) or key == ',':
        continue
      type_name = default = None
      if i < len_tokens:
        i += 1
        if tokens[i-1].name == '=':
          # 'name = default' form: parse the default value's name.
          assert i < len_tokens, '%s %s' % (i, tokens)
          default, unused_next_token = self.GetName(tokens[i:])
          i += len(default)
        else:
          if tokens[i-1].name != ',':
            # We got something like: Type variable.
            # Re-adjust the key (variable) and type_name (Type).
            key = tokens[i-1].name
            type_name = tokens[i-2]

      result[key] = (type_name, default)
    return result
1462
  def handle_template(self):
    """Parse a 'template <...>' prefix and the declaration it modifies."""
    token = self._GetNextToken()
    assert token.token_type == tokenize.SYNTAX, token
    assert token.name == '<', token
    templated_types = self._GetTemplatedTypes()
    # TODO(nnorwitz): for now, just ignore the template params.
    token = self._GetNextToken()
    if token.token_type == tokenize.NAME:
      if token.name == 'class':
        return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
      elif token.name == 'struct':
        return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
      elif token.name == 'friend':
        return self.handle_friend()
    self._AddBackToken(token)
    # Peek ahead to decide between a templated method and a variable:
    # a '(' before ';' means a method.
    tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
    tokens.append(last)
    self._AddBackTokens(tokens)
    if last.name == '(':
      return self.GetMethod(FUNCTION_NONE, templated_types)
    # Must be a variable definition.
    return None
1485
  def handle_true(self):
    pass  # Nothing to do.

  def handle_false(self):
    pass  # Nothing to do.

  def handle_asm(self):
    pass  # Not needed yet.
1494
1495  def handle_class(self):
1496    return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1497
  def _GetBases(self):
    """Parse a base-class list after ':' up to the opening '{'.

    Returns:
      (bases, token): list of base-class AST nodes and the '{' token.
    """
    # Get base classes.
    bases = []
    while 1:
      token = self._GetNextToken()
      assert token.token_type == tokenize.NAME, token
      # TODO(nnorwitz): store kind of inheritance...maybe.
      if token.name not in ('public', 'protected', 'private'):
        # If inheritance type is not specified, it is private.
        # Just put the token back so we can form a name.
        # TODO(nnorwitz): it would be good to warn about this.
        self._AddBackToken(token)
      else:
        # Check for virtual inheritance.
        token = self._GetNextToken()
        if token.name != 'virtual':
          self._AddBackToken(token)
        else:
          # TODO(nnorwitz): store that we got virtual for this base.
          pass
      base, next_token = self.GetName()
      bases_ast = self.converter.ToType(base)
      assert len(bases_ast) == 1, bases_ast
      bases.append(bases_ast[0])
      assert next_token.token_type == tokenize.SYNTAX, next_token
      if next_token.name == '{':
        token = next_token
        break
      # Support multiple inheritance.
      assert next_token.name == ',', next_token
    return bases, token
1529
  def _GetClass(self, class_type, visibility, templated_types):
    """Parse a class/struct declaration, definition, or inline use.

    Args:
      class_type: node factory, Class or Struct.
      visibility: default VISIBILITY_* for members.
      templated_types: template parameter info, or None.

    Returns:
      A class_type node, or a variable node for inline declarations.
    """
    class_name = None
    class_token = self._GetNextToken()
    if class_token.token_type != tokenize.NAME:
      assert class_token.token_type == tokenize.SYNTAX, class_token
      token = class_token
    else:
      # Skip any macro (e.g. storage class specifiers) after the
      # 'class' keyword.
      next_token = self._GetNextToken()
      if next_token.token_type == tokenize.NAME:
        self._AddBackToken(next_token)
      else:
        self._AddBackTokens([class_token, next_token])
      name_tokens, token = self.GetName()
      class_name = ''.join([t.name for t in name_tokens])
    bases = None
    if token.token_type == tokenize.SYNTAX:
      if token.name == ';':
        # Forward declaration.
        return class_type(class_token.start, class_token.end,
                          class_name, None, templated_types, None,
                          self.namespace_stack)
      if token.name in '*&':
        # Inline forward declaration.  Could be method or data.
        name_token = self._GetNextToken()
        next_token = self._GetNextToken()
        if next_token.name == ';':
          # Handle data
          modifiers = ['class']
          return self._CreateVariable(class_token, name_token.name,
                                      class_name,
                                      modifiers, token.name, None)
        else:
          # Assume this is a method.
          tokens = (class_token, token, name_token, next_token)
          self._AddBackTokens(tokens)
          return self.GetMethod(FUNCTION_NONE, None)
      if token.name == ':':
        bases, token = self._GetBases()

    body = None
    if token.token_type == tokenize.SYNTAX and token.name == '{':
      assert token.token_type == tokenize.SYNTAX, token
      assert token.name == '{', token

      # Recursively build the ASTs for the class body.
      ast = AstBuilder(self.GetScope(), self.filename, class_name,
                       visibility, self.namespace_stack)
      body = list(ast.Generate())

      if not self._handling_typedef:
        token = self._GetNextToken()
        if token.token_type != tokenize.NAME:
          assert token.token_type == tokenize.SYNTAX, token
          assert token.name == ';', token
        else:
          # 'class Foo { ... } var;' -- the trailing NAME declares a
          # variable of the just-defined class type.
          new_class = class_type(class_token.start, class_token.end,
                                 class_name, bases, None,
                                 body, self.namespace_stack)

          modifiers = []
          return self._CreateVariable(class_token,
                                      token.name, new_class,
                                      modifiers, token.name, None)
    else:
      if not self._handling_typedef:
        self.HandleError('non-typedef token', token)
      self._AddBackToken(token)

    return class_type(class_token.start, class_token.end, class_name,
                      bases, templated_types, body, self.namespace_stack)
1601
  def handle_namespace(self):
    """Parse a namespace (named, anonymous, or alias) declaration.

    Pushes the name onto namespace_stack; an internal pop token queued
    behind the namespace body triggers the matching pop later.
    """
    # Support anonymous namespaces.
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
      name = ''.join([t.name for t in name_tokens])
    self.namespace_stack.append(name)
    assert token.token_type == tokenize.SYNTAX, token
    # Create an internal token that denotes when the namespace is complete.
    internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                    None, None)
    internal_token.whence = token.whence
    if token.name == '=':
      # TODO(nnorwitz): handle aliasing namespaces.
      name, next_token = self.GetName()
      assert next_token.name == ';', next_token
      self._AddBackToken(internal_token)
    else:
      assert token.name == '{', token
      tokens = list(self.GetScope())
      # Replace the trailing } with the internal namespace pop token.
      tokens[-1] = internal_token
      # Handle namespace with nothing in it.
      self._AddBackTokens(tokens)
    return None
1627
1628  def handle_using(self):
1629    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1630    assert tokens
1631    return Using(tokens[0].start, tokens[0].end, tokens)
1632
  def handle_explicit(self):
    """Parse the constructor following the 'explicit' keyword."""
    assert self.in_class
    # Nothing much to do.
    # TODO(nnorwitz): maybe verify the method name == class name.
    # This must be a ctor.
    return self.GetMethod(FUNCTION_CTOR, None)
1639
  def handle_this(self):
    pass  # Nothing to do.

  def handle_operator(self):
    # Pull off the next token(s?) and make that part of the method name.
    pass

  def handle_sizeof(self):
    pass

  def handle_case(self):
    pass

  def handle_switch(self):
    pass
1655
1656  def handle_default(self):
1657    token = self._GetNextToken()
1658    assert token.token_type == tokenize.SYNTAX
1659    assert token.name == ':'
1660
  # Conditionals are not modeled yet (see the FIXME at the top of file).
  def handle_if(self):
    pass

  def handle_else(self):
    pass
1666
1667  def handle_return(self):
1668    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1669    if not tokens:
1670      return Return(self.current_token.start, self.current_token.end, None)
1671    return Return(tokens[0].start, tokens[0].end, tokens)
1672
1673  def handle_goto(self):
1674    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1675    assert len(tokens) == 1, str(tokens)
1676    return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1677
  def handle_try(self):
    pass  # Not needed yet.

  def handle_catch(self):
    pass  # Not needed yet.

  def handle_throw(self):
    pass  # Not needed yet.

  # Loops are not modeled yet (see the FIXME at the top of file).
  def handle_while(self):
    pass

  def handle_do(self):
    pass

  def handle_for(self):
    pass
1695
  # break/continue statements are consumed and discarded.
  def handle_break(self):
    self._IgnoreUpTo(tokenize.SYNTAX, ';')

  def handle_continue(self):
    self._IgnoreUpTo(tokenize.SYNTAX, ';')
1701
1702
def BuilderFromSource(source, filename):
  """Utility method that returns an AstBuilder from source code.

  Args:
    source: 'C++ source code'
    filename: 'file1'

  Returns:
    AstBuilder
  """
  token_stream = tokenize.GetTokens(source)
  return AstBuilder(token_stream, filename)
1714
1715
def PrintIndentifiers(filename, should_print):
  """Prints all identifiers for a C++ source file.

  Args:
    filename: 'file1'
    should_print: predicate with signature: bool Function(token)
  """
  source = utils.ReadFile(filename, False)
  if source is None:
    sys.stderr.write('Unable to find: %s\n' % filename)
    return

  #print('Processing %s' % actual_filename)
  builder = BuilderFromSource(source, filename)
  try:
    for node in builder.Generate():
      if should_print(node):
        print(node.name)
  except KeyboardInterrupt:
    return
  except Exception:
    # Parsing is best-effort: swallow parse errors, but use
    # 'except Exception' (not a bare except) so SystemExit and
    # GeneratorExit still propagate.
    pass
1738
1739
def PrintAllIndentifiers(filenames, should_print):
  """Prints all identifiers for each C++ source file in filenames.

  Args:
    filenames: ['file1', 'file2', ...]
    should_print: predicate with signature: bool Function(token)
  """
  for name in filenames:
    PrintIndentifiers(name, should_print)
1749
1750
def main(argv):
  """Parse each C++ file named in argv[1:] and dump its AST when DEBUG."""
  for filename in argv[1:]:
    source = utils.ReadFile(filename)
    if source is None:
      continue

    print('Processing %s' % filename)
    builder = BuilderFromSource(source, filename)
    try:
      entire_ast = filter(None, builder.Generate())
    except KeyboardInterrupt:
      return
    except Exception:
      # Already printed a warning, print the traceback and continue.
      # 'except Exception' (not a bare except) lets SystemExit exit.
      traceback.print_exc()
    else:
      if utils.DEBUG:
        for ast in entire_ast:
          print(ast)
1770
1771
# Script entry point: parse the files named on the command line.
if __name__ == '__main__':
  main(sys.argv)
1774