1#!/usr/bin/env python 2# 3# Copyright 2007 Neal Norwitz 4# Portions Copyright 2007 Google Inc. 5# 6# Licensed under the Apache License, Version 2.0 (the "License"); 7# you may not use this file except in compliance with the License. 8# You may obtain a copy of the License at 9# 10# http://www.apache.org/licenses/LICENSE-2.0 11# 12# Unless required by applicable law or agreed to in writing, software 13# distributed under the License is distributed on an "AS IS" BASIS, 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15# See the License for the specific language governing permissions and 16# limitations under the License. 17 18"""Generate an Abstract Syntax Tree (AST) for C++.""" 19 20# FIXME: 21# * Tokens should never be exported, need to convert to Nodes 22# (return types, parameters, etc.) 23# * Handle static class data for templatized classes 24# * Handle casts (both C++ and C-style) 25# * Handle conditions and loops (if/else, switch, for, while/do) 26# 27# TODO much, much later: 28# * Handle #define 29# * exceptions 30 31 32try: 33 # Python 3.x 34 import builtins 35except ImportError: 36 # Python 2.x 37 import __builtin__ as builtins 38 39import collections 40import sys 41import traceback 42 43from cpp import keywords 44from cpp import tokenize 45from cpp import utils 46 47 48if not hasattr(builtins, 'reversed'): 49 # Support Python 2.3 and earlier. 50 def reversed(seq): 51 for i in range(len(seq)-1, -1, -1): 52 yield seq[i] 53 54if not hasattr(builtins, 'next'): 55 # Support Python 2.5 and earlier. 56 def next(obj): 57 return obj.next() 58 59 60VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3) 61 62FUNCTION_NONE = 0x00 63FUNCTION_CONST = 0x01 64FUNCTION_VIRTUAL = 0x02 65FUNCTION_PURE_VIRTUAL = 0x04 66FUNCTION_CTOR = 0x08 67FUNCTION_DTOR = 0x10 68FUNCTION_ATTRIBUTE = 0x20 69FUNCTION_UNKNOWN_ANNOTATION = 0x40 70FUNCTION_THROW = 0x80 71FUNCTION_OVERRIDE = 0x100 72 73""" 74These are currently unused. Should really handle these properly at some point. 75 76TYPE_MODIFIER_INLINE = 0x010000 77TYPE_MODIFIER_EXTERN = 0x020000 78TYPE_MODIFIER_STATIC = 0x040000 79TYPE_MODIFIER_CONST = 0x080000 80TYPE_MODIFIER_REGISTER = 0x100000 81TYPE_MODIFIER_VOLATILE = 0x200000 82TYPE_MODIFIER_MUTABLE = 0x400000 83 84TYPE_MODIFIER_MAP = { 85 'inline': TYPE_MODIFIER_INLINE, 86 'extern': TYPE_MODIFIER_EXTERN, 87 'static': TYPE_MODIFIER_STATIC, 88 'const': TYPE_MODIFIER_CONST, 89 'register': TYPE_MODIFIER_REGISTER, 90 'volatile': TYPE_MODIFIER_VOLATILE, 91 'mutable': TYPE_MODIFIER_MUTABLE, 92 } 93""" 94 95_INTERNAL_TOKEN = 'internal' 96_NAMESPACE_POP = 'ns-pop' 97 98 99# TODO(nnorwitz): use this as a singleton for templated_types, etc 100# where we don't want to create a new empty dict each time. It is also const. 101class _NullDict(object): 102 __contains__ = lambda self: False 103 keys = values = items = iterkeys = itervalues = iteritems = lambda self: () 104 105 106# TODO(nnorwitz): move AST nodes into a separate module. 107class Node(object): 108 """Base AST node.""" 109 110 def __init__(self, start, end): 111 self.start = start 112 self.end = end 113 114 def IsDeclaration(self): 115 """Returns bool if this node is a declaration.""" 116 return False 117 118 def IsDefinition(self): 119 """Returns bool if this node is a definition.""" 120 return False 121 122 def IsExportable(self): 123 """Returns bool if this node exportable from a header file.""" 124 return False 125 126 def Requires(self, node): 127 """Does this AST node require the definition of the node passed in?""" 128 return False 129 130 def XXX__str__(self): 131 return self._StringHelper(self.__class__.__name__, '') 132 133 def _StringHelper(self, name, suffix): 134 if not utils.DEBUG: 135 return '%s(%s)' % (name, suffix) 136 return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix) 137 138 def __repr__(self): 139 return str(self) 140 141 142class Define(Node): 143 def __init__(self, start, end, name, definition): 144 Node.__init__(self, start, end) 145 self.name = name 146 self.definition = definition 147 148 def __str__(self): 149 value = '%s %s' % (self.name, self.definition) 150 return self._StringHelper(self.__class__.__name__, value) 151 152 153class Include(Node): 154 def __init__(self, start, end, filename, system): 155 Node.__init__(self, start, end) 156 self.filename = filename 157 self.system = system 158 159 def __str__(self): 160 fmt = '"%s"' 161 if self.system: 162 fmt = '<%s>' 163 return self._StringHelper(self.__class__.__name__, fmt % self.filename) 164 165 166class Goto(Node): 167 def __init__(self, start, end, label): 168 Node.__init__(self, start, end) 169 self.label = label 170 171 def __str__(self): 172 return self._StringHelper(self.__class__.__name__, str(self.label)) 173 174 175class Expr(Node): 176 def __init__(self, start, end, expr): 177 Node.__init__(self, start, end) 178 self.expr = expr 179 180 def Requires(self, node): 181 # TODO(nnorwitz): impl. 182 return False 183 184 def __str__(self): 185 return self._StringHelper(self.__class__.__name__, str(self.expr)) 186 187 188class Return(Expr): 189 pass 190 191 192class Delete(Expr): 193 pass 194 195 196class Friend(Expr): 197 def __init__(self, start, end, expr, namespace): 198 Expr.__init__(self, start, end, expr) 199 self.namespace = namespace[:] 200 201 202class Using(Node): 203 def __init__(self, start, end, names): 204 Node.__init__(self, start, end) 205 self.names = names 206 207 def __str__(self): 208 return self._StringHelper(self.__class__.__name__, str(self.names)) 209 210 211class Parameter(Node): 212 def __init__(self, start, end, name, parameter_type, default): 213 Node.__init__(self, start, end) 214 self.name = name 215 self.type = parameter_type 216 self.default = default 217 218 def Requires(self, node): 219 # TODO(nnorwitz): handle namespaces, etc. 220 return self.type.name == node.name 221 222 def __str__(self): 223 name = str(self.type) 224 suffix = '%s %s' % (name, self.name) 225 if self.default: 226 suffix += ' = ' + ''.join([d.name for d in self.default]) 227 return self._StringHelper(self.__class__.__name__, suffix) 228 229 230class _GenericDeclaration(Node): 231 def __init__(self, start, end, name, namespace): 232 Node.__init__(self, start, end) 233 self.name = name 234 self.namespace = namespace[:] 235 236 def FullName(self): 237 prefix = '' 238 if self.namespace and self.namespace[-1]: 239 prefix = '::'.join(self.namespace) + '::' 240 return prefix + self.name 241 242 def _TypeStringHelper(self, suffix): 243 if self.namespace: 244 names = [n or '<anonymous>' for n in self.namespace] 245 suffix += ' in ' + '::'.join(names) 246 return self._StringHelper(self.__class__.__name__, suffix) 247 248 249# TODO(nnorwitz): merge with Parameter in some way? 250class VariableDeclaration(_GenericDeclaration): 251 def __init__(self, start, end, name, var_type, initial_value, namespace): 252 _GenericDeclaration.__init__(self, start, end, name, namespace) 253 self.type = var_type 254 self.initial_value = initial_value 255 256 def Requires(self, node): 257 # TODO(nnorwitz): handle namespaces, etc. 258 return self.type.name == node.name 259 260 def ToString(self): 261 """Return a string that tries to reconstitute the variable decl.""" 262 suffix = '%s %s' % (self.type, self.name) 263 if self.initial_value: 264 suffix += ' = ' + self.initial_value 265 return suffix 266 267 def __str__(self): 268 return self._StringHelper(self.__class__.__name__, self.ToString()) 269 270 271class Typedef(_GenericDeclaration): 272 def __init__(self, start, end, name, alias, namespace): 273 _GenericDeclaration.__init__(self, start, end, name, namespace) 274 self.alias = alias 275 276 def IsDefinition(self): 277 return True 278 279 def IsExportable(self): 280 return True 281 282 def Requires(self, node): 283 # TODO(nnorwitz): handle namespaces, etc. 284 name = node.name 285 for token in self.alias: 286 if token is not None and name == token.name: 287 return True 288 return False 289 290 def __str__(self): 291 suffix = '%s, %s' % (self.name, self.alias) 292 return self._TypeStringHelper(suffix) 293 294 295class _NestedType(_GenericDeclaration): 296 def __init__(self, start, end, name, fields, namespace): 297 _GenericDeclaration.__init__(self, start, end, name, namespace) 298 self.fields = fields 299 300 def IsDefinition(self): 301 return True 302 303 def IsExportable(self): 304 return True 305 306 def __str__(self): 307 suffix = '%s, {%s}' % (self.name, self.fields) 308 return self._TypeStringHelper(suffix) 309 310 311class Union(_NestedType): 312 pass 313 314 315class Enum(_NestedType): 316 pass 317 318 319class Class(_GenericDeclaration): 320 def __init__(self, start, end, name, bases, templated_types, body, namespace): 321 _GenericDeclaration.__init__(self, start, end, name, namespace) 322 self.bases = bases 323 self.body = body 324 self.templated_types = templated_types 325 326 def IsDeclaration(self): 327 return self.bases is None and self.body is None 328 329 def IsDefinition(self): 330 return not self.IsDeclaration() 331 332 def IsExportable(self): 333 return not self.IsDeclaration() 334 335 def Requires(self, node): 336 # TODO(nnorwitz): handle namespaces, etc. 337 if self.bases: 338 for token_list in self.bases: 339 # TODO(nnorwitz): bases are tokens, do name comparison. 340 for token in token_list: 341 if token.name == node.name: 342 return True 343 # TODO(nnorwitz): search in body too. 344 return False 345 346 def __str__(self): 347 name = self.name 348 if self.templated_types: 349 name += '<%s>' % self.templated_types 350 suffix = '%s, %s, %s' % (name, self.bases, self.body) 351 return self._TypeStringHelper(suffix) 352 353 354class Struct(Class): 355 pass 356 357 358class Function(_GenericDeclaration): 359 def __init__(self, start, end, name, return_type, parameters, 360 modifiers, templated_types, body, namespace): 361 _GenericDeclaration.__init__(self, start, end, name, namespace) 362 converter = TypeConverter(namespace) 363 self.return_type = converter.CreateReturnType(return_type) 364 self.parameters = converter.ToParameters(parameters) 365 self.modifiers = modifiers 366 self.body = body 367 self.templated_types = templated_types 368 369 def IsDeclaration(self): 370 return self.body is None 371 372 def IsDefinition(self): 373 return self.body is not None 374 375 def IsExportable(self): 376 if self.return_type and 'static' in self.return_type.modifiers: 377 return False 378 return None not in self.namespace 379 380 def Requires(self, node): 381 if self.parameters: 382 # TODO(nnorwitz): parameters are tokens, do name comparison. 383 for p in self.parameters: 384 if p.name == node.name: 385 return True 386 # TODO(nnorwitz): search in body too. 387 return False 388 389 def __str__(self): 390 # TODO(nnorwitz): add templated_types. 391 suffix = ('%s %s(%s), 0x%02x, %s' % 392 (self.return_type, self.name, self.parameters, 393 self.modifiers, self.body)) 394 return self._TypeStringHelper(suffix) 395 396 397class Method(Function): 398 def __init__(self, start, end, name, in_class, return_type, parameters, 399 modifiers, templated_types, body, namespace): 400 Function.__init__(self, start, end, name, return_type, parameters, 401 modifiers, templated_types, body, namespace) 402 # TODO(nnorwitz): in_class could also be a namespace which can 403 # mess up finding functions properly. 404 self.in_class = in_class 405 406 407class Type(_GenericDeclaration): 408 """Type used for any variable (eg class, primitive, struct, etc).""" 409 410 def __init__(self, start, end, name, templated_types, modifiers, 411 reference, pointer, array): 412 """ 413 Args: 414 name: str name of main type 415 templated_types: [Class (Type?)] template type info between <> 416 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc. 417 reference, pointer, array: bools 418 """ 419 _GenericDeclaration.__init__(self, start, end, name, []) 420 self.templated_types = templated_types 421 if not name and modifiers: 422 self.name = modifiers.pop() 423 self.modifiers = modifiers 424 self.reference = reference 425 self.pointer = pointer 426 self.array = array 427 428 def __str__(self): 429 prefix = '' 430 if self.modifiers: 431 prefix = ' '.join(self.modifiers) + ' ' 432 name = str(self.name) 433 if self.templated_types: 434 name += '<%s>' % self.templated_types 435 suffix = prefix + name 436 if self.reference: 437 suffix += '&' 438 if self.pointer: 439 suffix += '*' 440 if self.array: 441 suffix += '[]' 442 return self._TypeStringHelper(suffix) 443 444 # By definition, Is* are always False. A Type can only exist in 445 # some sort of variable declaration, parameter, or return value. 446 def IsDeclaration(self): 447 return False 448 449 def IsDefinition(self): 450 return False 451 452 def IsExportable(self): 453 return False 454 455 456class TypeConverter(object): 457 458 def __init__(self, namespace_stack): 459 self.namespace_stack = namespace_stack 460 461 def _GetTemplateEnd(self, tokens, start): 462 count = 1 463 end = start 464 while 1: 465 token = tokens[end] 466 end += 1 467 if token.name == '<': 468 count += 1 469 elif token.name == '>': 470 count -= 1 471 if count == 0: 472 break 473 return tokens[start:end-1], end 474 475 def ToType(self, tokens): 476 """Convert [Token,...] to [Class(...), ] useful for base classes. 477 For example, code like class Foo : public Bar<x, y> { ... }; 478 the "Bar<x, y>" portion gets converted to an AST. 479 480 Returns: 481 [Class(...), ...] 482 """ 483 result = [] 484 name_tokens = [] 485 reference = pointer = array = False 486 487 def AddType(templated_types): 488 # Partition tokens into name and modifier tokens. 489 names = [] 490 modifiers = [] 491 for t in name_tokens: 492 if keywords.IsKeyword(t.name): 493 modifiers.append(t.name) 494 else: 495 names.append(t.name) 496 name = ''.join(names) 497 if name_tokens: 498 result.append(Type(name_tokens[0].start, name_tokens[-1].end, 499 name, templated_types, modifiers, 500 reference, pointer, array)) 501 del name_tokens[:] 502 503 i = 0 504 end = len(tokens) 505 while i < end: 506 token = tokens[i] 507 if token.name == '<': 508 new_tokens, new_end = self._GetTemplateEnd(tokens, i+1) 509 AddType(self.ToType(new_tokens)) 510 # If there is a comma after the template, we need to consume 511 # that here otherwise it becomes part of the name. 512 i = new_end 513 reference = pointer = array = False 514 elif token.name == ',': 515 AddType([]) 516 reference = pointer = array = False 517 elif token.name == '*': 518 pointer = True 519 elif token.name == '&': 520 reference = True 521 elif token.name == '[': 522 pointer = True 523 elif token.name == ']': 524 pass 525 else: 526 name_tokens.append(token) 527 i += 1 528 529 if name_tokens: 530 # No '<' in the tokens, just a simple name and no template. 531 AddType([]) 532 return result 533 534 def DeclarationToParts(self, parts, needs_name_removed): 535 name = None 536 default = [] 537 if needs_name_removed: 538 # Handle default (initial) values properly. 539 for i, t in enumerate(parts): 540 if t.name == '=': 541 default = parts[i+1:] 542 name = parts[i-1].name 543 if name == ']' and parts[i-2].name == '[': 544 name = parts[i-3].name 545 i -= 1 546 parts = parts[:i-1] 547 break 548 else: 549 if parts[-1].token_type == tokenize.NAME: 550 name = parts.pop().name 551 else: 552 # TODO(nnorwitz): this is a hack that happens for code like 553 # Register(Foo<T>); where it thinks this is a function call 554 # but it's actually a declaration. 555 name = '???' 556 modifiers = [] 557 type_name = [] 558 other_tokens = [] 559 templated_types = [] 560 i = 0 561 end = len(parts) 562 while i < end: 563 p = parts[i] 564 if keywords.IsKeyword(p.name): 565 modifiers.append(p.name) 566 elif p.name == '<': 567 templated_tokens, new_end = self._GetTemplateEnd(parts, i+1) 568 templated_types = self.ToType(templated_tokens) 569 i = new_end - 1 570 # Don't add a spurious :: to data members being initialized. 571 next_index = i + 1 572 if next_index < end and parts[next_index].name == '::': 573 i += 1 574 elif p.name in ('[', ']', '='): 575 # These are handled elsewhere. 576 other_tokens.append(p) 577 elif p.name not in ('*', '&', '>'): 578 # Ensure that names have a space between them. 579 if (type_name and type_name[-1].token_type == tokenize.NAME and 580 p.token_type == tokenize.NAME): 581 type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0)) 582 type_name.append(p) 583 else: 584 other_tokens.append(p) 585 i += 1 586 type_name = ''.join([t.name for t in type_name]) 587 return name, type_name, templated_types, modifiers, default, other_tokens 588 589 def ToParameters(self, tokens): 590 if not tokens: 591 return [] 592 593 result = [] 594 name = type_name = '' 595 type_modifiers = [] 596 pointer = reference = array = False 597 first_token = None 598 default = [] 599 600 def AddParameter(end): 601 if default: 602 del default[0] # Remove flag. 603 parts = self.DeclarationToParts(type_modifiers, True) 604 (name, type_name, templated_types, modifiers, 605 unused_default, unused_other_tokens) = parts 606 parameter_type = Type(first_token.start, first_token.end, 607 type_name, templated_types, modifiers, 608 reference, pointer, array) 609 p = Parameter(first_token.start, end, name, 610 parameter_type, default) 611 result.append(p) 612 613 template_count = 0 614 brace_count = 0 615 for s in tokens: 616 if not first_token: 617 first_token = s 618 619 # Check for braces before templates, as we can have unmatched '<>' 620 # inside default arguments. 621 if s.name == '{': 622 brace_count += 1 623 elif s.name == '}': 624 brace_count -= 1 625 if brace_count > 0: 626 type_modifiers.append(s) 627 continue 628 629 if s.name == '<': 630 template_count += 1 631 elif s.name == '>': 632 template_count -= 1 633 if template_count > 0: 634 type_modifiers.append(s) 635 continue 636 637 if s.name == ',': 638 AddParameter(s.start) 639 name = type_name = '' 640 type_modifiers = [] 641 pointer = reference = array = False 642 first_token = None 643 default = [] 644 elif s.name == '*': 645 pointer = True 646 elif s.name == '&': 647 reference = True 648 elif s.name == '[': 649 array = True 650 elif s.name == ']': 651 pass # Just don't add to type_modifiers. 652 elif s.name == '=': 653 # Got a default value. Add any value (None) as a flag. 654 default.append(None) 655 elif default: 656 default.append(s) 657 else: 658 type_modifiers.append(s) 659 AddParameter(tokens[-1].end) 660 return result 661 662 def CreateReturnType(self, return_type_seq): 663 if not return_type_seq: 664 return None 665 start = return_type_seq[0].start 666 end = return_type_seq[-1].end 667 _, name, templated_types, modifiers, default, other_tokens = \ 668 self.DeclarationToParts(return_type_seq, False) 669 names = [n.name for n in other_tokens] 670 reference = '&' in names 671 pointer = '*' in names 672 array = '[' in names 673 return Type(start, end, name, templated_types, modifiers, 674 reference, pointer, array) 675 676 def GetTemplateIndices(self, names): 677 # names is a list of strings. 678 start = names.index('<') 679 end = len(names) - 1 680 while end > 0: 681 if names[end] == '>': 682 break 683 end -= 1 684 return start, end+1 685 686class AstBuilder(object): 687 def __init__(self, token_stream, filename, in_class='', visibility=None, 688 namespace_stack=[]): 689 self.tokens = token_stream 690 self.filename = filename 691 # TODO(nnorwitz): use a better data structure (deque) for the queue. 692 # Switching directions of the "queue" improved perf by about 25%. 693 # Using a deque should be even better since we access from both sides. 694 self.token_queue = [] 695 self.namespace_stack = namespace_stack[:] 696 self.in_class = in_class 697 if in_class is None: 698 self.in_class_name_only = None 699 else: 700 self.in_class_name_only = in_class.split('::')[-1] 701 self.visibility = visibility 702 self.in_function = False 703 self.current_token = None 704 # Keep the state whether we are currently handling a typedef or not. 705 self._handling_typedef = False 706 707 self.converter = TypeConverter(self.namespace_stack) 708 709 def HandleError(self, msg, token): 710 printable_queue = list(reversed(self.token_queue[-20:])) 711 sys.stderr.write('Got %s in %s @ %s %s\n' % 712 (msg, self.filename, token, printable_queue)) 713 714 def Generate(self): 715 while 1: 716 token = self._GetNextToken() 717 if not token: 718 break 719 720 # Get the next token. 721 self.current_token = token 722 723 # Dispatch on the next token type. 724 if token.token_type == _INTERNAL_TOKEN: 725 if token.name == _NAMESPACE_POP: 726 self.namespace_stack.pop() 727 continue 728 729 try: 730 result = self._GenerateOne(token) 731 if result is not None: 732 yield result 733 except: 734 self.HandleError('exception', token) 735 raise 736 737 def _CreateVariable(self, pos_token, name, type_name, type_modifiers, 738 ref_pointer_name_seq, templated_types, value=None): 739 reference = '&' in ref_pointer_name_seq 740 pointer = '*' in ref_pointer_name_seq 741 array = '[' in ref_pointer_name_seq 742 var_type = Type(pos_token.start, pos_token.end, type_name, 743 templated_types, type_modifiers, 744 reference, pointer, array) 745 return VariableDeclaration(pos_token.start, pos_token.end, 746 name, var_type, value, self.namespace_stack) 747 748 def _GenerateOne(self, token): 749 if token.token_type == tokenize.NAME: 750 if (keywords.IsKeyword(token.name) and 751 not keywords.IsBuiltinType(token.name)): 752 if token.name == 'enum': 753 # Pop the next token and only put it back if it's not 754 # 'class'. This allows us to support the two-token 755 # 'enum class' keyword as if it were simply 'enum'. 756 next = self._GetNextToken() 757 if next.name != 'class': 758 self._AddBackToken(next) 759 760 method = getattr(self, 'handle_' + token.name) 761 return method() 762 elif token.name == self.in_class_name_only: 763 # The token name is the same as the class, must be a ctor if 764 # there is a paren. Otherwise, it's the return type. 765 # Peek ahead to get the next token to figure out which. 766 next = self._GetNextToken() 767 self._AddBackToken(next) 768 if next.token_type == tokenize.SYNTAX and next.name == '(': 769 return self._GetMethod([token], FUNCTION_CTOR, None, True) 770 # Fall through--handle like any other method. 771 772 # Handle data or function declaration/definition. 773 syntax = tokenize.SYNTAX 774 temp_tokens, last_token = \ 775 self._GetVarTokensUpToIgnoringTemplates(syntax, 776 '(', ';', '{', '[') 777 temp_tokens.insert(0, token) 778 if last_token.name == '(': 779 # If there is an assignment before the paren, 780 # this is an expression, not a method. 781 expr = bool([e for e in temp_tokens if e.name == '=']) 782 if expr: 783 new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';') 784 temp_tokens.append(last_token) 785 temp_tokens.extend(new_temp) 786 last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0) 787 788 if last_token.name == '[': 789 # Handle array, this isn't a method, unless it's an operator. 790 # TODO(nnorwitz): keep the size somewhere. 791 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']') 792 temp_tokens.append(last_token) 793 if temp_tokens[-2].name == 'operator': 794 temp_tokens.append(self._GetNextToken()) 795 else: 796 temp_tokens2, last_token = \ 797 self._GetVarTokensUpTo(tokenize.SYNTAX, ';') 798 temp_tokens.extend(temp_tokens2) 799 800 if last_token.name == ';': 801 # Handle data, this isn't a method. 802 parts = self.converter.DeclarationToParts(temp_tokens, True) 803 (name, type_name, templated_types, modifiers, default, 804 unused_other_tokens) = parts 805 806 t0 = temp_tokens[0] 807 names = [t.name for t in temp_tokens] 808 if templated_types: 809 start, end = self.converter.GetTemplateIndices(names) 810 names = names[:start] + names[end:] 811 default = ''.join([t.name for t in default]) 812 return self._CreateVariable(t0, name, type_name, modifiers, 813 names, templated_types, default) 814 if last_token.name == '{': 815 self._AddBackTokens(temp_tokens[1:]) 816 self._AddBackToken(last_token) 817 method_name = temp_tokens[0].name 818 method = getattr(self, 'handle_' + method_name, None) 819 if not method: 820 # Must be declaring a variable. 821 # TODO(nnorwitz): handle the declaration. 822 return None 823 return method() 824 return self._GetMethod(temp_tokens, 0, None, False) 825 elif token.token_type == tokenize.SYNTAX: 826 if token.name == '~' and self.in_class: 827 # Must be a dtor (probably not in method body). 828 token = self._GetNextToken() 829 # self.in_class can contain A::Name, but the dtor will only 830 # be Name. Make sure to compare against the right value. 831 if (token.token_type == tokenize.NAME and 832 token.name == self.in_class_name_only): 833 return self._GetMethod([token], FUNCTION_DTOR, None, True) 834 # TODO(nnorwitz): handle a lot more syntax. 835 elif token.token_type == tokenize.PREPROCESSOR: 836 # TODO(nnorwitz): handle more preprocessor directives. 837 # token starts with a #, so remove it and strip whitespace. 838 name = token.name[1:].lstrip() 839 if name.startswith('include'): 840 # Remove "include". 841 name = name[7:].strip() 842 assert name 843 # Handle #include \<newline> "header-on-second-line.h". 844 if name.startswith('\\'): 845 name = name[1:].strip() 846 assert name[0] in '<"', token 847 assert name[-1] in '>"', token 848 system = name[0] == '<' 849 filename = name[1:-1] 850 return Include(token.start, token.end, filename, system) 851 if name.startswith('define'): 852 # Remove "define". 853 name = name[6:].strip() 854 assert name 855 value = '' 856 for i, c in enumerate(name): 857 if c.isspace(): 858 value = name[i:].lstrip() 859 name = name[:i] 860 break 861 return Define(token.start, token.end, name, value) 862 if name.startswith('if') and name[2:3].isspace(): 863 condition = name[3:].strip() 864 if condition.startswith('0') or condition.startswith('(0)'): 865 self._SkipIf0Blocks() 866 return None 867 868 def _GetTokensUpTo(self, expected_token_type, expected_token): 869 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0] 870 871 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens): 872 last_token = self._GetNextToken() 873 tokens = [] 874 while (last_token.token_type != expected_token_type or 875 last_token.name not in expected_tokens): 876 tokens.append(last_token) 877 last_token = self._GetNextToken() 878 return tokens, last_token 879 880 # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an 881 # expected token. 882 def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type, 883 *expected_tokens): 884 last_token = self._GetNextToken() 885 tokens = [] 886 nesting = 0 887 while (nesting > 0 or 888 last_token.token_type != expected_token_type or 889 last_token.name not in expected_tokens): 890 tokens.append(last_token) 891 last_token = self._GetNextToken() 892 if last_token.name == '<': 893 nesting += 1 894 elif last_token.name == '>': 895 nesting -= 1 896 return tokens, last_token 897 898 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary. 899 def _IgnoreUpTo(self, token_type, token): 900 unused_tokens = self._GetTokensUpTo(token_type, token) 901 902 def _SkipIf0Blocks(self): 903 count = 1 904 while 1: 905 token = self._GetNextToken() 906 if token.token_type != tokenize.PREPROCESSOR: 907 continue 908 909 name = token.name[1:].lstrip() 910 if name.startswith('endif'): 911 count -= 1 912 if count == 0: 913 break 914 elif name.startswith('if'): 915 count += 1 916 917 def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None): 918 if GetNextToken is None: 919 GetNextToken = self._GetNextToken 920 # Assumes the current token is open_paren and we will consume 921 # and return up to the close_paren. 922 count = 1 923 token = GetNextToken() 924 while 1: 925 if token.token_type == tokenize.SYNTAX: 926 if token.name == open_paren: 927 count += 1 928 elif token.name == close_paren: 929 count -= 1 930 if count == 0: 931 break 932 yield token 933 token = GetNextToken() 934 yield token 935 936 def _GetParameters(self): 937 return self._GetMatchingChar('(', ')') 938 939 def GetScope(self): 940 return self._GetMatchingChar('{', '}') 941 942 def _GetNextToken(self): 943 if self.token_queue: 944 return self.token_queue.pop() 945 try: 946 return next(self.tokens) 947 except StopIteration: 948 return 949 950 def _AddBackToken(self, token): 951 if token.whence == tokenize.WHENCE_STREAM: 952 token.whence = tokenize.WHENCE_QUEUE 953 self.token_queue.insert(0, token) 954 else: 955 assert token.whence == tokenize.WHENCE_QUEUE, token 956 self.token_queue.append(token) 957 958 def _AddBackTokens(self, tokens): 959 if tokens: 960 if tokens[-1].whence == tokenize.WHENCE_STREAM: 961 for token in tokens: 962 token.whence = tokenize.WHENCE_QUEUE 963 self.token_queue[:0] = reversed(tokens) 964 else: 965 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens 966 self.token_queue.extend(reversed(tokens)) 967 968 def GetName(self, seq=None): 969 """Returns ([tokens], next_token_info).""" 970 GetNextToken = self._GetNextToken 971 if seq is not None: 972 it = iter(seq) 973 GetNextToken = lambda: next(it) 974 next_token = GetNextToken() 975 tokens = [] 976 last_token_was_name = False 977 while (next_token.token_type == tokenize.NAME or 978 (next_token.token_type == tokenize.SYNTAX and 979 next_token.name in ('::', '<'))): 980 # Two NAMEs in a row means the identifier should terminate. 981 # It's probably some sort of variable declaration. 982 if last_token_was_name and next_token.token_type == tokenize.NAME: 983 break 984 last_token_was_name = next_token.token_type == tokenize.NAME 985 tokens.append(next_token) 986 # Handle templated names. 987 if next_token.name == '<': 988 tokens.extend(self._GetMatchingChar('<', '>', GetNextToken)) 989 last_token_was_name = True 990 next_token = GetNextToken() 991 return tokens, next_token 992 993 def GetMethod(self, modifiers, templated_types): 994 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') 995 assert len(return_type_and_name) >= 1 996 return self._GetMethod(return_type_and_name, modifiers, templated_types, 997 False) 998 999 def _GetMethod(self, return_type_and_name, modifiers, templated_types, 1000 get_paren): 1001 template_portion = None 1002 if get_paren: 1003 token = self._GetNextToken() 1004 assert token.token_type == tokenize.SYNTAX, token 1005 if token.name == '<': 1006 # Handle templatized dtors. 1007 template_portion = [token] 1008 template_portion.extend(self._GetMatchingChar('<', '>')) 1009 token = self._GetNextToken() 1010 assert token.token_type == tokenize.SYNTAX, token 1011 assert token.name == '(', token 1012 1013 name = return_type_and_name.pop() 1014 # Handle templatized ctors. 1015 if name.name == '>': 1016 index = 1 1017 while return_type_and_name[index].name != '<': 1018 index += 1 1019 template_portion = return_type_and_name[index:] + [name] 1020 del return_type_and_name[index:] 1021 name = return_type_and_name.pop() 1022 elif name.name == ']': 1023 rt = return_type_and_name 1024 assert rt[-1].name == '[', return_type_and_name 1025 assert rt[-2].name == 'operator', return_type_and_name 1026 name_seq = return_type_and_name[-2:] 1027 del return_type_and_name[-2:] 1028 name = tokenize.Token(tokenize.NAME, 'operator[]', 1029 name_seq[0].start, name.end) 1030 # Get the open paren so _GetParameters() below works. 1031 unused_open_paren = self._GetNextToken() 1032 1033 # TODO(nnorwitz): store template_portion. 1034 return_type = return_type_and_name 1035 indices = name 1036 if return_type: 1037 indices = return_type[0] 1038 1039 # Force ctor for templatized ctors. 1040 if name.name == self.in_class and not modifiers: 1041 modifiers |= FUNCTION_CTOR 1042 parameters = list(self._GetParameters()) 1043 del parameters[-1] # Remove trailing ')'. 1044 1045 # Handling operator() is especially weird. 1046 if name.name == 'operator' and not parameters: 1047 token = self._GetNextToken() 1048 assert token.name == '(', token 1049 parameters = list(self._GetParameters()) 1050 del parameters[-1] # Remove trailing ')'. 1051 1052 token = self._GetNextToken() 1053 while token.token_type == tokenize.NAME: 1054 modifier_token = token 1055 token = self._GetNextToken() 1056 if modifier_token.name == 'const': 1057 modifiers |= FUNCTION_CONST 1058 elif modifier_token.name == '__attribute__': 1059 # TODO(nnorwitz): handle more __attribute__ details. 1060 modifiers |= FUNCTION_ATTRIBUTE 1061 assert token.name == '(', token 1062 # Consume everything between the (parens). 1063 unused_tokens = list(self._GetMatchingChar('(', ')')) 1064 token = self._GetNextToken() 1065 elif modifier_token.name == 'throw': 1066 modifiers |= FUNCTION_THROW 1067 assert token.name == '(', token 1068 # Consume everything between the (parens). 1069 unused_tokens = list(self._GetMatchingChar('(', ')')) 1070 token = self._GetNextToken() 1071 elif modifier_token.name == 'override': 1072 modifiers |= FUNCTION_OVERRIDE 1073 elif modifier_token.name == modifier_token.name.upper(): 1074 # HACK(nnorwitz): assume that all upper-case names 1075 # are some macro we aren't expanding. 1076 modifiers |= FUNCTION_UNKNOWN_ANNOTATION 1077 else: 1078 self.HandleError('unexpected token', modifier_token) 1079 1080 assert token.token_type == tokenize.SYNTAX, token 1081 # Handle ctor initializers. 1082 if token.name == ':': 1083 # TODO(nnorwitz): anything else to handle for initializer list? 1084 while token.name != ';' and token.name != '{': 1085 token = self._GetNextToken() 1086 1087 # Handle pointer to functions that are really data but look 1088 # like method declarations. 1089 if token.name == '(': 1090 if parameters[0].name == '*': 1091 # name contains the return type. 1092 name = parameters.pop() 1093 # parameters contains the name of the data. 1094 modifiers = [p.name for p in parameters] 1095 # Already at the ( to open the parameter list. 1096 function_parameters = list(self._GetMatchingChar('(', ')')) 1097 del function_parameters[-1] # Remove trailing ')'. 1098 # TODO(nnorwitz): store the function_parameters. 1099 token = self._GetNextToken() 1100 assert token.token_type == tokenize.SYNTAX, token 1101 assert token.name == ';', token 1102 return self._CreateVariable(indices, name.name, indices.name, 1103 modifiers, '', None) 1104 # At this point, we got something like: 1105 # return_type (type::*name_)(params); 1106 # This is a data member called name_ that is a function pointer. 1107 # With this code: void (sq_type::*field_)(string&); 1108 # We get: name=void return_type=[] parameters=sq_type ... field_ 1109 # TODO(nnorwitz): is return_type always empty? 1110 # TODO(nnorwitz): this isn't even close to being correct. 1111 # Just put in something so we don't crash and can move on. 1112 real_name = parameters[-1] 1113 modifiers = [p.name for p in self._GetParameters()] 1114 del modifiers[-1] # Remove trailing ')'. 1115 return self._CreateVariable(indices, real_name.name, indices.name, 1116 modifiers, '', None) 1117 1118 if token.name == '{': 1119 body = list(self.GetScope()) 1120 del body[-1] # Remove trailing '}'. 1121 else: 1122 body = None 1123 if token.name == '=': 1124 token = self._GetNextToken() 1125 1126 if token.name == 'default' or token.name == 'delete': 1127 # Ignore explicitly defaulted and deleted special members 1128 # in C++11. 1129 token = self._GetNextToken() 1130 else: 1131 # Handle pure-virtual declarations. 1132 assert token.token_type == tokenize.CONSTANT, token 1133 assert token.name == '0', token 1134 modifiers |= FUNCTION_PURE_VIRTUAL 1135 token = self._GetNextToken() 1136 1137 if token.name == '[': 1138 # TODO(nnorwitz): store tokens and improve parsing. 1139 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N]; 1140 tokens = list(self._GetMatchingChar('[', ']')) 1141 token = self._GetNextToken() 1142 1143 assert token.name == ';', (token, return_type_and_name, parameters) 1144 1145 # Looks like we got a method, not a function. 1146 if len(return_type) > 2 and return_type[-1].name == '::': 1147 return_type, in_class = \ 1148 self._GetReturnTypeAndClassName(return_type) 1149 return Method(indices.start, indices.end, name.name, in_class, 1150 return_type, parameters, modifiers, templated_types, 1151 body, self.namespace_stack) 1152 return Function(indices.start, indices.end, name.name, return_type, 1153 parameters, modifiers, templated_types, body, 1154 self.namespace_stack) 1155 1156 def _GetReturnTypeAndClassName(self, token_seq): 1157 # Splitting the return type from the class name in a method 1158 # can be tricky. For example, Return::Type::Is::Hard::To::Find(). 1159 # Where is the return type and where is the class name? 1160 # The heuristic used is to pull the last name as the class name. 1161 # This includes all the templated type info. 1162 # TODO(nnorwitz): if there is only One name like in the 1163 # example above, punt and assume the last bit is the class name. 1164 1165 # Ignore a :: prefix, if exists so we can find the first real name. 1166 i = 0 1167 if token_seq[0].name == '::': 1168 i = 1 1169 # Ignore a :: suffix, if exists. 1170 end = len(token_seq) - 1 1171 if token_seq[end-1].name == '::': 1172 end -= 1 1173 1174 # Make a copy of the sequence so we can append a sentinel 1175 # value. This is required for GetName will has to have some 1176 # terminating condition beyond the last name. 1177 seq_copy = token_seq[i:end] 1178 seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0)) 1179 names = [] 1180 while i < end: 1181 # Iterate through the sequence parsing out each name. 1182 new_name, next = self.GetName(seq_copy[i:]) 1183 assert new_name, 'Got empty new_name, next=%s' % next 1184 # We got a pointer or ref. Add it to the name. 1185 if next and next.token_type == tokenize.SYNTAX: 1186 new_name.append(next) 1187 names.append(new_name) 1188 i += len(new_name) 1189 1190 # Now that we have the names, it's time to undo what we did. 1191 1192 # Remove the sentinel value. 1193 names[-1].pop() 1194 # Flatten the token sequence for the return type. 1195 return_type = [e for seq in names[:-1] for e in seq] 1196 # The class name is the last name. 1197 class_name = names[-1] 1198 return return_type, class_name 1199 1200 def handle_bool(self): 1201 pass 1202 1203 def handle_char(self): 1204 pass 1205 1206 def handle_int(self): 1207 pass 1208 1209 def handle_long(self): 1210 pass 1211 1212 def handle_short(self): 1213 pass 1214 1215 def handle_double(self): 1216 pass 1217 1218 def handle_float(self): 1219 pass 1220 1221 def handle_void(self): 1222 pass 1223 1224 def handle_wchar_t(self): 1225 pass 1226 1227 def handle_unsigned(self): 1228 pass 1229 1230 def handle_signed(self): 1231 pass 1232 1233 def _GetNestedType(self, ctor): 1234 name = None 1235 name_tokens, token = self.GetName() 1236 if name_tokens: 1237 name = ''.join([t.name for t in name_tokens]) 1238 1239 # Handle forward declarations. 1240 if token.token_type == tokenize.SYNTAX and token.name == ';': 1241 return ctor(token.start, token.end, name, None, 1242 self.namespace_stack) 1243 1244 if token.token_type == tokenize.NAME and self._handling_typedef: 1245 self._AddBackToken(token) 1246 return ctor(token.start, token.end, name, None, 1247 self.namespace_stack) 1248 1249 # Must be the type declaration. 1250 fields = list(self._GetMatchingChar('{', '}')) 1251 del fields[-1] # Remove trailing '}'. 1252 if token.token_type == tokenize.SYNTAX and token.name == '{': 1253 next = self._GetNextToken() 1254 new_type = ctor(token.start, token.end, name, fields, 1255 self.namespace_stack) 1256 # A name means this is an anonymous type and the name 1257 # is the variable declaration. 1258 if next.token_type != tokenize.NAME: 1259 return new_type 1260 name = new_type 1261 token = next 1262 1263 # Must be variable declaration using the type prefixed with keyword. 1264 assert token.token_type == tokenize.NAME, token 1265 return self._CreateVariable(token, token.name, name, [], '', None) 1266 1267 def handle_struct(self): 1268 # Special case the handling typedef/aliasing of structs here. 1269 # It would be a pain to handle in the class code. 1270 name_tokens, var_token = self.GetName() 1271 if name_tokens: 1272 next_token = self._GetNextToken() 1273 is_syntax = (var_token.token_type == tokenize.SYNTAX and 1274 var_token.name[0] in '*&') 1275 is_variable = (var_token.token_type == tokenize.NAME and 1276 next_token.name == ';') 1277 variable = var_token 1278 if is_syntax and not is_variable: 1279 variable = next_token 1280 temp = self._GetNextToken() 1281 if temp.token_type == tokenize.SYNTAX and temp.name == '(': 1282 # Handle methods declared to return a struct. 1283 t0 = name_tokens[0] 1284 struct = tokenize.Token(tokenize.NAME, 'struct', 1285 t0.start-7, t0.start-2) 1286 type_and_name = [struct] 1287 type_and_name.extend(name_tokens) 1288 type_and_name.extend((var_token, next_token)) 1289 return self._GetMethod(type_and_name, 0, None, False) 1290 assert temp.name == ';', (temp, name_tokens, var_token) 1291 if is_syntax or (is_variable and not self._handling_typedef): 1292 modifiers = ['struct'] 1293 type_name = ''.join([t.name for t in name_tokens]) 1294 position = name_tokens[0] 1295 return self._CreateVariable(position, variable.name, type_name, 1296 modifiers, var_token.name, None) 1297 name_tokens.extend((var_token, next_token)) 1298 self._AddBackTokens(name_tokens) 1299 else: 1300 self._AddBackToken(var_token) 1301 return self._GetClass(Struct, VISIBILITY_PUBLIC, None) 1302 1303 def handle_union(self): 1304 return self._GetNestedType(Union) 1305 1306 def handle_enum(self): 1307 return self._GetNestedType(Enum) 1308 1309 def handle_auto(self): 1310 # TODO(nnorwitz): warn about using auto? Probably not since it 1311 # will be reclaimed and useful for C++0x. 1312 pass 1313 1314 def handle_register(self): 1315 pass 1316 1317 def handle_const(self): 1318 pass 1319 1320 def handle_inline(self): 1321 pass 1322 1323 def handle_extern(self): 1324 pass 1325 1326 def handle_static(self): 1327 pass 1328 1329 def handle_virtual(self): 1330 # What follows must be a method. 1331 token = token2 = self._GetNextToken() 1332 if token.name == 'inline': 1333 # HACK(nnorwitz): handle inline dtors by ignoring 'inline'. 1334 token2 = self._GetNextToken() 1335 if token2.token_type == tokenize.SYNTAX and token2.name == '~': 1336 return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None) 1337 assert token.token_type == tokenize.NAME or token.name == '::', token 1338 return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates( 1339 tokenize.SYNTAX, '(') # ) 1340 return_type_and_name.insert(0, token) 1341 if token2 is not token: 1342 return_type_and_name.insert(1, token2) 1343 return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL, 1344 None, False) 1345 1346 def handle_volatile(self): 1347 pass 1348 1349 def handle_mutable(self): 1350 pass 1351 1352 def handle_public(self): 1353 assert self.in_class 1354 self.visibility = VISIBILITY_PUBLIC 1355 1356 def handle_protected(self): 1357 assert self.in_class 1358 self.visibility = VISIBILITY_PROTECTED 1359 1360 def handle_private(self): 1361 assert self.in_class 1362 self.visibility = VISIBILITY_PRIVATE 1363 1364 def handle_friend(self): 1365 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1366 assert tokens 1367 t0 = tokens[0] 1368 return Friend(t0.start, t0.end, tokens, self.namespace_stack) 1369 1370 def handle_static_cast(self): 1371 pass 1372 1373 def handle_const_cast(self): 1374 pass 1375 1376 def handle_dynamic_cast(self): 1377 pass 1378 1379 def handle_reinterpret_cast(self): 1380 pass 1381 1382 def handle_new(self): 1383 pass 1384 1385 def handle_delete(self): 1386 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1387 assert tokens 1388 return Delete(tokens[0].start, tokens[0].end, tokens) 1389 1390 def handle_typedef(self): 1391 token = self._GetNextToken() 1392 if (token.token_type == tokenize.NAME and 1393 keywords.IsKeyword(token.name)): 1394 # Token must be struct/enum/union/class. 1395 method = getattr(self, 'handle_' + token.name) 1396 self._handling_typedef = True 1397 tokens = [method()] 1398 self._handling_typedef = False 1399 else: 1400 tokens = [token] 1401 1402 # Get the remainder of the typedef up to the semi-colon. 1403 tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';')) 1404 1405 # TODO(nnorwitz): clean all this up. 1406 assert tokens 1407 name = tokens.pop() 1408 indices = name 1409 if tokens: 1410 indices = tokens[0] 1411 if not indices: 1412 indices = token 1413 if name.name == ')': 1414 # HACK(nnorwitz): Handle pointers to functions "properly". 1415 if (len(tokens) >= 4 and 1416 tokens[1].name == '(' and tokens[2].name == '*'): 1417 tokens.append(name) 1418 name = tokens[3] 1419 elif name.name == ']': 1420 # HACK(nnorwitz): Handle arrays properly. 1421 if len(tokens) >= 2: 1422 tokens.append(name) 1423 name = tokens[1] 1424 new_type = tokens 1425 if tokens and isinstance(tokens[0], tokenize.Token): 1426 new_type = self.converter.ToType(tokens)[0] 1427 return Typedef(indices.start, indices.end, name.name, 1428 new_type, self.namespace_stack) 1429 1430 def handle_typeid(self): 1431 pass # Not needed yet. 1432 1433 def handle_typename(self): 1434 pass # Not needed yet. 1435 1436 def _GetTemplatedTypes(self): 1437 result = collections.OrderedDict() 1438 tokens = list(self._GetMatchingChar('<', '>')) 1439 len_tokens = len(tokens) - 1 # Ignore trailing '>'. 1440 i = 0 1441 while i < len_tokens: 1442 key = tokens[i].name 1443 i += 1 1444 if keywords.IsKeyword(key) or key == ',': 1445 continue 1446 type_name = default = None 1447 if i < len_tokens: 1448 i += 1 1449 if tokens[i-1].name == '=': 1450 assert i < len_tokens, '%s %s' % (i, tokens) 1451 default, unused_next_token = self.GetName(tokens[i:]) 1452 i += len(default) 1453 else: 1454 if tokens[i-1].name != ',': 1455 # We got something like: Type variable. 1456 # Re-adjust the key (variable) and type_name (Type). 1457 key = tokens[i-1].name 1458 type_name = tokens[i-2] 1459 1460 result[key] = (type_name, default) 1461 return result 1462 1463 def handle_template(self): 1464 token = self._GetNextToken() 1465 assert token.token_type == tokenize.SYNTAX, token 1466 assert token.name == '<', token 1467 templated_types = self._GetTemplatedTypes() 1468 # TODO(nnorwitz): for now, just ignore the template params. 1469 token = self._GetNextToken() 1470 if token.token_type == tokenize.NAME: 1471 if token.name == 'class': 1472 return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types) 1473 elif token.name == 'struct': 1474 return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types) 1475 elif token.name == 'friend': 1476 return self.handle_friend() 1477 self._AddBackToken(token) 1478 tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';') 1479 tokens.append(last) 1480 self._AddBackTokens(tokens) 1481 if last.name == '(': 1482 return self.GetMethod(FUNCTION_NONE, templated_types) 1483 # Must be a variable definition. 1484 return None 1485 1486 def handle_true(self): 1487 pass # Nothing to do. 1488 1489 def handle_false(self): 1490 pass # Nothing to do. 1491 1492 def handle_asm(self): 1493 pass # Not needed yet. 1494 1495 def handle_class(self): 1496 return self._GetClass(Class, VISIBILITY_PRIVATE, None) 1497 1498 def _GetBases(self): 1499 # Get base classes. 1500 bases = [] 1501 while 1: 1502 token = self._GetNextToken() 1503 assert token.token_type == tokenize.NAME, token 1504 # TODO(nnorwitz): store kind of inheritance...maybe. 1505 if token.name not in ('public', 'protected', 'private'): 1506 # If inheritance type is not specified, it is private. 1507 # Just put the token back so we can form a name. 1508 # TODO(nnorwitz): it would be good to warn about this. 1509 self._AddBackToken(token) 1510 else: 1511 # Check for virtual inheritance. 1512 token = self._GetNextToken() 1513 if token.name != 'virtual': 1514 self._AddBackToken(token) 1515 else: 1516 # TODO(nnorwitz): store that we got virtual for this base. 1517 pass 1518 base, next_token = self.GetName() 1519 bases_ast = self.converter.ToType(base) 1520 assert len(bases_ast) == 1, bases_ast 1521 bases.append(bases_ast[0]) 1522 assert next_token.token_type == tokenize.SYNTAX, next_token 1523 if next_token.name == '{': 1524 token = next_token 1525 break 1526 # Support multiple inheritance. 1527 assert next_token.name == ',', next_token 1528 return bases, token 1529 1530 def _GetClass(self, class_type, visibility, templated_types): 1531 class_name = None 1532 class_token = self._GetNextToken() 1533 if class_token.token_type != tokenize.NAME: 1534 assert class_token.token_type == tokenize.SYNTAX, class_token 1535 token = class_token 1536 else: 1537 # Skip any macro (e.g. storage class specifiers) after the 1538 # 'class' keyword. 1539 next_token = self._GetNextToken() 1540 if next_token.token_type == tokenize.NAME: 1541 self._AddBackToken(next_token) 1542 else: 1543 self._AddBackTokens([class_token, next_token]) 1544 name_tokens, token = self.GetName() 1545 class_name = ''.join([t.name for t in name_tokens]) 1546 bases = None 1547 if token.token_type == tokenize.SYNTAX: 1548 if token.name == ';': 1549 # Forward declaration. 1550 return class_type(class_token.start, class_token.end, 1551 class_name, None, templated_types, None, 1552 self.namespace_stack) 1553 if token.name in '*&': 1554 # Inline forward declaration. Could be method or data. 1555 name_token = self._GetNextToken() 1556 next_token = self._GetNextToken() 1557 if next_token.name == ';': 1558 # Handle data 1559 modifiers = ['class'] 1560 return self._CreateVariable(class_token, name_token.name, 1561 class_name, 1562 modifiers, token.name, None) 1563 else: 1564 # Assume this is a method. 1565 tokens = (class_token, token, name_token, next_token) 1566 self._AddBackTokens(tokens) 1567 return self.GetMethod(FUNCTION_NONE, None) 1568 if token.name == ':': 1569 bases, token = self._GetBases() 1570 1571 body = None 1572 if token.token_type == tokenize.SYNTAX and token.name == '{': 1573 assert token.token_type == tokenize.SYNTAX, token 1574 assert token.name == '{', token 1575 1576 ast = AstBuilder(self.GetScope(), self.filename, class_name, 1577 visibility, self.namespace_stack) 1578 body = list(ast.Generate()) 1579 1580 if not self._handling_typedef: 1581 token = self._GetNextToken() 1582 if token.token_type != tokenize.NAME: 1583 assert token.token_type == tokenize.SYNTAX, token 1584 assert token.name == ';', token 1585 else: 1586 new_class = class_type(class_token.start, class_token.end, 1587 class_name, bases, None, 1588 body, self.namespace_stack) 1589 1590 modifiers = [] 1591 return self._CreateVariable(class_token, 1592 token.name, new_class, 1593 modifiers, token.name, None) 1594 else: 1595 if not self._handling_typedef: 1596 self.HandleError('non-typedef token', token) 1597 self._AddBackToken(token) 1598 1599 return class_type(class_token.start, class_token.end, class_name, 1600 bases, templated_types, body, self.namespace_stack) 1601 1602 def handle_namespace(self): 1603 # Support anonymous namespaces. 1604 name = None 1605 name_tokens, token = self.GetName() 1606 if name_tokens: 1607 name = ''.join([t.name for t in name_tokens]) 1608 self.namespace_stack.append(name) 1609 assert token.token_type == tokenize.SYNTAX, token 1610 # Create an internal token that denotes when the namespace is complete. 1611 internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP, 1612 None, None) 1613 internal_token.whence = token.whence 1614 if token.name == '=': 1615 # TODO(nnorwitz): handle aliasing namespaces. 1616 name, next_token = self.GetName() 1617 assert next_token.name == ';', next_token 1618 self._AddBackToken(internal_token) 1619 else: 1620 assert token.name == '{', token 1621 tokens = list(self.GetScope()) 1622 # Replace the trailing } with the internal namespace pop token. 1623 tokens[-1] = internal_token 1624 # Handle namespace with nothing in it. 1625 self._AddBackTokens(tokens) 1626 return None 1627 1628 def handle_using(self): 1629 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1630 assert tokens 1631 return Using(tokens[0].start, tokens[0].end, tokens) 1632 1633 def handle_explicit(self): 1634 assert self.in_class 1635 # Nothing much to do. 1636 # TODO(nnorwitz): maybe verify the method name == class name. 1637 # This must be a ctor. 1638 return self.GetMethod(FUNCTION_CTOR, None) 1639 1640 def handle_this(self): 1641 pass # Nothing to do. 1642 1643 def handle_operator(self): 1644 # Pull off the next token(s?) and make that part of the method name. 1645 pass 1646 1647 def handle_sizeof(self): 1648 pass 1649 1650 def handle_case(self): 1651 pass 1652 1653 def handle_switch(self): 1654 pass 1655 1656 def handle_default(self): 1657 token = self._GetNextToken() 1658 assert token.token_type == tokenize.SYNTAX 1659 assert token.name == ':' 1660 1661 def handle_if(self): 1662 pass 1663 1664 def handle_else(self): 1665 pass 1666 1667 def handle_return(self): 1668 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1669 if not tokens: 1670 return Return(self.current_token.start, self.current_token.end, None) 1671 return Return(tokens[0].start, tokens[0].end, tokens) 1672 1673 def handle_goto(self): 1674 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1675 assert len(tokens) == 1, str(tokens) 1676 return Goto(tokens[0].start, tokens[0].end, tokens[0].name) 1677 1678 def handle_try(self): 1679 pass # Not needed yet. 1680 1681 def handle_catch(self): 1682 pass # Not needed yet. 1683 1684 def handle_throw(self): 1685 pass # Not needed yet. 1686 1687 def handle_while(self): 1688 pass 1689 1690 def handle_do(self): 1691 pass 1692 1693 def handle_for(self): 1694 pass 1695 1696 def handle_break(self): 1697 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1698 1699 def handle_continue(self): 1700 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1701 1702 1703def BuilderFromSource(source, filename): 1704 """Utility method that returns an AstBuilder from source code. 1705 1706 Args: 1707 source: 'C++ source code' 1708 filename: 'file1' 1709 1710 Returns: 1711 AstBuilder 1712 """ 1713 return AstBuilder(tokenize.GetTokens(source), filename) 1714 1715 1716def PrintIndentifiers(filename, should_print): 1717 """Prints all identifiers for a C++ source file. 1718 1719 Args: 1720 filename: 'file1' 1721 should_print: predicate with signature: bool Function(token) 1722 """ 1723 source = utils.ReadFile(filename, False) 1724 if source is None: 1725 sys.stderr.write('Unable to find: %s\n' % filename) 1726 return 1727 1728 #print('Processing %s' % actual_filename) 1729 builder = BuilderFromSource(source, filename) 1730 try: 1731 for node in builder.Generate(): 1732 if should_print(node): 1733 print(node.name) 1734 except KeyboardInterrupt: 1735 return 1736 except: 1737 pass 1738 1739 1740def PrintAllIndentifiers(filenames, should_print): 1741 """Prints all identifiers for each C++ source file in filenames. 1742 1743 Args: 1744 filenames: ['file1', 'file2', ...] 1745 should_print: predicate with signature: bool Function(token) 1746 """ 1747 for path in filenames: 1748 PrintIndentifiers(path, should_print) 1749 1750 1751def main(argv): 1752 for filename in argv[1:]: 1753 source = utils.ReadFile(filename) 1754 if source is None: 1755 continue 1756 1757 print('Processing %s' % filename) 1758 builder = BuilderFromSource(source, filename) 1759 try: 1760 entire_ast = filter(None, builder.Generate()) 1761 except KeyboardInterrupt: 1762 return 1763 except: 1764 # Already printed a warning, print the traceback and continue. 1765 traceback.print_exc() 1766 else: 1767 if utils.DEBUG: 1768 for ast in entire_ast: 1769 print(ast) 1770 1771 1772if __name__ == '__main__': 1773 main(sys.argv) 1774