1#!/usr/bin/env python 2# 3# Copyright 2007 Neal Norwitz 4# Portions Copyright 2007 Google Inc. 5# 6# Licensed under the Apache License, Version 2.0 (the "License"); 7# you may not use this file except in compliance with the License. 8# You may obtain a copy of the License at 9# 10# http://www.apache.org/licenses/LICENSE-2.0 11# 12# Unless required by applicable law or agreed to in writing, software 13# distributed under the License is distributed on an "AS IS" BASIS, 14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15# See the License for the specific language governing permissions and 16# limitations under the License. 17 18"""Generate an Abstract Syntax Tree (AST) for C++.""" 19 20__author__ = 'nnorwitz@google.com (Neal Norwitz)' 21 22 23# TODO: 24# * Tokens should never be exported, need to convert to Nodes 25# (return types, parameters, etc.) 26# * Handle static class data for templatized classes 27# * Handle casts (both C++ and C-style) 28# * Handle conditions and loops (if/else, switch, for, while/do) 29# 30# TODO much, much later: 31# * Handle #define 32# * exceptions 33 34 35try: 36 # Python 3.x 37 import builtins 38except ImportError: 39 # Python 2.x 40 import __builtin__ as builtins 41 42import sys 43import traceback 44 45from cpp import keywords 46from cpp import tokenize 47from cpp import utils 48 49 50if not hasattr(builtins, 'reversed'): 51 # Support Python 2.3 and earlier. 52 def reversed(seq): 53 for i in range(len(seq)-1, -1, -1): 54 yield seq[i] 55 56if not hasattr(builtins, 'next'): 57 # Support Python 2.5 and earlier. 58 def next(obj): 59 return obj.next() 60 61 62VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3) 63 64FUNCTION_NONE = 0x00 65FUNCTION_CONST = 0x01 66FUNCTION_VIRTUAL = 0x02 67FUNCTION_PURE_VIRTUAL = 0x04 68FUNCTION_CTOR = 0x08 69FUNCTION_DTOR = 0x10 70FUNCTION_ATTRIBUTE = 0x20 71FUNCTION_UNKNOWN_ANNOTATION = 0x40 72FUNCTION_THROW = 0x80 73FUNCTION_OVERRIDE = 0x100 74 75""" 76These are currently unused. Should really handle these properly at some point. 77 78TYPE_MODIFIER_INLINE = 0x010000 79TYPE_MODIFIER_EXTERN = 0x020000 80TYPE_MODIFIER_STATIC = 0x040000 81TYPE_MODIFIER_CONST = 0x080000 82TYPE_MODIFIER_REGISTER = 0x100000 83TYPE_MODIFIER_VOLATILE = 0x200000 84TYPE_MODIFIER_MUTABLE = 0x400000 85 86TYPE_MODIFIER_MAP = { 87 'inline': TYPE_MODIFIER_INLINE, 88 'extern': TYPE_MODIFIER_EXTERN, 89 'static': TYPE_MODIFIER_STATIC, 90 'const': TYPE_MODIFIER_CONST, 91 'register': TYPE_MODIFIER_REGISTER, 92 'volatile': TYPE_MODIFIER_VOLATILE, 93 'mutable': TYPE_MODIFIER_MUTABLE, 94 } 95""" 96 97_INTERNAL_TOKEN = 'internal' 98_NAMESPACE_POP = 'ns-pop' 99 100 101# TODO(nnorwitz): use this as a singleton for templated_types, etc 102# where we don't want to create a new empty dict each time. It is also const. 103class _NullDict(object): 104 __contains__ = lambda self: False 105 keys = values = items = iterkeys = itervalues = iteritems = lambda self: () 106 107 108# TODO(nnorwitz): move AST nodes into a separate module. 109class Node(object): 110 """Base AST node.""" 111 112 def __init__(self, start, end): 113 self.start = start 114 self.end = end 115 116 def IsDeclaration(self): 117 """Returns bool if this node is a declaration.""" 118 return False 119 120 def IsDefinition(self): 121 """Returns bool if this node is a definition.""" 122 return False 123 124 def IsExportable(self): 125 """Returns bool if this node exportable from a header file.""" 126 return False 127 128 def Requires(self, node): 129 """Does this AST node require the definition of the node passed in?""" 130 return False 131 132 def XXX__str__(self): 133 return self._StringHelper(self.__class__.__name__, '') 134 135 def _StringHelper(self, name, suffix): 136 if not utils.DEBUG: 137 return '%s(%s)' % (name, suffix) 138 return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix) 139 140 def __repr__(self): 141 return str(self) 142 143 144class Define(Node): 145 def __init__(self, start, end, name, definition): 146 Node.__init__(self, start, end) 147 self.name = name 148 self.definition = definition 149 150 def __str__(self): 151 value = '%s %s' % (self.name, self.definition) 152 return self._StringHelper(self.__class__.__name__, value) 153 154 155class Include(Node): 156 def __init__(self, start, end, filename, system): 157 Node.__init__(self, start, end) 158 self.filename = filename 159 self.system = system 160 161 def __str__(self): 162 fmt = '"%s"' 163 if self.system: 164 fmt = '<%s>' 165 return self._StringHelper(self.__class__.__name__, fmt % self.filename) 166 167 168class Goto(Node): 169 def __init__(self, start, end, label): 170 Node.__init__(self, start, end) 171 self.label = label 172 173 def __str__(self): 174 return self._StringHelper(self.__class__.__name__, str(self.label)) 175 176 177class Expr(Node): 178 def __init__(self, start, end, expr): 179 Node.__init__(self, start, end) 180 self.expr = expr 181 182 def Requires(self, node): 183 # TODO(nnorwitz): impl. 184 return False 185 186 def __str__(self): 187 return self._StringHelper(self.__class__.__name__, str(self.expr)) 188 189 190class Return(Expr): 191 pass 192 193 194class Delete(Expr): 195 pass 196 197 198class Friend(Expr): 199 def __init__(self, start, end, expr, namespace): 200 Expr.__init__(self, start, end, expr) 201 self.namespace = namespace[:] 202 203 204class Using(Node): 205 def __init__(self, start, end, names): 206 Node.__init__(self, start, end) 207 self.names = names 208 209 def __str__(self): 210 return self._StringHelper(self.__class__.__name__, str(self.names)) 211 212 213class Parameter(Node): 214 def __init__(self, start, end, name, parameter_type, default): 215 Node.__init__(self, start, end) 216 self.name = name 217 self.type = parameter_type 218 self.default = default 219 220 def Requires(self, node): 221 # TODO(nnorwitz): handle namespaces, etc. 222 return self.type.name == node.name 223 224 def __str__(self): 225 name = str(self.type) 226 suffix = '%s %s' % (name, self.name) 227 if self.default: 228 suffix += ' = ' + ''.join([d.name for d in self.default]) 229 return self._StringHelper(self.__class__.__name__, suffix) 230 231 232class _GenericDeclaration(Node): 233 def __init__(self, start, end, name, namespace): 234 Node.__init__(self, start, end) 235 self.name = name 236 self.namespace = namespace[:] 237 238 def FullName(self): 239 prefix = '' 240 if self.namespace and self.namespace[-1]: 241 prefix = '::'.join(self.namespace) + '::' 242 return prefix + self.name 243 244 def _TypeStringHelper(self, suffix): 245 if self.namespace: 246 names = [n or '<anonymous>' for n in self.namespace] 247 suffix += ' in ' + '::'.join(names) 248 return self._StringHelper(self.__class__.__name__, suffix) 249 250 251# TODO(nnorwitz): merge with Parameter in some way? 252class VariableDeclaration(_GenericDeclaration): 253 def __init__(self, start, end, name, var_type, initial_value, namespace): 254 _GenericDeclaration.__init__(self, start, end, name, namespace) 255 self.type = var_type 256 self.initial_value = initial_value 257 258 def Requires(self, node): 259 # TODO(nnorwitz): handle namespaces, etc. 260 return self.type.name == node.name 261 262 def ToString(self): 263 """Return a string that tries to reconstitute the variable decl.""" 264 suffix = '%s %s' % (self.type, self.name) 265 if self.initial_value: 266 suffix += ' = ' + self.initial_value 267 return suffix 268 269 def __str__(self): 270 return self._StringHelper(self.__class__.__name__, self.ToString()) 271 272 273class Typedef(_GenericDeclaration): 274 def __init__(self, start, end, name, alias, namespace): 275 _GenericDeclaration.__init__(self, start, end, name, namespace) 276 self.alias = alias 277 278 def IsDefinition(self): 279 return True 280 281 def IsExportable(self): 282 return True 283 284 def Requires(self, node): 285 # TODO(nnorwitz): handle namespaces, etc. 286 name = node.name 287 for token in self.alias: 288 if token is not None and name == token.name: 289 return True 290 return False 291 292 def __str__(self): 293 suffix = '%s, %s' % (self.name, self.alias) 294 return self._TypeStringHelper(suffix) 295 296 297class _NestedType(_GenericDeclaration): 298 def __init__(self, start, end, name, fields, namespace): 299 _GenericDeclaration.__init__(self, start, end, name, namespace) 300 self.fields = fields 301 302 def IsDefinition(self): 303 return True 304 305 def IsExportable(self): 306 return True 307 308 def __str__(self): 309 suffix = '%s, {%s}' % (self.name, self.fields) 310 return self._TypeStringHelper(suffix) 311 312 313class Union(_NestedType): 314 pass 315 316 317class Enum(_NestedType): 318 pass 319 320 321class Class(_GenericDeclaration): 322 def __init__(self, start, end, name, bases, templated_types, body, namespace): 323 _GenericDeclaration.__init__(self, start, end, name, namespace) 324 self.bases = bases 325 self.body = body 326 self.templated_types = templated_types 327 328 def IsDeclaration(self): 329 return self.bases is None and self.body is None 330 331 def IsDefinition(self): 332 return not self.IsDeclaration() 333 334 def IsExportable(self): 335 return not self.IsDeclaration() 336 337 def Requires(self, node): 338 # TODO(nnorwitz): handle namespaces, etc. 339 if self.bases: 340 for token_list in self.bases: 341 # TODO(nnorwitz): bases are tokens, do name comparison. 342 for token in token_list: 343 if token.name == node.name: 344 return True 345 # TODO(nnorwitz): search in body too. 346 return False 347 348 def __str__(self): 349 name = self.name 350 if self.templated_types: 351 name += '<%s>' % self.templated_types 352 suffix = '%s, %s, %s' % (name, self.bases, self.body) 353 return self._TypeStringHelper(suffix) 354 355 356class Struct(Class): 357 pass 358 359 360class Function(_GenericDeclaration): 361 def __init__(self, start, end, name, return_type, parameters, 362 modifiers, templated_types, body, namespace): 363 _GenericDeclaration.__init__(self, start, end, name, namespace) 364 converter = TypeConverter(namespace) 365 self.return_type = converter.CreateReturnType(return_type) 366 self.parameters = converter.ToParameters(parameters) 367 self.modifiers = modifiers 368 self.body = body 369 self.templated_types = templated_types 370 371 def IsDeclaration(self): 372 return self.body is None 373 374 def IsDefinition(self): 375 return self.body is not None 376 377 def IsExportable(self): 378 if self.return_type and 'static' in self.return_type.modifiers: 379 return False 380 return None not in self.namespace 381 382 def Requires(self, node): 383 if self.parameters: 384 # TODO(nnorwitz): parameters are tokens, do name comparison. 385 for p in self.parameters: 386 if p.name == node.name: 387 return True 388 # TODO(nnorwitz): search in body too. 389 return False 390 391 def __str__(self): 392 # TODO(nnorwitz): add templated_types. 393 suffix = ('%s %s(%s), 0x%02x, %s' % 394 (self.return_type, self.name, self.parameters, 395 self.modifiers, self.body)) 396 return self._TypeStringHelper(suffix) 397 398 399class Method(Function): 400 def __init__(self, start, end, name, in_class, return_type, parameters, 401 modifiers, templated_types, body, namespace): 402 Function.__init__(self, start, end, name, return_type, parameters, 403 modifiers, templated_types, body, namespace) 404 # TODO(nnorwitz): in_class could also be a namespace which can 405 # mess up finding functions properly. 406 self.in_class = in_class 407 408 409class Type(_GenericDeclaration): 410 """Type used for any variable (eg class, primitive, struct, etc).""" 411 412 def __init__(self, start, end, name, templated_types, modifiers, 413 reference, pointer, array): 414 """ 415 Args: 416 name: str name of main type 417 templated_types: [Class (Type?)] template type info between <> 418 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc. 419 reference, pointer, array: bools 420 """ 421 _GenericDeclaration.__init__(self, start, end, name, []) 422 self.templated_types = templated_types 423 if not name and modifiers: 424 self.name = modifiers.pop() 425 self.modifiers = modifiers 426 self.reference = reference 427 self.pointer = pointer 428 self.array = array 429 430 def __str__(self): 431 prefix = '' 432 if self.modifiers: 433 prefix = ' '.join(self.modifiers) + ' ' 434 name = str(self.name) 435 if self.templated_types: 436 name += '<%s>' % self.templated_types 437 suffix = prefix + name 438 if self.reference: 439 suffix += '&' 440 if self.pointer: 441 suffix += '*' 442 if self.array: 443 suffix += '[]' 444 return self._TypeStringHelper(suffix) 445 446 # By definition, Is* are always False. A Type can only exist in 447 # some sort of variable declaration, parameter, or return value. 448 def IsDeclaration(self): 449 return False 450 451 def IsDefinition(self): 452 return False 453 454 def IsExportable(self): 455 return False 456 457 458class TypeConverter(object): 459 460 def __init__(self, namespace_stack): 461 self.namespace_stack = namespace_stack 462 463 def _GetTemplateEnd(self, tokens, start): 464 count = 1 465 end = start 466 while 1: 467 token = tokens[end] 468 end += 1 469 if token.name == '<': 470 count += 1 471 elif token.name == '>': 472 count -= 1 473 if count == 0: 474 break 475 return tokens[start:end-1], end 476 477 def ToType(self, tokens): 478 """Convert [Token,...] to [Class(...), ] useful for base classes. 479 For example, code like class Foo : public Bar<x, y> { ... }; 480 the "Bar<x, y>" portion gets converted to an AST. 481 482 Returns: 483 [Class(...), ...] 484 """ 485 result = [] 486 name_tokens = [] 487 reference = pointer = array = False 488 489 def AddType(templated_types): 490 # Partition tokens into name and modifier tokens. 491 names = [] 492 modifiers = [] 493 for t in name_tokens: 494 if keywords.IsKeyword(t.name): 495 modifiers.append(t.name) 496 else: 497 names.append(t.name) 498 name = ''.join(names) 499 if name_tokens: 500 result.append(Type(name_tokens[0].start, name_tokens[-1].end, 501 name, templated_types, modifiers, 502 reference, pointer, array)) 503 del name_tokens[:] 504 505 i = 0 506 end = len(tokens) 507 while i < end: 508 token = tokens[i] 509 if token.name == '<': 510 new_tokens, new_end = self._GetTemplateEnd(tokens, i+1) 511 AddType(self.ToType(new_tokens)) 512 # If there is a comma after the template, we need to consume 513 # that here otherwise it becomes part of the name. 514 i = new_end 515 reference = pointer = array = False 516 elif token.name == ',': 517 AddType([]) 518 reference = pointer = array = False 519 elif token.name == '*': 520 pointer = True 521 elif token.name == '&': 522 reference = True 523 elif token.name == '[': 524 pointer = True 525 elif token.name == ']': 526 pass 527 else: 528 name_tokens.append(token) 529 i += 1 530 531 if name_tokens: 532 # No '<' in the tokens, just a simple name and no template. 533 AddType([]) 534 return result 535 536 def DeclarationToParts(self, parts, needs_name_removed): 537 name = None 538 default = [] 539 if needs_name_removed: 540 # Handle default (initial) values properly. 541 for i, t in enumerate(parts): 542 if t.name == '=': 543 default = parts[i+1:] 544 name = parts[i-1].name 545 if name == ']' and parts[i-2].name == '[': 546 name = parts[i-3].name 547 i -= 1 548 parts = parts[:i-1] 549 break 550 else: 551 if parts[-1].token_type == tokenize.NAME: 552 name = parts.pop().name 553 else: 554 # TODO(nnorwitz): this is a hack that happens for code like 555 # Register(Foo<T>); where it thinks this is a function call 556 # but it's actually a declaration. 557 name = '???' 558 modifiers = [] 559 type_name = [] 560 other_tokens = [] 561 templated_types = [] 562 i = 0 563 end = len(parts) 564 while i < end: 565 p = parts[i] 566 if keywords.IsKeyword(p.name): 567 modifiers.append(p.name) 568 elif p.name == '<': 569 templated_tokens, new_end = self._GetTemplateEnd(parts, i+1) 570 templated_types = self.ToType(templated_tokens) 571 i = new_end - 1 572 # Don't add a spurious :: to data members being initialized. 573 next_index = i + 1 574 if next_index < end and parts[next_index].name == '::': 575 i += 1 576 elif p.name in ('[', ']', '='): 577 # These are handled elsewhere. 578 other_tokens.append(p) 579 elif p.name not in ('*', '&', '>'): 580 # Ensure that names have a space between them. 581 if (type_name and type_name[-1].token_type == tokenize.NAME and 582 p.token_type == tokenize.NAME): 583 type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0)) 584 type_name.append(p) 585 else: 586 other_tokens.append(p) 587 i += 1 588 type_name = ''.join([t.name for t in type_name]) 589 return name, type_name, templated_types, modifiers, default, other_tokens 590 591 def ToParameters(self, tokens): 592 if not tokens: 593 return [] 594 595 result = [] 596 name = type_name = '' 597 type_modifiers = [] 598 pointer = reference = array = False 599 first_token = None 600 default = [] 601 602 def AddParameter(end): 603 if default: 604 del default[0] # Remove flag. 605 parts = self.DeclarationToParts(type_modifiers, True) 606 (name, type_name, templated_types, modifiers, 607 unused_default, unused_other_tokens) = parts 608 parameter_type = Type(first_token.start, first_token.end, 609 type_name, templated_types, modifiers, 610 reference, pointer, array) 611 p = Parameter(first_token.start, end, name, 612 parameter_type, default) 613 result.append(p) 614 615 template_count = 0 616 for s in tokens: 617 if not first_token: 618 first_token = s 619 if s.name == '<': 620 template_count += 1 621 elif s.name == '>': 622 template_count -= 1 623 if template_count > 0: 624 type_modifiers.append(s) 625 continue 626 627 if s.name == ',': 628 AddParameter(s.start) 629 name = type_name = '' 630 type_modifiers = [] 631 pointer = reference = array = False 632 first_token = None 633 default = [] 634 elif s.name == '*': 635 pointer = True 636 elif s.name == '&': 637 reference = True 638 elif s.name == '[': 639 array = True 640 elif s.name == ']': 641 pass # Just don't add to type_modifiers. 642 elif s.name == '=': 643 # Got a default value. Add any value (None) as a flag. 644 default.append(None) 645 elif default: 646 default.append(s) 647 else: 648 type_modifiers.append(s) 649 AddParameter(tokens[-1].end) 650 return result 651 652 def CreateReturnType(self, return_type_seq): 653 if not return_type_seq: 654 return None 655 start = return_type_seq[0].start 656 end = return_type_seq[-1].end 657 _, name, templated_types, modifiers, default, other_tokens = \ 658 self.DeclarationToParts(return_type_seq, False) 659 names = [n.name for n in other_tokens] 660 reference = '&' in names 661 pointer = '*' in names 662 array = '[' in names 663 return Type(start, end, name, templated_types, modifiers, 664 reference, pointer, array) 665 666 def GetTemplateIndices(self, names): 667 # names is a list of strings. 668 start = names.index('<') 669 end = len(names) - 1 670 while end > 0: 671 if names[end] == '>': 672 break 673 end -= 1 674 return start, end+1 675 676class AstBuilder(object): 677 def __init__(self, token_stream, filename, in_class='', visibility=None, 678 namespace_stack=[]): 679 self.tokens = token_stream 680 self.filename = filename 681 # TODO(nnorwitz): use a better data structure (deque) for the queue. 682 # Switching directions of the "queue" improved perf by about 25%. 683 # Using a deque should be even better since we access from both sides. 684 self.token_queue = [] 685 self.namespace_stack = namespace_stack[:] 686 self.in_class = in_class 687 if in_class is None: 688 self.in_class_name_only = None 689 else: 690 self.in_class_name_only = in_class.split('::')[-1] 691 self.visibility = visibility 692 self.in_function = False 693 self.current_token = None 694 # Keep the state whether we are currently handling a typedef or not. 695 self._handling_typedef = False 696 697 self.converter = TypeConverter(self.namespace_stack) 698 699 def HandleError(self, msg, token): 700 printable_queue = list(reversed(self.token_queue[-20:])) 701 sys.stderr.write('Got %s in %s @ %s %s\n' % 702 (msg, self.filename, token, printable_queue)) 703 704 def Generate(self): 705 while 1: 706 token = self._GetNextToken() 707 if not token: 708 break 709 710 # Get the next token. 711 self.current_token = token 712 713 # Dispatch on the next token type. 714 if token.token_type == _INTERNAL_TOKEN: 715 if token.name == _NAMESPACE_POP: 716 self.namespace_stack.pop() 717 continue 718 719 try: 720 result = self._GenerateOne(token) 721 if result is not None: 722 yield result 723 except: 724 self.HandleError('exception', token) 725 raise 726 727 def _CreateVariable(self, pos_token, name, type_name, type_modifiers, 728 ref_pointer_name_seq, templated_types, value=None): 729 reference = '&' in ref_pointer_name_seq 730 pointer = '*' in ref_pointer_name_seq 731 array = '[' in ref_pointer_name_seq 732 var_type = Type(pos_token.start, pos_token.end, type_name, 733 templated_types, type_modifiers, 734 reference, pointer, array) 735 return VariableDeclaration(pos_token.start, pos_token.end, 736 name, var_type, value, self.namespace_stack) 737 738 def _GenerateOne(self, token): 739 if token.token_type == tokenize.NAME: 740 if (keywords.IsKeyword(token.name) and 741 not keywords.IsBuiltinType(token.name)): 742 method = getattr(self, 'handle_' + token.name) 743 return method() 744 elif token.name == self.in_class_name_only: 745 # The token name is the same as the class, must be a ctor if 746 # there is a paren. Otherwise, it's the return type. 747 # Peek ahead to get the next token to figure out which. 748 next = self._GetNextToken() 749 self._AddBackToken(next) 750 if next.token_type == tokenize.SYNTAX and next.name == '(': 751 return self._GetMethod([token], FUNCTION_CTOR, None, True) 752 # Fall through--handle like any other method. 753 754 # Handle data or function declaration/definition. 755 syntax = tokenize.SYNTAX 756 temp_tokens, last_token = \ 757 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[') 758 temp_tokens.insert(0, token) 759 if last_token.name == '(': 760 # If there is an assignment before the paren, 761 # this is an expression, not a method. 762 expr = bool([e for e in temp_tokens if e.name == '=']) 763 if expr: 764 new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';') 765 temp_tokens.append(last_token) 766 temp_tokens.extend(new_temp) 767 last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0) 768 769 if last_token.name == '[': 770 # Handle array, this isn't a method, unless it's an operator. 771 # TODO(nnorwitz): keep the size somewhere. 772 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']') 773 temp_tokens.append(last_token) 774 if temp_tokens[-2].name == 'operator': 775 temp_tokens.append(self._GetNextToken()) 776 else: 777 temp_tokens2, last_token = \ 778 self._GetVarTokensUpTo(tokenize.SYNTAX, ';') 779 temp_tokens.extend(temp_tokens2) 780 781 if last_token.name == ';': 782 # Handle data, this isn't a method. 783 parts = self.converter.DeclarationToParts(temp_tokens, True) 784 (name, type_name, templated_types, modifiers, default, 785 unused_other_tokens) = parts 786 787 t0 = temp_tokens[0] 788 names = [t.name for t in temp_tokens] 789 if templated_types: 790 start, end = self.converter.GetTemplateIndices(names) 791 names = names[:start] + names[end:] 792 default = ''.join([t.name for t in default]) 793 return self._CreateVariable(t0, name, type_name, modifiers, 794 names, templated_types, default) 795 if last_token.name == '{': 796 self._AddBackTokens(temp_tokens[1:]) 797 self._AddBackToken(last_token) 798 method_name = temp_tokens[0].name 799 method = getattr(self, 'handle_' + method_name, None) 800 if not method: 801 # Must be declaring a variable. 802 # TODO(nnorwitz): handle the declaration. 803 return None 804 return method() 805 return self._GetMethod(temp_tokens, 0, None, False) 806 elif token.token_type == tokenize.SYNTAX: 807 if token.name == '~' and self.in_class: 808 # Must be a dtor (probably not in method body). 809 token = self._GetNextToken() 810 # self.in_class can contain A::Name, but the dtor will only 811 # be Name. Make sure to compare against the right value. 812 if (token.token_type == tokenize.NAME and 813 token.name == self.in_class_name_only): 814 return self._GetMethod([token], FUNCTION_DTOR, None, True) 815 # TODO(nnorwitz): handle a lot more syntax. 816 elif token.token_type == tokenize.PREPROCESSOR: 817 # TODO(nnorwitz): handle more preprocessor directives. 818 # token starts with a #, so remove it and strip whitespace. 819 name = token.name[1:].lstrip() 820 if name.startswith('include'): 821 # Remove "include". 822 name = name[7:].strip() 823 assert name 824 # Handle #include \<newline> "header-on-second-line.h". 825 if name.startswith('\\'): 826 name = name[1:].strip() 827 assert name[0] in '<"', token 828 assert name[-1] in '>"', token 829 system = name[0] == '<' 830 filename = name[1:-1] 831 return Include(token.start, token.end, filename, system) 832 if name.startswith('define'): 833 # Remove "define". 834 name = name[6:].strip() 835 assert name 836 value = '' 837 for i, c in enumerate(name): 838 if c.isspace(): 839 value = name[i:].lstrip() 840 name = name[:i] 841 break 842 return Define(token.start, token.end, name, value) 843 if name.startswith('if') and name[2:3].isspace(): 844 condition = name[3:].strip() 845 if condition.startswith('0') or condition.startswith('(0)'): 846 self._SkipIf0Blocks() 847 return None 848 849 def _GetTokensUpTo(self, expected_token_type, expected_token): 850 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0] 851 852 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens): 853 last_token = self._GetNextToken() 854 tokens = [] 855 while (last_token.token_type != expected_token_type or 856 last_token.name not in expected_tokens): 857 tokens.append(last_token) 858 last_token = self._GetNextToken() 859 return tokens, last_token 860 861 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary. 862 def _IgnoreUpTo(self, token_type, token): 863 unused_tokens = self._GetTokensUpTo(token_type, token) 864 865 def _SkipIf0Blocks(self): 866 count = 1 867 while 1: 868 token = self._GetNextToken() 869 if token.token_type != tokenize.PREPROCESSOR: 870 continue 871 872 name = token.name[1:].lstrip() 873 if name.startswith('endif'): 874 count -= 1 875 if count == 0: 876 break 877 elif name.startswith('if'): 878 count += 1 879 880 def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None): 881 if GetNextToken is None: 882 GetNextToken = self._GetNextToken 883 # Assumes the current token is open_paren and we will consume 884 # and return up to the close_paren. 885 count = 1 886 token = GetNextToken() 887 while 1: 888 if token.token_type == tokenize.SYNTAX: 889 if token.name == open_paren: 890 count += 1 891 elif token.name == close_paren: 892 count -= 1 893 if count == 0: 894 break 895 yield token 896 token = GetNextToken() 897 yield token 898 899 def _GetParameters(self): 900 return self._GetMatchingChar('(', ')') 901 902 def GetScope(self): 903 return self._GetMatchingChar('{', '}') 904 905 def _GetNextToken(self): 906 if self.token_queue: 907 return self.token_queue.pop() 908 return next(self.tokens) 909 910 def _AddBackToken(self, token): 911 if token.whence == tokenize.WHENCE_STREAM: 912 token.whence = tokenize.WHENCE_QUEUE 913 self.token_queue.insert(0, token) 914 else: 915 assert token.whence == tokenize.WHENCE_QUEUE, token 916 self.token_queue.append(token) 917 918 def _AddBackTokens(self, tokens): 919 if tokens: 920 if tokens[-1].whence == tokenize.WHENCE_STREAM: 921 for token in tokens: 922 token.whence = tokenize.WHENCE_QUEUE 923 self.token_queue[:0] = reversed(tokens) 924 else: 925 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens 926 self.token_queue.extend(reversed(tokens)) 927 928 def GetName(self, seq=None): 929 """Returns ([tokens], next_token_info).""" 930 GetNextToken = self._GetNextToken 931 if seq is not None: 932 it = iter(seq) 933 GetNextToken = lambda: next(it) 934 next_token = GetNextToken() 935 tokens = [] 936 last_token_was_name = False 937 while (next_token.token_type == tokenize.NAME or 938 (next_token.token_type == tokenize.SYNTAX and 939 next_token.name in ('::', '<'))): 940 # Two NAMEs in a row means the identifier should terminate. 941 # It's probably some sort of variable declaration. 942 if last_token_was_name and next_token.token_type == tokenize.NAME: 943 break 944 last_token_was_name = next_token.token_type == tokenize.NAME 945 tokens.append(next_token) 946 # Handle templated names. 947 if next_token.name == '<': 948 tokens.extend(self._GetMatchingChar('<', '>', GetNextToken)) 949 last_token_was_name = True 950 next_token = GetNextToken() 951 return tokens, next_token 952 953 def GetMethod(self, modifiers, templated_types): 954 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') 955 assert len(return_type_and_name) >= 1 956 return self._GetMethod(return_type_and_name, modifiers, templated_types, 957 False) 958 959 def _GetMethod(self, return_type_and_name, modifiers, templated_types, 960 get_paren): 961 template_portion = None 962 if get_paren: 963 token = self._GetNextToken() 964 assert token.token_type == tokenize.SYNTAX, token 965 if token.name == '<': 966 # Handle templatized dtors. 967 template_portion = [token] 968 template_portion.extend(self._GetMatchingChar('<', '>')) 969 token = self._GetNextToken() 970 assert token.token_type == tokenize.SYNTAX, token 971 assert token.name == '(', token 972 973 name = return_type_and_name.pop() 974 # Handle templatized ctors. 975 if name.name == '>': 976 index = 1 977 while return_type_and_name[index].name != '<': 978 index += 1 979 template_portion = return_type_and_name[index:] + [name] 980 del return_type_and_name[index:] 981 name = return_type_and_name.pop() 982 elif name.name == ']': 983 rt = return_type_and_name 984 assert rt[-1].name == '[', return_type_and_name 985 assert rt[-2].name == 'operator', return_type_and_name 986 name_seq = return_type_and_name[-2:] 987 del return_type_and_name[-2:] 988 name = tokenize.Token(tokenize.NAME, 'operator[]', 989 name_seq[0].start, name.end) 990 # Get the open paren so _GetParameters() below works. 991 unused_open_paren = self._GetNextToken() 992 993 # TODO(nnorwitz): store template_portion. 994 return_type = return_type_and_name 995 indices = name 996 if return_type: 997 indices = return_type[0] 998 999 # Force ctor for templatized ctors. 1000 if name.name == self.in_class and not modifiers: 1001 modifiers |= FUNCTION_CTOR 1002 parameters = list(self._GetParameters()) 1003 del parameters[-1] # Remove trailing ')'. 1004 1005 # Handling operator() is especially weird. 1006 if name.name == 'operator' and not parameters: 1007 token = self._GetNextToken() 1008 assert token.name == '(', token 1009 parameters = list(self._GetParameters()) 1010 del parameters[-1] # Remove trailing ')'. 1011 1012 token = self._GetNextToken() 1013 while token.token_type == tokenize.NAME: 1014 modifier_token = token 1015 token = self._GetNextToken() 1016 if modifier_token.name == 'const': 1017 modifiers |= FUNCTION_CONST 1018 elif modifier_token.name == '__attribute__': 1019 # TODO(nnorwitz): handle more __attribute__ details. 1020 modifiers |= FUNCTION_ATTRIBUTE 1021 assert token.name == '(', token 1022 # Consume everything between the (parens). 1023 unused_tokens = list(self._GetMatchingChar('(', ')')) 1024 token = self._GetNextToken() 1025 elif modifier_token.name == 'throw': 1026 modifiers |= FUNCTION_THROW 1027 assert token.name == '(', token 1028 # Consume everything between the (parens). 1029 unused_tokens = list(self._GetMatchingChar('(', ')')) 1030 token = self._GetNextToken() 1031 elif modifier_token.name == 'override': 1032 modifiers |= FUNCTION_OVERRIDE 1033 elif modifier_token.name == modifier_token.name.upper(): 1034 # HACK(nnorwitz): assume that all upper-case names 1035 # are some macro we aren't expanding. 1036 modifiers |= FUNCTION_UNKNOWN_ANNOTATION 1037 else: 1038 self.HandleError('unexpected token', modifier_token) 1039 1040 assert token.token_type == tokenize.SYNTAX, token 1041 # Handle ctor initializers. 1042 if token.name == ':': 1043 # TODO(nnorwitz): anything else to handle for initializer list? 1044 while token.name != ';' and token.name != '{': 1045 token = self._GetNextToken() 1046 1047 # Handle pointer to functions that are really data but look 1048 # like method declarations. 1049 if token.name == '(': 1050 if parameters[0].name == '*': 1051 # name contains the return type. 1052 name = parameters.pop() 1053 # parameters contains the name of the data. 1054 modifiers = [p.name for p in parameters] 1055 # Already at the ( to open the parameter list. 1056 function_parameters = list(self._GetMatchingChar('(', ')')) 1057 del function_parameters[-1] # Remove trailing ')'. 1058 # TODO(nnorwitz): store the function_parameters. 1059 token = self._GetNextToken() 1060 assert token.token_type == tokenize.SYNTAX, token 1061 assert token.name == ';', token 1062 return self._CreateVariable(indices, name.name, indices.name, 1063 modifiers, '', None) 1064 # At this point, we got something like: 1065 # return_type (type::*name_)(params); 1066 # This is a data member called name_ that is a function pointer. 1067 # With this code: void (sq_type::*field_)(string&); 1068 # We get: name=void return_type=[] parameters=sq_type ... field_ 1069 # TODO(nnorwitz): is return_type always empty? 1070 # TODO(nnorwitz): this isn't even close to being correct. 1071 # Just put in something so we don't crash and can move on. 1072 real_name = parameters[-1] 1073 modifiers = [p.name for p in self._GetParameters()] 1074 del modifiers[-1] # Remove trailing ')'. 1075 return self._CreateVariable(indices, real_name.name, indices.name, 1076 modifiers, '', None) 1077 1078 if token.name == '{': 1079 body = list(self.GetScope()) 1080 del body[-1] # Remove trailing '}'. 1081 else: 1082 body = None 1083 if token.name == '=': 1084 token = self._GetNextToken() 1085 1086 if token.name == 'default' or token.name == 'delete': 1087 # Ignore explicitly defaulted and deleted special members 1088 # in C++11. 1089 token = self._GetNextToken() 1090 else: 1091 # Handle pure-virtual declarations. 1092 assert token.token_type == tokenize.CONSTANT, token 1093 assert token.name == '0', token 1094 modifiers |= FUNCTION_PURE_VIRTUAL 1095 token = self._GetNextToken() 1096 1097 if token.name == '[': 1098 # TODO(nnorwitz): store tokens and improve parsing. 1099 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N]; 1100 tokens = list(self._GetMatchingChar('[', ']')) 1101 token = self._GetNextToken() 1102 1103 assert token.name == ';', (token, return_type_and_name, parameters) 1104 1105 # Looks like we got a method, not a function. 1106 if len(return_type) > 2 and return_type[-1].name == '::': 1107 return_type, in_class = \ 1108 self._GetReturnTypeAndClassName(return_type) 1109 return Method(indices.start, indices.end, name.name, in_class, 1110 return_type, parameters, modifiers, templated_types, 1111 body, self.namespace_stack) 1112 return Function(indices.start, indices.end, name.name, return_type, 1113 parameters, modifiers, templated_types, body, 1114 self.namespace_stack) 1115 1116 def _GetReturnTypeAndClassName(self, token_seq): 1117 # Splitting the return type from the class name in a method 1118 # can be tricky. For example, Return::Type::Is::Hard::To::Find(). 1119 # Where is the return type and where is the class name? 1120 # The heuristic used is to pull the last name as the class name. 1121 # This includes all the templated type info. 1122 # TODO(nnorwitz): if there is only One name like in the 1123 # example above, punt and assume the last bit is the class name. 1124 1125 # Ignore a :: prefix, if exists so we can find the first real name. 1126 i = 0 1127 if token_seq[0].name == '::': 1128 i = 1 1129 # Ignore a :: suffix, if exists. 1130 end = len(token_seq) - 1 1131 if token_seq[end-1].name == '::': 1132 end -= 1 1133 1134 # Make a copy of the sequence so we can append a sentinel 1135 # value. This is required for GetName will has to have some 1136 # terminating condition beyond the last name. 1137 seq_copy = token_seq[i:end] 1138 seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0)) 1139 names = [] 1140 while i < end: 1141 # Iterate through the sequence parsing out each name. 1142 new_name, next = self.GetName(seq_copy[i:]) 1143 assert new_name, 'Got empty new_name, next=%s' % next 1144 # We got a pointer or ref. Add it to the name. 1145 if next and next.token_type == tokenize.SYNTAX: 1146 new_name.append(next) 1147 names.append(new_name) 1148 i += len(new_name) 1149 1150 # Now that we have the names, it's time to undo what we did. 1151 1152 # Remove the sentinel value. 1153 names[-1].pop() 1154 # Flatten the token sequence for the return type. 1155 return_type = [e for seq in names[:-1] for e in seq] 1156 # The class name is the last name. 1157 class_name = names[-1] 1158 return return_type, class_name 1159 1160 def handle_bool(self): 1161 pass 1162 1163 def handle_char(self): 1164 pass 1165 1166 def handle_int(self): 1167 pass 1168 1169 def handle_long(self): 1170 pass 1171 1172 def handle_short(self): 1173 pass 1174 1175 def handle_double(self): 1176 pass 1177 1178 def handle_float(self): 1179 pass 1180 1181 def handle_void(self): 1182 pass 1183 1184 def handle_wchar_t(self): 1185 pass 1186 1187 def handle_unsigned(self): 1188 pass 1189 1190 def handle_signed(self): 1191 pass 1192 1193 def _GetNestedType(self, ctor): 1194 name = None 1195 name_tokens, token = self.GetName() 1196 if name_tokens: 1197 name = ''.join([t.name for t in name_tokens]) 1198 1199 # Handle forward declarations. 1200 if token.token_type == tokenize.SYNTAX and token.name == ';': 1201 return ctor(token.start, token.end, name, None, 1202 self.namespace_stack) 1203 1204 if token.token_type == tokenize.NAME and self._handling_typedef: 1205 self._AddBackToken(token) 1206 return ctor(token.start, token.end, name, None, 1207 self.namespace_stack) 1208 1209 # Must be the type declaration. 1210 fields = list(self._GetMatchingChar('{', '}')) 1211 del fields[-1] # Remove trailing '}'. 1212 if token.token_type == tokenize.SYNTAX and token.name == '{': 1213 next = self._GetNextToken() 1214 new_type = ctor(token.start, token.end, name, fields, 1215 self.namespace_stack) 1216 # A name means this is an anonymous type and the name 1217 # is the variable declaration. 1218 if next.token_type != tokenize.NAME: 1219 return new_type 1220 name = new_type 1221 token = next 1222 1223 # Must be variable declaration using the type prefixed with keyword. 1224 assert token.token_type == tokenize.NAME, token 1225 return self._CreateVariable(token, token.name, name, [], '', None) 1226 1227 def handle_struct(self): 1228 # Special case the handling typedef/aliasing of structs here. 1229 # It would be a pain to handle in the class code. 1230 name_tokens, var_token = self.GetName() 1231 if name_tokens: 1232 next_token = self._GetNextToken() 1233 is_syntax = (var_token.token_type == tokenize.SYNTAX and 1234 var_token.name[0] in '*&') 1235 is_variable = (var_token.token_type == tokenize.NAME and 1236 next_token.name == ';') 1237 variable = var_token 1238 if is_syntax and not is_variable: 1239 variable = next_token 1240 temp = self._GetNextToken() 1241 if temp.token_type == tokenize.SYNTAX and temp.name == '(': 1242 # Handle methods declared to return a struct. 1243 t0 = name_tokens[0] 1244 struct = tokenize.Token(tokenize.NAME, 'struct', 1245 t0.start-7, t0.start-2) 1246 type_and_name = [struct] 1247 type_and_name.extend(name_tokens) 1248 type_and_name.extend((var_token, next_token)) 1249 return self._GetMethod(type_and_name, 0, None, False) 1250 assert temp.name == ';', (temp, name_tokens, var_token) 1251 if is_syntax or (is_variable and not self._handling_typedef): 1252 modifiers = ['struct'] 1253 type_name = ''.join([t.name for t in name_tokens]) 1254 position = name_tokens[0] 1255 return self._CreateVariable(position, variable.name, type_name, 1256 modifiers, var_token.name, None) 1257 name_tokens.extend((var_token, next_token)) 1258 self._AddBackTokens(name_tokens) 1259 else: 1260 self._AddBackToken(var_token) 1261 return self._GetClass(Struct, VISIBILITY_PUBLIC, None) 1262 1263 def handle_union(self): 1264 return self._GetNestedType(Union) 1265 1266 def handle_enum(self): 1267 token = self._GetNextToken() 1268 if not (token.token_type == tokenize.NAME and token.name == 'class'): 1269 self._AddBackToken(token) 1270 return self._GetNestedType(Enum) 1271 1272 def handle_auto(self): 1273 # TODO(nnorwitz): warn about using auto? Probably not since it 1274 # will be reclaimed and useful for C++0x. 1275 pass 1276 1277 def handle_register(self): 1278 pass 1279 1280 def handle_const(self): 1281 pass 1282 1283 def handle_inline(self): 1284 pass 1285 1286 def handle_extern(self): 1287 pass 1288 1289 def handle_static(self): 1290 pass 1291 1292 def handle_virtual(self): 1293 # What follows must be a method. 1294 token = token2 = self._GetNextToken() 1295 if token.name == 'inline': 1296 # HACK(nnorwitz): handle inline dtors by ignoring 'inline'. 1297 token2 = self._GetNextToken() 1298 if token2.token_type == tokenize.SYNTAX and token2.name == '~': 1299 return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None) 1300 assert token.token_type == tokenize.NAME or token.name == '::', token 1301 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') # ) 1302 return_type_and_name.insert(0, token) 1303 if token2 is not token: 1304 return_type_and_name.insert(1, token2) 1305 return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL, 1306 None, False) 1307 1308 def handle_volatile(self): 1309 pass 1310 1311 def handle_mutable(self): 1312 pass 1313 1314 def handle_public(self): 1315 assert self.in_class 1316 self.visibility = VISIBILITY_PUBLIC 1317 1318 def handle_protected(self): 1319 assert self.in_class 1320 self.visibility = VISIBILITY_PROTECTED 1321 1322 def handle_private(self): 1323 assert self.in_class 1324 self.visibility = VISIBILITY_PRIVATE 1325 1326 def handle_friend(self): 1327 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1328 assert tokens 1329 t0 = tokens[0] 1330 return Friend(t0.start, t0.end, tokens, self.namespace_stack) 1331 1332 def handle_static_cast(self): 1333 pass 1334 1335 def handle_const_cast(self): 1336 pass 1337 1338 def handle_dynamic_cast(self): 1339 pass 1340 1341 def handle_reinterpret_cast(self): 1342 pass 1343 1344 def handle_new(self): 1345 pass 1346 1347 def handle_delete(self): 1348 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1349 assert tokens 1350 return Delete(tokens[0].start, tokens[0].end, tokens) 1351 1352 def handle_typedef(self): 1353 token = self._GetNextToken() 1354 if (token.token_type == tokenize.NAME and 1355 keywords.IsKeyword(token.name)): 1356 # Token must be struct/enum/union/class. 1357 method = getattr(self, 'handle_' + token.name) 1358 self._handling_typedef = True 1359 tokens = [method()] 1360 self._handling_typedef = False 1361 else: 1362 tokens = [token] 1363 1364 # Get the remainder of the typedef up to the semi-colon. 1365 tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';')) 1366 1367 # TODO(nnorwitz): clean all this up. 1368 assert tokens 1369 name = tokens.pop() 1370 indices = name 1371 if tokens: 1372 indices = tokens[0] 1373 if not indices: 1374 indices = token 1375 if name.name == ')': 1376 # HACK(nnorwitz): Handle pointers to functions "properly". 1377 if (len(tokens) >= 4 and 1378 tokens[1].name == '(' and tokens[2].name == '*'): 1379 tokens.append(name) 1380 name = tokens[3] 1381 elif name.name == ']': 1382 # HACK(nnorwitz): Handle arrays properly. 1383 if len(tokens) >= 2: 1384 tokens.append(name) 1385 name = tokens[1] 1386 new_type = tokens 1387 if tokens and isinstance(tokens[0], tokenize.Token): 1388 new_type = self.converter.ToType(tokens)[0] 1389 return Typedef(indices.start, indices.end, name.name, 1390 new_type, self.namespace_stack) 1391 1392 def handle_typeid(self): 1393 pass # Not needed yet. 1394 1395 def handle_typename(self): 1396 pass # Not needed yet. 1397 1398 def _GetTemplatedTypes(self): 1399 result = {} 1400 tokens = list(self._GetMatchingChar('<', '>')) 1401 len_tokens = len(tokens) - 1 # Ignore trailing '>'. 1402 i = 0 1403 while i < len_tokens: 1404 key = tokens[i].name 1405 i += 1 1406 if keywords.IsKeyword(key) or key == ',': 1407 continue 1408 type_name = default = None 1409 if i < len_tokens: 1410 i += 1 1411 if tokens[i-1].name == '=': 1412 assert i < len_tokens, '%s %s' % (i, tokens) 1413 default, unused_next_token = self.GetName(tokens[i:]) 1414 i += len(default) 1415 else: 1416 if tokens[i-1].name != ',': 1417 # We got something like: Type variable. 1418 # Re-adjust the key (variable) and type_name (Type). 1419 key = tokens[i-1].name 1420 type_name = tokens[i-2] 1421 1422 result[key] = (type_name, default) 1423 return result 1424 1425 def handle_template(self): 1426 token = self._GetNextToken() 1427 assert token.token_type == tokenize.SYNTAX, token 1428 assert token.name == '<', token 1429 templated_types = self._GetTemplatedTypes() 1430 # TODO(nnorwitz): for now, just ignore the template params. 1431 token = self._GetNextToken() 1432 if token.token_type == tokenize.NAME: 1433 if token.name == 'class': 1434 return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types) 1435 elif token.name == 'struct': 1436 return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types) 1437 elif token.name == 'friend': 1438 return self.handle_friend() 1439 self._AddBackToken(token) 1440 tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';') 1441 tokens.append(last) 1442 self._AddBackTokens(tokens) 1443 if last.name == '(': 1444 return self.GetMethod(FUNCTION_NONE, templated_types) 1445 # Must be a variable definition. 1446 return None 1447 1448 def handle_true(self): 1449 pass # Nothing to do. 1450 1451 def handle_false(self): 1452 pass # Nothing to do. 1453 1454 def handle_asm(self): 1455 pass # Not needed yet. 1456 1457 def handle_class(self): 1458 return self._GetClass(Class, VISIBILITY_PRIVATE, None) 1459 1460 def _GetBases(self): 1461 # Get base classes. 1462 bases = [] 1463 while 1: 1464 token = self._GetNextToken() 1465 assert token.token_type == tokenize.NAME, token 1466 # TODO(nnorwitz): store kind of inheritance...maybe. 1467 if token.name not in ('public', 'protected', 'private'): 1468 # If inheritance type is not specified, it is private. 1469 # Just put the token back so we can form a name. 1470 # TODO(nnorwitz): it would be good to warn about this. 1471 self._AddBackToken(token) 1472 else: 1473 # Check for virtual inheritance. 1474 token = self._GetNextToken() 1475 if token.name != 'virtual': 1476 self._AddBackToken(token) 1477 else: 1478 # TODO(nnorwitz): store that we got virtual for this base. 1479 pass 1480 base, next_token = self.GetName() 1481 bases_ast = self.converter.ToType(base) 1482 assert len(bases_ast) == 1, bases_ast 1483 bases.append(bases_ast[0]) 1484 assert next_token.token_type == tokenize.SYNTAX, next_token 1485 if next_token.name == '{': 1486 token = next_token 1487 break 1488 # Support multiple inheritance. 1489 assert next_token.name == ',', next_token 1490 return bases, token 1491 1492 def _GetClass(self, class_type, visibility, templated_types): 1493 class_name = None 1494 class_token = self._GetNextToken() 1495 if class_token.token_type != tokenize.NAME: 1496 assert class_token.token_type == tokenize.SYNTAX, class_token 1497 token = class_token 1498 else: 1499 # Skip any macro (e.g. storage class specifiers) after the 1500 # 'class' keyword. 1501 next_token = self._GetNextToken() 1502 if next_token.token_type == tokenize.NAME: 1503 self._AddBackToken(next_token) 1504 else: 1505 self._AddBackTokens([class_token, next_token]) 1506 name_tokens, token = self.GetName() 1507 class_name = ''.join([t.name for t in name_tokens]) 1508 bases = None 1509 if token.token_type == tokenize.SYNTAX: 1510 if token.name == ';': 1511 # Forward declaration. 1512 return class_type(class_token.start, class_token.end, 1513 class_name, None, templated_types, None, 1514 self.namespace_stack) 1515 if token.name in '*&': 1516 # Inline forward declaration. Could be method or data. 1517 name_token = self._GetNextToken() 1518 next_token = self._GetNextToken() 1519 if next_token.name == ';': 1520 # Handle data 1521 modifiers = ['class'] 1522 return self._CreateVariable(class_token, name_token.name, 1523 class_name, 1524 modifiers, token.name, None) 1525 else: 1526 # Assume this is a method. 1527 tokens = (class_token, token, name_token, next_token) 1528 self._AddBackTokens(tokens) 1529 return self.GetMethod(FUNCTION_NONE, None) 1530 if token.name == ':': 1531 bases, token = self._GetBases() 1532 1533 body = None 1534 if token.token_type == tokenize.SYNTAX and token.name == '{': 1535 assert token.token_type == tokenize.SYNTAX, token 1536 assert token.name == '{', token 1537 1538 ast = AstBuilder(self.GetScope(), self.filename, class_name, 1539 visibility, self.namespace_stack) 1540 body = list(ast.Generate()) 1541 1542 if not self._handling_typedef: 1543 token = self._GetNextToken() 1544 if token.token_type != tokenize.NAME: 1545 assert token.token_type == tokenize.SYNTAX, token 1546 assert token.name == ';', token 1547 else: 1548 new_class = class_type(class_token.start, class_token.end, 1549 class_name, bases, None, 1550 body, self.namespace_stack) 1551 1552 modifiers = [] 1553 return self._CreateVariable(class_token, 1554 token.name, new_class, 1555 modifiers, token.name, None) 1556 else: 1557 if not self._handling_typedef: 1558 self.HandleError('non-typedef token', token) 1559 self._AddBackToken(token) 1560 1561 return class_type(class_token.start, class_token.end, class_name, 1562 bases, templated_types, body, self.namespace_stack) 1563 1564 def handle_namespace(self): 1565 token = self._GetNextToken() 1566 # Support anonymous namespaces. 1567 name = None 1568 if token.token_type == tokenize.NAME: 1569 name = token.name 1570 token = self._GetNextToken() 1571 self.namespace_stack.append(name) 1572 assert token.token_type == tokenize.SYNTAX, token 1573 # Create an internal token that denotes when the namespace is complete. 1574 internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP, 1575 None, None) 1576 internal_token.whence = token.whence 1577 if token.name == '=': 1578 # TODO(nnorwitz): handle aliasing namespaces. 1579 name, next_token = self.GetName() 1580 assert next_token.name == ';', next_token 1581 self._AddBackToken(internal_token) 1582 else: 1583 assert token.name == '{', token 1584 tokens = list(self.GetScope()) 1585 # Replace the trailing } with the internal namespace pop token. 1586 tokens[-1] = internal_token 1587 # Handle namespace with nothing in it. 1588 self._AddBackTokens(tokens) 1589 return None 1590 1591 def handle_using(self): 1592 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1593 assert tokens 1594 return Using(tokens[0].start, tokens[0].end, tokens) 1595 1596 def handle_explicit(self): 1597 assert self.in_class 1598 # Nothing much to do. 1599 # TODO(nnorwitz): maybe verify the method name == class name. 1600 # This must be a ctor. 1601 return self.GetMethod(FUNCTION_CTOR, None) 1602 1603 def handle_this(self): 1604 pass # Nothing to do. 1605 1606 def handle_operator(self): 1607 # Pull off the next token(s?) and make that part of the method name. 1608 pass 1609 1610 def handle_sizeof(self): 1611 pass 1612 1613 def handle_case(self): 1614 pass 1615 1616 def handle_switch(self): 1617 pass 1618 1619 def handle_default(self): 1620 token = self._GetNextToken() 1621 assert token.token_type == tokenize.SYNTAX 1622 assert token.name == ':' 1623 1624 def handle_if(self): 1625 pass 1626 1627 def handle_else(self): 1628 pass 1629 1630 def handle_return(self): 1631 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1632 if not tokens: 1633 return Return(self.current_token.start, self.current_token.end, None) 1634 return Return(tokens[0].start, tokens[0].end, tokens) 1635 1636 def handle_goto(self): 1637 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1638 assert len(tokens) == 1, str(tokens) 1639 return Goto(tokens[0].start, tokens[0].end, tokens[0].name) 1640 1641 def handle_try(self): 1642 pass # Not needed yet. 1643 1644 def handle_catch(self): 1645 pass # Not needed yet. 1646 1647 def handle_throw(self): 1648 pass # Not needed yet. 1649 1650 def handle_while(self): 1651 pass 1652 1653 def handle_do(self): 1654 pass 1655 1656 def handle_for(self): 1657 pass 1658 1659 def handle_break(self): 1660 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1661 1662 def handle_continue(self): 1663 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1664 1665 1666def BuilderFromSource(source, filename): 1667 """Utility method that returns an AstBuilder from source code. 1668 1669 Args: 1670 source: 'C++ source code' 1671 filename: 'file1' 1672 1673 Returns: 1674 AstBuilder 1675 """ 1676 return AstBuilder(tokenize.GetTokens(source), filename) 1677 1678 1679def PrintIndentifiers(filename, should_print): 1680 """Prints all identifiers for a C++ source file. 1681 1682 Args: 1683 filename: 'file1' 1684 should_print: predicate with signature: bool Function(token) 1685 """ 1686 source = utils.ReadFile(filename, False) 1687 if source is None: 1688 sys.stderr.write('Unable to find: %s\n' % filename) 1689 return 1690 1691 #print('Processing %s' % actual_filename) 1692 builder = BuilderFromSource(source, filename) 1693 try: 1694 for node in builder.Generate(): 1695 if should_print(node): 1696 print(node.name) 1697 except KeyboardInterrupt: 1698 return 1699 except: 1700 pass 1701 1702 1703def PrintAllIndentifiers(filenames, should_print): 1704 """Prints all identifiers for each C++ source file in filenames. 1705 1706 Args: 1707 filenames: ['file1', 'file2', ...] 1708 should_print: predicate with signature: bool Function(token) 1709 """ 1710 for path in filenames: 1711 PrintIndentifiers(path, should_print) 1712 1713 1714def main(argv): 1715 for filename in argv[1:]: 1716 source = utils.ReadFile(filename) 1717 if source is None: 1718 continue 1719 1720 print('Processing %s' % filename) 1721 builder = BuilderFromSource(source, filename) 1722 try: 1723 entire_ast = filter(None, builder.Generate()) 1724 except KeyboardInterrupt: 1725 return 1726 except: 1727 # Already printed a warning, print the traceback and continue. 1728 traceback.print_exc() 1729 else: 1730 if utils.DEBUG: 1731 for ast in entire_ast: 1732 print(ast) 1733 1734 1735if __name__ == '__main__': 1736 main(sys.argv) 1737