1#!/usr/bin/env python 2"""A glorified C pre-processor parser.""" 3 4import ctypes 5import logging 6import os 7import re 8import site 9import utils 10 11top = os.getenv('ANDROID_BUILD_TOP') 12if top is None: 13 utils.panic('ANDROID_BUILD_TOP not set.\n') 14 15# Set up the env vars for libclang. 16site.addsitedir(os.path.join(top, 'external/clang/bindings/python')) 17 18import clang.cindex 19from clang.cindex import conf 20from clang.cindex import Cursor 21from clang.cindex import CursorKind 22from clang.cindex import SourceLocation 23from clang.cindex import SourceRange 24from clang.cindex import TokenGroup 25from clang.cindex import TokenKind 26from clang.cindex import TranslationUnit 27 28# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc. 29# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help. 30clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64/libclang_android.so')) 31 32from defaults import kCppUndefinedMacro 33from defaults import kernel_remove_config_macros 34from defaults import kernel_struct_replacements 35from defaults import kernel_token_replacements 36 37 38debugBlockParser = False 39debugCppExpr = False 40debugOptimIf01 = False 41 42############################################################################### 43############################################################################### 44##### ##### 45##### C P P T O K E N S ##### 46##### ##### 47############################################################################### 48############################################################################### 49 50# the list of supported C-preprocessor tokens 51# plus a couple of C tokens as well 52tokEOF = "\0" 53tokLN = "\n" 54tokSTRINGIFY = "#" 55tokCONCAT = "##" 56tokLOGICAND = "&&" 57tokLOGICOR = "||" 58tokSHL = "<<" 59tokSHR = ">>" 60tokEQUAL = "==" 61tokNEQUAL = "!=" 62tokLT = "<" 63tokLTE = "<=" 64tokGT = ">" 65tokGTE = ">=" 66tokELLIPSIS = "..." 
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"


class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class. Because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be set by calling
    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    base class.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        """Initialize a Token, optionally wrapping raw libclang token data.

        Args:
          tu: the TranslationUnit this token belongs to.
          group: the TokenGroup keeping the underlying token memory alive.
          int_data: the base class's packed token data (4 uints), if any.
          ptr_data: the base class's opaque pointer data.
          cursor: a pre-computed Cursor for this token (see
              CppTokenizer._getTokensWithCursors()).
        """
        clang.cindex.Token.__init__(self)
        self._id = None
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token; falls back to the libclang spelling."""
        if self._id is None:
            return self.spelling
        else:
            return self._id

    @id.setter
    def id(self, new_id):
        """Setting name of the token."""
        self._id = new_id

    @property
    def cursor(self):
        """Cursor for this token; computed lazily from the base class."""
        if self._cursor is None:
            # Fix: 'clang.cindex.Token.cursor' read off the class is the
            # property descriptor object itself, not a Cursor value. Call
            # fget() to actually evaluate the base-class property for this
            # token.
            self._cursor = clang.cindex.Token.cursor.fget(self)
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

    def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

        return self.id

    def __str__(self):
        return self.id


class BadExpectedToken(Exception):
    """An exception that will be raised for unexpected tokens."""
    pass


# The __contains__ function in libclang SourceRange class contains a bug. It
# gives wrong result when dealing with single line range.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range."""
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # same file name
        return False
    # same file, in between lines
    if self.start.line < other.line < self.end.line:
        return True
    # same file, same line
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # same file first line
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # same file last line
        if other.column <= self.end.column:
            return True
    return False


# Patch the fixed implementation over the buggy libclang one (see above).
SourceRange.__contains__ = SourceRange__contains__


################################################################################
################################################################################
#####                                                                      #####
#####           C P P   T O K E N I Z E R                                  #####
#####                                                                      #####
################################################################################
################################################################################


class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

    clang_flags = ['-E', '-x', 'c']
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None
        self._index = 0
        self.tokens = None

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.
        """

        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        # Annotate all tokens in one batch so each token gets a cursor with
        # full surrounding context (see docstring above).
        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        # The TokenGroup owns tokens_memory and frees it when collected; each
        # Token below keeps a reference to it to keep the memory alive.
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        for i in xrange(0, count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Parse a string of text into self.tokens (via a dummy file)."""
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def parseFile(self, file_):
        """Parse a file into self.tokens."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def nextToken(self):
        """Return next token from the list, or None when exhausted."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None


class CppStringTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a string of text as input."""

    def __init__(self, line):
        CppTokenizer.__init__(self)
        self.parseString(line)


class CppFileTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a file as input."""

    def __init__(self, file_):
        CppTokenizer.__init__(self)
        self.parseFile(file_)


# Unit testing
#
class CppTokenizerTester(object):
    """A class used to test CppTokenizer classes."""

    def __init__(self, tokenizer=None):
        self._tokenizer = tokenizer
        self._token = None

    def setTokenizer(self, tokenizer):
        self._tokenizer = tokenizer

    def expect(self, id):
        """Consume one token and check its id; raise BadExpectedToken."""
        self._token = self._tokenizer.nextToken()
        if self._token is None:
            tokid = ''
        else:
            tokid = self._token.id
        if tokid == id:
            return
        raise BadExpectedToken("### BAD TOKEN: '%s' expecting '%s'" % (
            tokid, id))

    def expectToken(self, id, line, col):
        """Like expect(), but also check the token's line/column location."""
        self.expect(id)
        if self._token.location.line != line:
            # NOTE(review): Token has no 'lineno' attribute (location.line is
            # used for the comparison above); if this error path fires it
            # raises AttributeError instead of BadExpectedToken — confirm.
            raise BadExpectedToken(
                "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (
                    id, self._token.lineno, line))
        if self._token.location.column != col:
            # NOTE(review): same issue — 'colno' does not exist on Token.
            raise BadExpectedToken("### BAD COLNO: '%d' expecting '%d'" % (
                self._token.colno, col))

    def expectTokens(self, tokens):
        """Check a sequence of (id, line, col) triples."""
        for id, line, col in tokens:
            self.expectToken(id, line, col)

    def expectList(self, list_):
        """Check a sequence of token ids (ignoring locations)."""
        for item in list_:
            self.expect(item)


def test_CppTokenizer():
    tester = CppTokenizerTester()

    tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
    tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                       "01923_xy", tokRPAREN])

    tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
    tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                       "defined", tokLPAREN, "BAZ", tokRPAREN])

    tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
    tester.expectList([])

    tester.setTokenizer(CppStringTokenizer("first\nsecond"))
    tester.expectList(["first", "second"])

    tester.setTokenizer(CppStringTokenizer("first second\n  third"))
    tester.expectTokens([("first", 1, 1),
                         ("second", 1, 7),
                         ("third", 2, 3)])

    tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
    tester.expectTokens([("boo", 1, 1)])

    tester.setTokenizer(CppStringTokenizer("an \\\n example"))
    tester.expectTokens([("an", 1, 1),
                         ("example", 2, 2)])
    return True


################################################################################
################################################################################
#####                                                                      #####
#####           C P P   E X P R E S S I O N S                              #####
#####                                                                      #####
################################################################################
################################################################################


class CppExpr(object):
    """A class that models the condition of #if directives into an expr tree.

    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    "op" is a string describing the operation
    """

    unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    # Higher number binds tighter; used by the precedence-climbing parser.
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

    def __init__(self, tokens):
        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

        if debugCppExpr:
            print "CppExpr: trying to parse %s" % repr(tokens)
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print "CppExpr: got " + repr(self.expr)
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

    def throw(self, exception, msg):
        """Print a located error message and raise 'exception(msg)'."""
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
        else:
            print "EOF: %s" % msg
        raise exception(msg)

    def expectId(self, id):
        """Check that a given token id is at the current position."""
        # NOTE(review): the index is read before the bounds check, so at end
        # of input this raises IndexError instead of the intended throw() —
        # confirm and reorder the check upstream.
        token = self.tokens[self._index]
        if self._index >= self._num_tokens or token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

    def is_decimal(self):
        # Returns ('int', value) and advances on success, else None.
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        try:
            val = int(token, 10)
            self._index += 1
            return ('int', val)
        except ValueError:
            return None

    def is_octal(self):
        # Returns ('oct', value) and advances on success, else None.
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
            self._index += 1
            return ('oct', val)
        except ValueError:
            return None

    def is_hexadecimal(self):
        # Returns ('hex', value) and advances on success, else None.
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
            return None
        try:
            val = int(token, 16)
            self._index += 1
            return ('hex', val)
        except ValueError:
            return None

    def is_integer(self):
        """Try hex, then octal, then decimal at the current position."""
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

        c = self.is_hexadecimal()
        if c:
            return c

        c = self.is_octal()
        if c:
            return c

        c = self.is_decimal()
        if c:
            return c

        return None

    def is_number(self):
        """Parse an optionally signed integer literal."""
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

        return self.is_integer()

    def is_defined(self):
        """Parse 'defined NAME' or 'defined(NAME)' into a ('defined', name)."""
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

        # We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
            self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

        self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

        return ("defined", t.id)

    def is_call_or_ident(self):
        """Parse an identifier, or a macro call with its parameter tokens."""
        if self._index >= self._num_tokens:
            return None

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

        name = t.id

        self._index += 1
        if (self._index >= self._num_tokens or
            self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

        params = []
        depth = 1
        self._index += 1
        j = self._index
        while self._index < self._num_tokens:
            id = self.tokens[self._index].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                # Top-level comma/paren ends the current parameter.
                k = self._index
                param = self.tokens[j:k]
                params.append(param)
                if id == tokRPAREN:
                    break
                j = self._index + 1
            elif id == tokRPAREN:
                depth -= 1
            self._index += 1

        if self._index >= self._num_tokens:
            return None

        self._index += 1
        return ("call", (name, params))

    # Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    # The "classic" algorithm would be fine if we were using a tool to
    # generate the parser, but we're not. Dijkstra's "shunting yard"
    # algorithm hasn't been necessary yet.

    def parseExpression(self, minPrecedence):
        """Parse a (sub)expression whose operators bind >= minPrecedence."""
        if self._index >= self._num_tokens:
            return None

        node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

        return node

    def parsePrimary(self):
        """Parse a primary: unary op, parenthesized expr, number, defined,
        identifier or macro call."""
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

        primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

    def isBinary(self, token):
        return token.id in self.binaries

    def isUnary(self, token):
        return token.id in self.unaries

    def precedence(self, token):
        """Precedence of a binary/unary operator token (None if unknown)."""
        return self.precedences.get(token.id)

    def token(self):
        """Current token, or None at end of input (does not advance)."""
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def nextToken(self):
        """Advance and return the new current token, or None at end."""
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def dump_node(self, e):
        """Render a node as a debug S-expression string (see __repr__)."""
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        return self.dump_node(self.expr)

    def source_node(self, e):
        """Render a node back to C source text, parenthesizing as needed."""
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            # NOTE(review): '!' is hardcoded here even when op is '~' — looks
            # wrong for bitwise-not expressions; confirm before relying on it.
            if arg_prec < prec:
                return "!(" + arg_src + ")"
            else:
                return "!" + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_op = arg[0]
            arg2_op = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"

    def __str__(self):
        return self.source_node(self.expr)

    @staticmethod
    def int_node(e):
        """Return the numeric value for int/oct/hex nodes, else None."""
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        else:
            return None

    def toInt(self):
        return self.int_node(self.expr)

    def optimize_node(self, e, macros=None):
        """Fold a node using known macro values; returns a (new) node.

        'macros' maps macro names to values; kCppUndefinedMacro marks a
        macro known to be undefined.
        """
        if macros is None:
            macros = {}
        op = e[0]

        if op == "defined":
            op, name = e
            if macros.has_key(name):
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                else:
                    try:
                        value = int(macros[name])
                        return ("int", value)
                    except ValueError:
                        # Macro renamed to another macro: keep it symbolic.
                        return ("defined", macros[name])

            if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

            return e

        elif op == "ident":
            op, name = e
            if macros.has_key(name):
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                return self.optimize_node(expanded, macros)
            return e

        elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            if v[0] == "int":
                if v[1] == 0:
                    return ("int", 1)
                else:
                    return ("int", 0)
            return ('!', v)

        elif op == "&&":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return ("int", 0)
                else:
                    return r
            elif ri is not None:
                if ri == 0:
                    return ("int", 0)
                else:
                    return l
            return (op, l, r)

        elif op == "||":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return r
                else:
                    return ("int", 1)
            elif ri is not None:
                if ri == 0:
                    return l
                else:
                    return ("int", 1)
            return (op, l, r)

        else:
            return e

    def optimize(self, macros=None):
        """Fold this expression in place using known macro values."""
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)


def test_cpp_expr(expr, expected):
    e = CppExpr(CppStringTokenizer(expr).tokens)
    s1 = repr(e)
    if s1 != expected:
        print ("[FAIL]: expression '%s' generates '%s', should be "
               "'%s'" % (expr, s1, expected))
        global failure_count
        failure_count += 1


def test_cpp_expr_optim(expr, expected, macros=None):
    if macros is None:
        macros = {}
    e = CppExpr(CppStringTokenizer(expr).tokens)
    e.optimize(macros)
    s1 = repr(e)
    if s1 != expected:
        print ("[FAIL]: optimized expression '%s' generates '%s' with "
               "macros %s, should be '%s'" % (expr, s1, macros, expected))
        global failure_count
        failure_count += 1


def test_cpp_expr_source(expr, expected):
    e = CppExpr(CppStringTokenizer(expr).tokens)
    s1 = str(e)
    if s1 != expected:
        print ("[FAIL]: source expression '%s' generates '%s', should "
               "be '%s'" % (expr, s1, expected))
        global failure_count
        failure_count += 1


def test_CppExpr():
    test_cpp_expr("0", "(int 0)")
    test_cpp_expr("1", "(int 1)")
    test_cpp_expr("-5", "(int -5)")
    test_cpp_expr("+1", "(int 1)")
    test_cpp_expr("0U", "(int 0)")
    test_cpp_expr("015", "(oct 015)")
    test_cpp_expr("015l", "(oct 015)")
    test_cpp_expr("0x3e", "(hex 0x3e)")
    test_cpp_expr("(0)", "(int 0)")
    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr("defined(ABC) || defined(BINGO)",
                  "(|| (defined ABC) (defined BINGO))")
    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
    test_cpp_expr("A == 1 || defined(B)",
                  "(|| (== (ident A) (int 1)) (defined B))")

    test_cpp_expr_optim("0", "(int 0)")
    test_cpp_expr_optim("1", "(int 1)")
    test_cpp_expr_optim("1 && 1", "(int 1)")
    test_cpp_expr_optim("1 && +1", "(int 1)")
    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
    test_cpp_expr_optim("1 && 0", "(int 0)")
    test_cpp_expr_optim("0 && 1", "(int 0)")
    test_cpp_expr_optim("0 && 0", "(int 0)")
    test_cpp_expr_optim("1 || 1", "(int 1)")
    test_cpp_expr_optim("1 || 0", "(int 1)")
    test_cpp_expr_optim("0 || 1", "(int 1)")
    test_cpp_expr_optim("0 || 0", "(int 0)")
    test_cpp_expr_optim("A", "(ident A)")
    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)",
                        "(|| (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(&& (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined B)", {"A": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined A)", {"B": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(int 0)", {"A": kCppUndefinedMacro})
    test_cpp_expr_optim("A == 1 || defined(B)",
                        "(|| (== (ident A) (int 1)) (defined B))")
    test_cpp_expr_optim(
        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
        {"__KERNEL__": kCppUndefinedMacro})

    test_cpp_expr_source("0", "0")
    test_cpp_expr_source("1", "1")
    test_cpp_expr_source("1 && 1", "1 && 1")
    test_cpp_expr_source("1 && 0", "1 && 0")
    test_cpp_expr_source("0 && 1", "0 && 1")
    test_cpp_expr_source("0 && 0", "0 && 0")
    test_cpp_expr_source("1 || 1", "1 || 1")
    test_cpp_expr_source("1 || 0", "1 || 0")
    test_cpp_expr_source("0 || 1", "0 || 1")
    test_cpp_expr_source("0 || 0", "0 || 0")
    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")


################################################################################
################################################################################
#####                                                                      #####
#####           C P P   B L O C K                                          #####
#####                                                                      #####
################################################################################
################################################################################


class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
    - directive blocks: contain the tokens of a single pre-processor
      directive (e.g. #if)
    - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
        """

        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].location.line

        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation."""
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        if buf:
            result.append(strip_space(buf))

        return result, indent

    def write(self, out, indent):
        """Dump the current block."""
        # removeWhiteSpace() will sometimes create non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return indent

        if self.directive:
            out.write(str(self) + '\n')
        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')

        return indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result


class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self, n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def optimizeAll(self, macros):
        """Run all optimization passes (macro folding, then #if 0 removal)."""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        """Write all blocks to 'out', threading the indent level through."""
        indent = 0
        for b in self.blocks:
            indent = b.write(out, indent)

    def removeVarsAndFuncs(self, keep):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'keep', which is useful
        for optimized byteorder swap functions and stuff like that.
1248 """ 1249 1250 # NOTE: It's also removing function-like macros, such as __SYSCALL(...) 1251 # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h. 1252 # It could be problematic when we have function-like macros but without 1253 # '}' following them. It will skip all the tokens/blocks until seeing a 1254 # '}' as the function end. Fortunately we don't have such cases in the 1255 # current kernel headers. 1256 1257 # state = 0 => normal (i.e. LN + spaces) 1258 # state = 1 => typedef/struct encountered, ends with ";" 1259 # state = 2 => var declaration encountered, ends with ";" 1260 # state = 3 => func declaration encountered, ends with "}" 1261 1262 state = 0 1263 depth = 0 1264 blocks2 = [] 1265 skipTokens = False 1266 for b in self.blocks: 1267 if b.isDirective(): 1268 blocks2.append(b) 1269 else: 1270 n = len(b.tokens) 1271 i = 0 1272 if skipTokens: 1273 first = n 1274 else: 1275 first = 0 1276 while i < n: 1277 tok = b.tokens[i] 1278 tokid = tok.id 1279 # If we are not looking for the start of a new 1280 # type/var/func, then skip over tokens until 1281 # we find our terminator, managing the depth of 1282 # accolades as we go. 1283 if state > 0: 1284 terminator = False 1285 if tokid == '{': 1286 depth += 1 1287 elif tokid == '}': 1288 if depth > 0: 1289 depth -= 1 1290 if (depth == 0) and (state == 3): 1291 terminator = True 1292 elif tokid == ';' and depth == 0: 1293 terminator = True 1294 1295 if terminator: 1296 # we found the terminator 1297 state = 0 1298 if skipTokens: 1299 skipTokens = False 1300 first = i + 1 1301 1302 i += 1 1303 continue 1304 1305 # Is it a new type definition, then start recording it 1306 if tok.id in ['struct', 'typedef', 'enum', 'union', 1307 '__extension__']: 1308 state = 1 1309 i += 1 1310 continue 1311 1312 # Is it a variable or function definition. If so, first 1313 # try to determine which type it is, and also extract 1314 # its name. 
1315 # 1316 # We're going to parse the next tokens of the same block 1317 # until we find a semicolon or a left parenthesis. 1318 # 1319 # The semicolon corresponds to a variable definition, 1320 # the left-parenthesis to a function definition. 1321 # 1322 # We also assume that the var/func name is the last 1323 # identifier before the terminator. 1324 # 1325 j = i + 1 1326 ident = "" 1327 while j < n: 1328 tokid = b.tokens[j].id 1329 if tokid == '(': # a function declaration 1330 state = 3 1331 break 1332 elif tokid == ';': # a variable declaration 1333 state = 2 1334 break 1335 if b.tokens[j].kind == TokenKind.IDENTIFIER: 1336 ident = b.tokens[j].id 1337 j += 1 1338 1339 if j >= n: 1340 # This can only happen when the declaration 1341 # does not end on the current block (e.g. with 1342 # a directive mixed inside it. 1343 # 1344 # We will treat it as malformed because 1345 # it's very hard to recover from this case 1346 # without making our parser much more 1347 # complex. 1348 # 1349 logging.debug("### skip unterminated static '%s'", 1350 ident) 1351 break 1352 1353 if ident in keep: 1354 logging.debug("### keep var/func '%s': %s", ident, 1355 repr(b.tokens[i:j])) 1356 else: 1357 # We're going to skip the tokens for this declaration 1358 logging.debug("### skip var/func '%s': %s", ident, 1359 repr(b.tokens[i:j])) 1360 if i > first: 1361 blocks2.append(Block(b.tokens[first:i])) 1362 skipTokens = True 1363 first = n 1364 1365 i += 1 1366 1367 if i > first: 1368 #print "### final '%s'" % repr(b.tokens[first:i]) 1369 blocks2.append(Block(b.tokens[first:i])) 1370 1371 self.blocks = blocks2 1372 1373 def replaceTokens(self, replacements): 1374 """Replace tokens according to the given dict.""" 1375 extra_includes = [] 1376 for b in self.blocks: 1377 made_change = False 1378 if b.isInclude() is None: 1379 i = 0 1380 while i < len(b.tokens): 1381 tok = b.tokens[i] 1382 if (tok.kind == TokenKind.KEYWORD and tok.id == 'struct' 1383 and (i + 2) < len(b.tokens) and b.tokens[i 
+ 2].id == '{'): 1384 struct_name = b.tokens[i + 1].id 1385 if struct_name in kernel_struct_replacements: 1386 extra_includes.append("<bits/%s.h>" % struct_name) 1387 end = i + 2 1388 while end < len(b.tokens) and b.tokens[end].id != '}': 1389 end += 1 1390 end += 1 # Swallow '}' 1391 while end < len(b.tokens) and b.tokens[end].id != ';': 1392 end += 1 1393 end += 1 # Swallow ';' 1394 # Remove these tokens. We'll replace them later with a #include block. 1395 b.tokens[i:end] = [] 1396 made_change = True 1397 # We've just modified b.tokens, so revisit the current offset. 1398 continue 1399 if tok.kind == TokenKind.IDENTIFIER: 1400 if tok.id in replacements: 1401 tok.id = replacements[tok.id] 1402 made_change = True 1403 i += 1 1404 1405 if b.isDefine() and b.define_id in replacements: 1406 b.define_id = replacements[b.define_id] 1407 made_change = True 1408 1409 if made_change and b.isIf(): 1410 # Keep 'expr' in sync with 'tokens'. 1411 b.expr = CppExpr(b.tokens) 1412 1413 for extra_include in extra_includes: 1414 replacement = CppStringTokenizer(extra_include) 1415 self.blocks.insert(2, Block(replacement.tokens, directive='include')) 1416 1417 1418 1419def strip_space(s): 1420 """Strip out redundant space in a given string.""" 1421 1422 # NOTE: It ought to be more clever to not destroy spaces in string tokens. 1423 replacements = {' . ': '.', 1424 ' [': '[', 1425 '[ ': '[', 1426 ' ]': ']', 1427 '( ': '(', 1428 ' )': ')', 1429 ' ,': ',', 1430 '# ': '#', 1431 ' ;': ';', 1432 '~ ': '~', 1433 ' -> ': '->'} 1434 result = s 1435 for r in replacements: 1436 result = result.replace(r, replacements[r]) 1437 1438 # Remove the space between function name and the parenthesis. 1439 result = re.sub(r'(\w+) \(', r'\1(', result) 1440 return result 1441 1442 1443class BlockParser(object): 1444 """A class that converts an input source file into a BlockList object.""" 1445 1446 def __init__(self, tokzer=None): 1447 """Initialize a block parser. 
1448 1449 The input source is provided through a Tokenizer object. 1450 """ 1451 self._tokzer = tokzer 1452 self._parsed = False 1453 1454 @property 1455 def parsed(self): 1456 return self._parsed 1457 1458 @staticmethod 1459 def _short_extent(extent): 1460 return '%d:%d - %d:%d' % (extent.start.line, extent.start.column, 1461 extent.end.line, extent.end.column) 1462 1463 def getBlocks(self, tokzer=None): 1464 """Return all the blocks parsed.""" 1465 1466 def consume_extent(i, tokens, extent=None, detect_change=False): 1467 """Return tokens that belong to the given extent. 1468 1469 It parses all the tokens that follow tokens[i], until getting out 1470 of the extent. When detect_change is True, it may terminate early 1471 when detecting preprocessing directives inside the extent. 1472 """ 1473 1474 result = [] 1475 if extent is None: 1476 extent = tokens[i].cursor.extent 1477 1478 while i < len(tokens) and tokens[i].location in extent: 1479 t = tokens[i] 1480 if debugBlockParser: 1481 print ' ' * 2, t.id, t.kind, t.cursor.kind 1482 if (detect_change and t.cursor.extent != extent and 1483 t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE): 1484 break 1485 result.append(t) 1486 i += 1 1487 return (i, result) 1488 1489 def consume_line(i, tokens): 1490 """Return tokens that follow tokens[i] in the same line.""" 1491 result = [] 1492 line = tokens[i].location.line 1493 while i < len(tokens) and tokens[i].location.line == line: 1494 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1495 break 1496 result.append(tokens[i]) 1497 i += 1 1498 return (i, result) 1499 1500 if tokzer is None: 1501 tokzer = self._tokzer 1502 tokens = tokzer.tokens 1503 1504 blocks = [] 1505 buf = [] 1506 i = 0 1507 1508 while i < len(tokens): 1509 t = tokens[i] 1510 cursor = t.cursor 1511 1512 if debugBlockParser: 1513 print ("%d: Processing [%s], kind=[%s], cursor=[%s], " 1514 "extent=[%s]" % (t.location.line, t.spelling, t.kind, 1515 cursor.kind, 1516 
self._short_extent(cursor.extent))) 1517 1518 if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1519 if buf: 1520 blocks.append(Block(buf)) 1521 buf = [] 1522 1523 j = i 1524 if j + 1 >= len(tokens): 1525 raise BadExpectedToken("### BAD TOKEN at %s" % (t.location)) 1526 directive = tokens[j+1].id 1527 1528 if directive == 'define': 1529 if i+2 >= len(tokens): 1530 raise BadExpectedToken("### BAD TOKEN at %s" % 1531 (tokens[i].location)) 1532 1533 # Skip '#' and 'define'. 1534 extent = tokens[i].cursor.extent 1535 i += 2 1536 id = '' 1537 # We need to separate the id from the remaining of 1538 # the line, especially for the function-like macro. 1539 if (i + 1 < len(tokens) and tokens[i+1].id == '(' and 1540 (tokens[i].location.column + len(tokens[i].spelling) == 1541 tokens[i+1].location.column)): 1542 while i < len(tokens): 1543 id += tokens[i].id 1544 if tokens[i].spelling == ')': 1545 i += 1 1546 break 1547 i += 1 1548 else: 1549 id += tokens[i].id 1550 # Advance to the next token that follows the macro id 1551 i += 1 1552 1553 (i, ret) = consume_extent(i, tokens, extent=extent) 1554 blocks.append(Block(ret, directive=directive, 1555 lineno=t.location.line, identifier=id)) 1556 1557 else: 1558 (i, ret) = consume_extent(i, tokens) 1559 blocks.append(Block(ret[2:], directive=directive, 1560 lineno=t.location.line)) 1561 1562 elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE: 1563 if buf: 1564 blocks.append(Block(buf)) 1565 buf = [] 1566 directive = tokens[i+1].id 1567 (i, ret) = consume_extent(i, tokens) 1568 1569 blocks.append(Block(ret[2:], directive=directive, 1570 lineno=t.location.line)) 1571 1572 elif cursor.kind == CursorKind.VAR_DECL: 1573 if buf: 1574 blocks.append(Block(buf)) 1575 buf = [] 1576 1577 (i, ret) = consume_extent(i, tokens, detect_change=True) 1578 buf += ret 1579 1580 elif cursor.kind == CursorKind.FUNCTION_DECL: 1581 if buf: 1582 blocks.append(Block(buf)) 1583 buf = [] 1584 1585 (i, ret) = consume_extent(i, tokens, detect_change=True) 
1586 buf += ret 1587 1588 else: 1589 (i, ret) = consume_line(i, tokens) 1590 buf += ret 1591 1592 if buf: 1593 blocks.append(Block(buf)) 1594 1595 # _parsed=True indicates a successful parsing, although may result an 1596 # empty BlockList. 1597 self._parsed = True 1598 1599 return BlockList(blocks) 1600 1601 def parse(self, tokzer): 1602 return self.getBlocks(tokzer) 1603 1604 def parseFile(self, path): 1605 return self.getBlocks(CppFileTokenizer(path)) 1606 1607 1608def test_block_parsing(lines, expected): 1609 """Helper method to test the correctness of BlockParser.parse.""" 1610 blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines))) 1611 if len(blocks) != len(expected): 1612 raise BadExpectedToken("BlockParser.parse() returned '%s' expecting " 1613 "'%s'" % (str(blocks), repr(expected))) 1614 for n in range(len(blocks)): 1615 if str(blocks[n]) != expected[n]: 1616 raise BadExpectedToken("BlockParser.parse()[%d] is '%s', " 1617 "expecting '%s'" % (n, str(blocks[n]), 1618 expected[n])) 1619 1620 1621def test_BlockParser(): 1622 test_block_parsing(["#error hello"], ["#error hello"]) 1623 test_block_parsing(["foo", "", "bar"], ["foo bar"]) 1624 1625 # We currently cannot handle the following case with libclang properly. 1626 # Fortunately it doesn't appear in current headers. 
1627 # test_block_parsing(["foo", " # ", "bar"], ["foo", "bar"]) 1628 1629 test_block_parsing(["foo", 1630 " # /* ahah */ if defined(__KERNEL__) /* more */", 1631 "bar", "#endif"], 1632 ["foo", "#ifdef __KERNEL__", "bar", "#endif"]) 1633 1634 1635################################################################################ 1636################################################################################ 1637##### ##### 1638##### B L O C K L I S T O P T I M I Z A T I O N ##### 1639##### ##### 1640################################################################################ 1641################################################################################ 1642 1643 1644def find_matching_endif(blocks, i): 1645 """Traverse the blocks to find out the matching #endif.""" 1646 n = len(blocks) 1647 depth = 1 1648 while i < n: 1649 if blocks[i].isDirective(): 1650 dir_ = blocks[i].directive 1651 if dir_ in ["if", "ifndef", "ifdef"]: 1652 depth += 1 1653 elif depth == 1 and dir_ in ["else", "elif"]: 1654 return i 1655 elif dir_ == "endif": 1656 depth -= 1 1657 if depth == 0: 1658 return i 1659 i += 1 1660 return i 1661 1662 1663def optimize_if01(blocks): 1664 """Remove the code between #if 0 .. #endif in a list of CppBlocks.""" 1665 i = 0 1666 n = len(blocks) 1667 result = [] 1668 while i < n: 1669 j = i 1670 while j < n and not blocks[j].isIf(): 1671 j += 1 1672 if j > i: 1673 logging.debug("appending lines %d to %d", blocks[i].lineno, 1674 blocks[j-1].lineno) 1675 result += blocks[i:j] 1676 if j >= n: 1677 break 1678 expr = blocks[j].expr 1679 r = expr.toInt() 1680 if r is None: 1681 result.append(blocks[j]) 1682 i = j + 1 1683 continue 1684 1685 if r == 0: 1686 # if 0 => skip everything until the corresponding #endif 1687 j = find_matching_endif(blocks, j + 1) 1688 if j >= n: 1689 # unterminated #if 0, finish here 1690 break 1691 dir_ = blocks[j].directive 1692 if dir_ == "endif": 1693 logging.debug("remove 'if 0' .. 
'endif' (lines %d to %d)", 1694 blocks[i].lineno, blocks[j].lineno) 1695 i = j + 1 1696 elif dir_ == "else": 1697 # convert 'else' into 'if 1' 1698 logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d " 1699 "to %d)", blocks[i].lineno, blocks[j-1].lineno) 1700 blocks[j].directive = "if" 1701 blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens) 1702 i = j 1703 elif dir_ == "elif": 1704 # convert 'elif' into 'if' 1705 logging.debug("convert 'if 0' .. 'elif' into 'if'") 1706 blocks[j].directive = "if" 1707 i = j 1708 continue 1709 1710 # if 1 => find corresponding endif and remove/transform them 1711 k = find_matching_endif(blocks, j + 1) 1712 if k >= n: 1713 # unterminated #if 1, finish here 1714 logging.debug("unterminated 'if 1'") 1715 result += blocks[j+1:k] 1716 break 1717 1718 dir_ = blocks[k].directive 1719 if dir_ == "endif": 1720 logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)", 1721 blocks[j].lineno, blocks[k].lineno) 1722 result += optimize_if01(blocks[j+1:k]) 1723 i = k + 1 1724 elif dir_ == "else": 1725 # convert 'else' into 'if 0' 1726 logging.debug("convert 'if 1' .. 'else' (lines %d to %d)", 1727 blocks[j].lineno, blocks[k].lineno) 1728 result += optimize_if01(blocks[j+1:k]) 1729 blocks[k].directive = "if" 1730 blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) 1731 i = k 1732 elif dir_ == "elif": 1733 # convert 'elif' into 'if 0' 1734 logging.debug("convert 'if 1' .. 
'elif' (lines %d to %d)", 1735 blocks[j].lineno, blocks[k].lineno) 1736 result += optimize_if01(blocks[j+1:k]) 1737 blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) 1738 i = k 1739 return result 1740 1741 1742def test_optimizeAll(): 1743 text = """\ 1744#if 1 1745#define GOOD_1 1746#endif 1747#if 0 1748#define BAD_2 1749#define BAD_3 1750#endif 1751 1752#if 1 1753#define GOOD_2 1754#else 1755#define BAD_4 1756#endif 1757 1758#if 0 1759#define BAD_5 1760#else 1761#define GOOD_3 1762#endif 1763 1764#if defined(__KERNEL__) 1765#define BAD_KERNEL 1766#endif 1767 1768#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) 1769#define X 1770#endif 1771 1772#ifndef SIGRTMAX 1773#define SIGRTMAX 123 1774#endif /* SIGRTMAX */ 1775 1776#if 0 1777#if 1 1778#define BAD_6 1779#endif 1780#endif\ 1781""" 1782 1783 expected = """\ 1784#define GOOD_1 1785#define GOOD_2 1786#define GOOD_3 1787#if !defined(__GLIBC__) || __GLIBC__ < 2 1788#define X 1789#endif 1790#ifndef __SIGRTMAX 1791#define __SIGRTMAX 123 1792#endif 1793""" 1794 1795 out = utils.StringOutput() 1796 blocks = BlockParser().parse(CppStringTokenizer(text)) 1797 blocks.replaceTokens(kernel_token_replacements) 1798 blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro}) 1799 blocks.write(out) 1800 if out.get() != expected: 1801 print "[FAIL]: macro optimization failed\n" 1802 print "<<<< expecting '", 1803 print expected, 1804 print "'\n>>>> result '", 1805 print out.get(), 1806 print "'\n----" 1807 global failure_count 1808 failure_count += 1 1809 1810 1811def runUnitTests(): 1812 """Always run all unit tests for this program.""" 1813 test_CppTokenizer() 1814 test_CppExpr() 1815 test_optimizeAll() 1816 test_BlockParser() 1817 1818 1819failure_count = 0 1820runUnitTests() 1821if failure_count != 0: 1822 utils.panic("Unit tests failed in cpp.py.\n") 1823