#!/usr/bin/env python
"""A glorified C pre-processor parser."""

import ctypes
import logging
import os
import re
import site
import unittest
import utils

top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Set up the env vars for libclang.
site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))

import clang.cindex
from clang.cindex import conf
from clang.cindex import Cursor
from clang.cindex import CursorKind
from clang.cindex import SourceLocation
from clang.cindex import SourceRange
from clang.cindex import TokenGroup
from clang.cindex import TokenKind
from clang.cindex import TranslationUnit

# Point libclang at the prebuilt shared library (libclang.so, which pulls in
# libLLVM.so, etc.). Note that setting LD_LIBRARY_PATH with os.putenv()
# sometimes doesn't help.
clang.cindex.Config.set_library_file(os.path.join(
    top, 'prebuilts/sdk/tools/linux/lib64/libclang_android.so'))

from defaults import kCppUndefinedMacro
from defaults import kernel_remove_config_macros
from defaults import kernel_struct_replacements
from defaults import kernel_token_replacements


debugBlockParser = False
debugCppExpr = False
debugOptimIf01 = False

###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   T O K E N S                                       #####
#####                                                                     #####
###############################################################################
###############################################################################

# The list of supported C-preprocessor tokens, plus a couple of C tokens as
# well.
tokEOF = "\0"
tokLN = "\n"
tokSTRINGIFY = "#"
tokCONCAT = "##"
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"


class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the Token class in libclang, adding an extra 'id' property to
    hold the new spelling of the token. The spelling property in the base
    class is defined as read-only, so new names after macro instantiation are
    saved in their ids instead. This also facilitates the renaming done by
    directive optimizations, like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class, because the one
    in libclang is always queried based on a single token, which usually
    doesn't hold useful information. The cursor in this class can be set by
    calling CppTokenizer.getTokensWithCursors(). Otherwise it returns the one
    in the base class.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        clang.cindex.Token.__init__(self)
        self._id = None
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class, but
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token."""
        if self._id is None:
            return self.spelling
        else:
            return self._id

    @id.setter
    def id(self, new_id):
        """Set the name of the token."""
        self._id = new_id

    @property
    def cursor(self):
        if self._cursor is None:
            self._cursor = clang.cindex.Token.cursor
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

    def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

        return self.id

    def __str__(self):
        return self.id


class BadExpectedToken(Exception):
    """An exception that will be raised for unexpected tokens."""
    pass


# The __contains__ function in libclang's SourceRange class contains a bug:
# it gives the wrong result when dealing with a single-line range.
# Bugs filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range."""
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # Not in the same file.
        return False
    # same file, in between lines
    if self.start.line < other.line < self.end.line:
        return True
    # same file, same line
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # same file, first line
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # same file, last line
        if other.column <= self.end.column:
            return True
    return False


SourceRange.__contains__ = SourceRange__contains__


###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   T O K E N I Z E R                                 #####
#####                                                                     #####
###############################################################################
###############################################################################


class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

    clang_flags = ['-E', '-x', 'c']
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None
        self._index = 0
        self.tokens = None

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information, because it is queried based on a single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor.
        In this function, we pass all the available tokens to get more
        informative cursors.
        """

        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        for i in xrange(0, count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Parse a string of text into a list of tokens."""
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def parseFile(self, file_):
        """Parse a file into a list of tokens."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def nextToken(self):
        """Return the next token from the list, or None at the end."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None


class CppStringTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a string of text as input."""

    def __init__(self, line):
        CppTokenizer.__init__(self)
        self.parseString(line)


class CppFileTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a file as input."""

    def __init__(self, file_):
        CppTokenizer.__init__(self)
        self.parseFile(file_)


# Unit testing
#
class CppTokenizerTests(unittest.TestCase):
    """CppTokenizer tests."""

    def get_tokens(self, token_string, line_col=False):
        tokens = CppStringTokenizer(token_string)
        token_list = []
        while True:
            token = tokens.nextToken()
            if not token:
                break
            if line_col:
                token_list.append((token.id, token.location.line,
                                   token.location.column))
            else:
                token_list.append(token.id)
        return token_list

    def test_hash(self):
        self.assertEqual(self.get_tokens("#an/example && (01923_xy)"),
                         ["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                          "01923_xy", tokRPAREN])

    def test_parens(self):
        self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"),
                         ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                          "defined", tokLPAREN, "BAZ", tokRPAREN])

    def test_comment(self):
        self.assertEqual(self.get_tokens("/*\n#\n*/"), [])

    def test_line_cross(self):
        self.assertEqual(self.get_tokens("first\nsecond"), ["first", "second"])

    def test_line_cross_line_col(self):
        self.assertEqual(self.get_tokens("first second\n  third", True),
                         [("first", 1, 1),
                          ("second", 1, 7), ("third", 2, 3)])

    def test_comment_line_col(self):
        self.assertEqual(self.get_tokens("boo /* what the\nhell */", True),
                         [("boo", 1, 1)])

    def test_escapes(self):
        self.assertEqual(self.get_tokens("an \\\n example", True),
                         [("an", 1, 1), ("example", 2, 2)])


###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   E X P R E S S I O N S                             #####
#####                                                                     #####
###############################################################################
###############################################################################


class CppExpr(object):
    """A class that models the condition of #if directives as an expr tree.

    Each node in the tree is of the form (op, arg) or (op, arg1, arg2), where
    "op" is a string describing the operation.
    """

    unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

    def __init__(self, tokens):
        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

        if debugCppExpr:
            print "CppExpr: trying to parse %s" % repr(tokens)
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print "CppExpr: got " + repr(self.expr)
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

    def throw(self, exception, msg):
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
        else:
            print "EOF: %s" % msg
        raise exception(msg)

    def expectId(self, id):
        """Check that a given token id is at the current position."""
        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got <EOF>" % id)
        token = self.tokens[self._index]
        if token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

    def is_decimal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        try:
            val = int(token, 10)
            self._index += 1
            return ('int', val)
        except ValueError:
            return None

    def is_octal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
            self._index += 1
            return ('oct', val)
        except ValueError:
            return None

    def is_hexadecimal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
            return None
        try:
            val = int(token, 16)
            self._index += 1
            return ('hex', val)
        except ValueError:
            return None

    def is_integer(self):
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

        c = self.is_hexadecimal()
        if c:
            return c

        c = self.is_octal()
        if c:
            return c

        c = self.is_decimal()
        if c:
            return c

        return None

    def is_number(self):
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

        return self.is_integer()

    def is_defined(self):
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

        # We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
                self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

        self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

        return ("defined", t.id)

    def is_call_or_ident(self):
        if self._index >= self._num_tokens:
            return None

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

        name = t.id

        self._index += 1
        if (self._index >= self._num_tokens or
                self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

        params = []
        depth = 1
        self._index += 1
        j = self._index
        while self._index < self._num_tokens:
            id = self.tokens[self._index].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                k = self._index
                param = self.tokens[j:k]
                params.append(param)
                if id == tokRPAREN:
                    break
                j = self._index + 1
            elif id == tokRPAREN:
                depth -= 1
            self._index += 1

        if self._index >= self._num_tokens:
            return None

        self._index += 1
        return ("call", (name, params))

    # Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm. The "classic"
    # algorithm would be fine if we were using a tool to generate the parser,
    # but we're not. Dijkstra's "shunting yard" algorithm hasn't been
    # necessary yet.

    def parseExpression(self, minPrecedence):
        if self._index >= self._num_tokens:
            return None

        node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

        return node

    def parsePrimary(self):
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

        primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as an IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

    def isBinary(self, token):
        return token.id in self.binaries

    def isUnary(self, token):
        return token.id in self.unaries

    def precedence(self, token):
        return self.precedences.get(token.id)

    def token(self):
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def nextToken(self):
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def dump_node(self, e):
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        return self.dump_node(self.expr)

    def source_node(self, e):
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            if arg_prec < prec:
                return op + "(" + arg_src + ")"
            else:
                return op + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_op = arg[0]
            arg2_op = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"
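
    # Illustrative example (not from the original source): for the directive
    # condition "defined(A) && B > 1", parseExpression() yields the tuple tree
    #     ('&&', ('defined', 'A'), ('>', ('ident', 'B'), ('int', 1)))
    # dump_node() renders it as "(&& (defined A) (> (ident B) (int 1)))", and
    # source_node() turns it back into the source form "defined(A) && B > 1".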

    def __str__(self):
        return self.source_node(self.expr)

    @staticmethod
    def int_node(e):
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        else:
            return None

    def toInt(self):
        return self.int_node(self.expr)

    def optimize_node(self, e, macros=None):
        if macros is None:
            macros = {}
        op = e[0]

        if op == "defined":
            op, name = e
            if macros.has_key(name):
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                else:
                    try:
                        value = int(macros[name])
                        return ("int", value)
                    except ValueError:
                        return ("defined", macros[name])

            if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

            return e

        elif op == "ident":
            op, name = e
            if macros.has_key(name):
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                return self.optimize_node(expanded, macros)
            return e

        elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            if v[0] == "int":
                if v[1] == 0:
                    return ("int", 1)
                else:
                    return ("int", 0)
            return ('!', v)

        elif op == "&&":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return ("int", 0)
                else:
                    return r
            elif ri is not None:
                if ri == 0:
                    return ("int", 0)
                else:
                    return l
            return (op, l, r)

        elif op == "||":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return r
                else:
                    return ("int", 1)
            elif ri is not None:
                if ri == 0:
                    return l
                else:
                    return ("int", 1)
            return (op, l, r)

        else:
            return e

    def optimize(self, macros=None):
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)


class CppExprTest(unittest.TestCase):
    """CppExpr unit tests."""

    def get_expr(self, expr):
        return repr(CppExpr(CppStringTokenizer(expr).tokens))

    def test_cpp_expr(self):
        self.assertEqual(self.get_expr("0"), "(int 0)")
        self.assertEqual(self.get_expr("1"), "(int 1)")
        self.assertEqual(self.get_expr("-5"), "(int -5)")
        self.assertEqual(self.get_expr("+1"), "(int 1)")
        self.assertEqual(self.get_expr("0U"), "(int 0)")
        self.assertEqual(self.get_expr("015"), "(oct 015)")
        self.assertEqual(self.get_expr("015l"), "(oct 015)")
        self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)")
        self.assertEqual(self.get_expr("(0)"), "(int 0)")
        self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))")
        self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))")
        self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)")
        self.assertEqual(self.get_expr("EXAMPLE - 3"),
                         "(- (ident EXAMPLE) (int 3))")
        self.assertEqual(self.get_expr("defined(EXAMPLE)"),
                         "(defined EXAMPLE)")
        self.assertEqual(self.get_expr("defined ( EXAMPLE ) "),
                         "(defined EXAMPLE)")
        self.assertEqual(self.get_expr("!defined(EXAMPLE)"),
                         "(! (defined EXAMPLE))")
        self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"),
                         "(|| (defined ABC) (defined BINGO))")
        self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])")
        self.assertEqual(self.get_expr("A == 1 || defined(B)"),
                         "(|| (== (ident A) (int 1)) (defined B))")

    def get_expr_optimize(self, expr, macros=None):
        if macros is None:
            macros = {}
        e = CppExpr(CppStringTokenizer(expr).tokens)
        e.optimize(macros)
        return repr(e)

    def test_cpp_expr_optimize(self):
        self.assertEqual(self.get_expr_optimize("0"), "(int 0)")
        self.assertEqual(self.get_expr_optimize("1"), "(int 1)")
        self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)")
        self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)")
        self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)")
        self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)")
        self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)")
        self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)")
        self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)")
        self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)")
        self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)")
        self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)")
        self.assertEqual(self.get_expr_optimize("A"), "(ident A)")
        self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)")
        self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)")
        self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)")
        self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}),
                         "(ident B)")
        self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}),
                         "(ident A)")
        self.assertEqual(self.get_expr_optimize("A && B"),
                         "(&& (ident A) (ident B))")
        self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)")
        self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"),
                         "(- (ident EXAMPLE) (int 3))")
        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"),
                         "(defined EXAMPLE)")
        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
                                                {"EXAMPLE": "XOWOE"}),
                         "(defined XOWOE)")
        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
                                                {"EXAMPLE": kCppUndefinedMacro}),
                         "(int 0)")
        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"),
                         "(! (defined EXAMPLE))")
        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
                                                {"EXAMPLE": "XOWOE"}),
                         "(! (defined XOWOE))")
        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
                                                {"EXAMPLE": kCppUndefinedMacro}),
                         "(int 1)")
        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"),
                         "(|| (defined A) (defined B))")
        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
                                                {"A": "1"}),
                         "(int 1)")
        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
                                                {"B": "1"}),
                         "(int 1)")
        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
                                                {"B": kCppUndefinedMacro}),
                         "(defined A)")
        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
                                                {"A": kCppUndefinedMacro,
                                                 "B": kCppUndefinedMacro}),
                         "(int 0)")
        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"),
                         "(&& (defined A) (defined B))")
        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
                                                {"A": "1"}),
                         "(defined B)")
        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
                                                {"B": "1"}),
                         "(defined A)")
        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
                                                {"B": kCppUndefinedMacro}),
                         "(int 0)")
        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
                                                {"A": kCppUndefinedMacro}),
                         "(int 0)")
        self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"),
                         "(|| (== (ident A) (int 1)) (defined B))")
        self.assertEqual(self.get_expr_optimize(
            "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
            {"__KERNEL__": kCppUndefinedMacro}),
            "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))")

    def get_expr_string(self, expr):
        return str(CppExpr(CppStringTokenizer(expr).tokens))

    def test_cpp_expr_string(self):
        self.assertEqual(self.get_expr_string("0"), "0")
        self.assertEqual(self.get_expr_string("1"), "1")
        self.assertEqual(self.get_expr_string("1 && 1"), "1 && 1")
        self.assertEqual(self.get_expr_string("1 && 0"), "1 && 0")
        self.assertEqual(self.get_expr_string("0 && 1"), "0 && 1")
        self.assertEqual(self.get_expr_string("0 && 0"), "0 && 0")
        self.assertEqual(self.get_expr_string("1 || 1"), "1 || 1")
        self.assertEqual(self.get_expr_string("1 || 0"), "1 || 0")
        self.assertEqual(self.get_expr_string("0 || 1"), "0 || 1")
        self.assertEqual(self.get_expr_string("0 || 0"), "0 || 0")
        self.assertEqual(self.get_expr_string("EXAMPLE"), "EXAMPLE")
        self.assertEqual(self.get_expr_string("EXAMPLE - 3"), "EXAMPLE - 3")
        self.assertEqual(self.get_expr_string("defined(EXAMPLE)"),
                         "defined(EXAMPLE)")
        self.assertEqual(self.get_expr_string("defined EXAMPLE"),
                         "defined(EXAMPLE)")
        self.assertEqual(self.get_expr_string("A == 1 || defined(B)"),
                         "A == 1 || defined(B)")


###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   B L O C K                                         #####
#####                                                                     #####
###############################################################################
###############################################################################


class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks: contain the tokens of non-directive text

    The cpp parser class below will transform an input source file into a
    list of Block objects (grouped in a BlockList object for convenience).
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block; if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
        """

        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].location.line

        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation."""
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                # Do not indent if this is 'extern "C" {'.
                if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
                    indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                if indent >= 2:
                    indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                        (tokens[i+1].id == ';' or
                         tokens[i+1].id in ['else', '__attribute__',
                                            '__attribute', '__packed'] or
                         tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in an enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        if buf:
            result.append(strip_space(buf))

        return result, indent

    def write(self, out, indent):
        """Dump the current block."""
        # removeWhiteSpace() will sometimes create non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return indent

        if self.directive:
            out.write(str(self) + '\n')
        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')

        return indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between the name and '(' in a function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenate tokens with a space separator, because they may not
            # be quoted and may be broken into several tokens.
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result


class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self, n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in the current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between '#if 0' .. '#endif' in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def optimizeAll(self, macros):
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        indent = 0
        for b in self.blocks:
            indent = b.write(out, indent)

    def removeVarsAndFuncs(self, keep):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/struct/union declarations.

        In addition, remove any macros that are expanded in the headers.
        Usually, these macros expand to static inline functions, which is why
        they are removed.

        However, we keep the definitions corresponding to the set of known
        static inline functions given in the set 'keep', which is useful for
        optimized byteorder swap functions and the like.
        """

        # state = NORMAL      => normal (i.e. LN + spaces)
        # state = OTHER_DECL  => typedef/struct encountered, ends with ";"
        # state = VAR_DECL    => var declaration encountered, ends with ";"
        # state = FUNC_DECL   => func declaration encountered, ends with "}"
        NORMAL = 0
        OTHER_DECL = 1
        VAR_DECL = 2
        FUNC_DECL = 3

        state = NORMAL
        depth = 0
        blocksToKeep = []
        blocksInProgress = []
        blocksOfDirectives = []
        ident = ""
        state_token = ""
        macros = set()
        for block in self.blocks:
            if block.isDirective():
                # Record all macros.
                if block.directive == 'define':
                    macro_name = block.define_id
                    paren_index = macro_name.find('(')
                    if paren_index == -1:
                        macros.add(macro_name)
                    else:
                        macros.add(macro_name[0:paren_index])
                blocksInProgress.append(block)
                # If this is in a function/variable declaration, we might need
                # to emit the directives alone, so save them separately.
                blocksOfDirectives.append(block)
                continue

            numTokens = len(block.tokens)
            lastTerminatorIndex = 0
            i = 0
            while i < numTokens:
                token_id = block.tokens[i].id
                terminator = False
                if token_id == '{':
                    depth += 1
                    if (i >= 2 and block.tokens[i-2].id == 'extern' and
                            block.tokens[i-1].id == '"C"'):
                        # For an 'extern "C" {', pretend as though this is
                        # depth 0.
                        depth -= 1
                elif token_id == '}':
                    if depth > 0:
                        depth -= 1
                    if depth == 0:
                        if state == OTHER_DECL:
                            # Loop through until we hit the ';'.
                            i += 1
                            while i < numTokens:
                                if block.tokens[i].id == ';':
                                    token_id = ';'
                                    break
                                i += 1
                            # If we didn't hit the ';', just consider this the
                            # terminator anyway.
                            terminator = True
                elif depth == 0:
                    if token_id == ';':
                        if state == NORMAL:
                            blocksToKeep.extend(blocksInProgress)
                            blocksInProgress = []
                            blocksOfDirectives = []
                            state = FUNC_DECL
                        terminator = True
                    elif (state == NORMAL and token_id == '(' and i >= 1 and
                          block.tokens[i-1].kind == TokenKind.IDENTIFIER and
                          block.tokens[i-1].id in macros):
                        # This is a plain macro being expanded in the header
                        # which needs to be removed.
                        blocksToKeep.extend(blocksInProgress)
                        if lastTerminatorIndex < i - 1:
                            blocksToKeep.append(
                                Block(block.tokens[lastTerminatorIndex:i-1]))
                        blocksInProgress = []
                        blocksOfDirectives = []

                        # Skip until we see the terminating ')'.
                        i += 1
                        paren_depth = 1
                        while i < numTokens:
                            if block.tokens[i].id == ')':
                                paren_depth -= 1
                                if paren_depth == 0:
                                    break
                            elif block.tokens[i].id == '(':
                                paren_depth += 1
                            i += 1
                        lastTerminatorIndex = i + 1
                    elif (state != FUNC_DECL and token_id == '(' and
                          state_token != 'typedef'):
                        blocksToKeep.extend(blocksInProgress)
                        blocksInProgress = []
                        blocksOfDirectives = []
                        state = VAR_DECL
                    elif state == NORMAL and token_id in ['struct', 'typedef',
                                                          'enum', 'union',
                                                          '__extension__']:
                        state = OTHER_DECL
                        state_token = token_id
                    elif block.tokens[i].kind == TokenKind.IDENTIFIER:
                        if state != VAR_DECL or ident == "":
                            ident = token_id

                if terminator:
                    if state != VAR_DECL and state != FUNC_DECL or ident in keep:
                        blocksInProgress.append(
                            Block(block.tokens[lastTerminatorIndex:i+1]))
                        blocksToKeep.extend(blocksInProgress)
                    else:
                        # Only keep the directives found.
                        blocksToKeep.extend(blocksOfDirectives)
                    lastTerminatorIndex = i + 1
                    blocksInProgress = []
                    blocksOfDirectives = []
                    state = NORMAL
                    ident = ""
                    state_token = ""
                i += 1
            if lastTerminatorIndex < numTokens:
                blocksInProgress.append(
                    Block(block.tokens[lastTerminatorIndex:numTokens]))
        if len(blocksInProgress) > 0:
            blocksToKeep.extend(blocksInProgress)
        self.blocks = blocksToKeep

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict."""
        extra_includes = []
        for b in self.blocks:
            made_change = False
            if b.isInclude() is None:
                i = 0
                while i < len(b.tokens):
                    tok = b.tokens[i]
                    if (tok.kind == TokenKind.KEYWORD and tok.id == 'struct'
                            and (i + 2) < len(b.tokens)
                            and b.tokens[i + 2].id == '{'):
                        struct_name = b.tokens[i + 1].id
                        if struct_name in kernel_struct_replacements:
                            extra_includes.append("<bits/%s.h>" % struct_name)
                            end = i + 2
                            while end < len(b.tokens) and b.tokens[end].id != '}':
                                end += 1
                            end += 1  # Swallow '}'
                            while end < len(b.tokens) and b.tokens[end].id != ';':
                                end += 1
                            end += 1  # Swallow ';'
                            # Remove these tokens. We'll replace them later
                            # with a #include block.
                            b.tokens[i:end] = []
                            made_change = True
                            # We've just modified b.tokens, so revisit the
                            # current offset.
                            continue
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True
                    i += 1

                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)

        for extra_include in extra_includes:
            replacement = CppStringTokenizer(extra_include)
            self.blocks.insert(2, Block(replacement.tokens, directive='include'))


def strip_space(s):
    """Strip out redundant spaces in a given string."""

    # NOTE: This should be smarter and not destroy spaces in string tokens.
    replacements = {' . ': '.',
                    ' [': '[',
                    '[ ': '[',
                    ' ]': ']',
                    '( ': '(',
                    ' )': ')',
                    ' ,': ',',
                    '# ': '#',
                    ' ;': ';',
                    '~ ': '~',
                    ' -> ': '->'}
    result = s
    for r in replacements:
        result = result.replace(r, replacements[r])

    # Remove the space between the function name and the parenthesis.
    result = re.sub(r'(\w+) \(', r'\1(', result)
    return result


class BlockParser(object):
    """A class that converts an input source file into a BlockList object."""

    def __init__(self, tokzer=None):
        """Initialize a block parser.

        The input source is provided through a Tokenizer object.
        """
        self._tokzer = tokzer
        self._parsed = False

    @property
    def parsed(self):
        return self._parsed

    @staticmethod
    def _short_extent(extent):
        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
                                  extent.end.line, extent.end.column)

    def getBlocks(self, tokzer=None):
        """Return all the blocks parsed."""

        def consume_extent(i, tokens, extent=None, detect_change=False):
            """Return tokens that belong to the given extent.

            It parses all the tokens that follow tokens[i], until getting out
            of the extent. When detect_change is True, it may terminate early
            when detecting preprocessing directives inside the extent.
            """

            result = []
            if extent is None:
                extent = tokens[i].cursor.extent

            while i < len(tokens) and tokens[i].location in extent:
                t = tokens[i]
                if debugBlockParser:
                    print ' ' * 2, t.id, t.kind, t.cursor.kind
                if (detect_change and t.cursor.extent != extent and
                        t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
                    break
                result.append(t)
                i += 1
            return (i, result)

        def consume_line(i, tokens):
            """Return tokens that follow tokens[i] in the same line."""
            result = []
            line = tokens[i].location.line
            while i < len(tokens) and tokens[i].location.line == line:
                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                    break
                result.append(tokens[i])
                i += 1
            return (i, result)

        if tokzer is None:
            tokzer = self._tokzer
        tokens = tokzer.tokens

        blocks = []
        buf = []
        i = 0

        while i < len(tokens):
            t = tokens[i]
            cursor = t.cursor

            if debugBlockParser:
                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
                                        cursor.kind,
                                        self._short_extent(cursor.extent)))

            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                j = i
                if j + 1 >= len(tokens):
                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
                directive = tokens[j+1].id

                if directive == 'define':
                    if i+2 >= len(tokens):
                        raise BadExpectedToken("### BAD TOKEN at %s" %
                                               (tokens[i].location))

                    # Skip '#' and 'define'.
                    extent = tokens[i].cursor.extent
                    i += 2
                    id = ''
                    # We need to separate the id from the rest of the line,
                    # especially for function-like macros.
                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
                        (tokens[i].location.column + len(tokens[i].spelling) ==
                         tokens[i+1].location.column)):
                        while i < len(tokens):
                            id += tokens[i].id
                            if tokens[i].spelling == ')':
                                i += 1
                                break
                            i += 1
                    else:
                        id += tokens[i].id
                        # Advance to the next token that follows the macro id.
                        i += 1

                    (i, ret) = consume_extent(i, tokens, extent=extent)
                    blocks.append(Block(ret, directive=directive,
                                        lineno=t.location.line, identifier=id))

                else:
                    (i, ret) = consume_extent(i, tokens)
                    blocks.append(Block(ret[2:], directive=directive,
                                        lineno=t.location.line))

            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []
                directive = tokens[i+1].id
                (i, ret) = consume_extent(i, tokens)

                blocks.append(Block(ret[2:], directive=directive,
                                    lineno=t.location.line))

            elif cursor.kind == CursorKind.VAR_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            elif cursor.kind == CursorKind.FUNCTION_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            else:
                (i, ret) = consume_line(i, tokens)
                buf += ret

        if buf:
            blocks.append(Block(buf))

        # _parsed=True indicates a successful parsing, although it may result
        # in an empty BlockList.
        self._parsed = True

        return BlockList(blocks)

    def parse(self, tokzer):
        return self.getBlocks(tokzer)

    def parseFile(self, path):
        return self.getBlocks(CppFileTokenizer(path))


class BlockParserTests(unittest.TestCase):
    """BlockParser unit tests."""

    def get_blocks(self, lines):
        blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
        return map(lambda a: str(a), blocks)

    def test_hash(self):
        self.assertEqual(self.get_blocks(["#error hello"]), ["#error hello"])

    def test_empty_line(self):
        self.assertEqual(self.get_blocks(["foo", "", "bar"]), ["foo bar"])

    def test_hash_with_space(self):
        # We currently cannot handle the following case with libclang
        # properly. Fortunately it doesn't appear in current headers.
        #self.assertEqual(self.get_blocks(["foo", " # ", "bar"]), ["foo", "bar"])
        pass

    def test_with_comment(self):
        self.assertEqual(self.get_blocks(["foo",
                                          " # /* ahah */ if defined(__KERNEL__) /* more */",
                                          "bar", "#endif"]),
                         ["foo", "#ifdef __KERNEL__", "bar", "#endif"])


###############################################################################
###############################################################################
#####                                                                     #####
#####           B L O C K   L I S T   O P T I M I Z A T I O N             #####
#####                                                                     #####
###############################################################################
###############################################################################


def find_matching_endif(blocks, i):
    """Traverse the blocks to find the matching #endif."""
    n = len(blocks)
    depth = 1
    while i < n:
        if blocks[i].isDirective():
            dir_ = blocks[i].directive
            if dir_ in ["if", "ifndef", "ifdef"]:
                depth += 1
            elif depth == 1 and dir_ in ["else", "elif"]:
                return i
            elif dir_ == "endif":
                depth -= 1
                if depth == 0:
                    return i
        i += 1
    return i


def optimize_if01(blocks):
    """Remove the code between '#if 0' .. '#endif' in a list of CppBlocks."""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            start_dir = blocks[j].directive
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                if start_dir == "elif":
                    # Put an endif since we started with an elif.
                    result += blocks[j:j+1]
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find the corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        start_dir = blocks[j].directive
        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif into the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                # Add the endif into the results.
                result += blocks[k:k+1]
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif into the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                blocks[k].directive = "elif"
            else:
                blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result


class OptimizerTests(unittest.TestCase):
    def parse(self, text, macros=None):
        out = utils.StringOutput()
        blocks = BlockParser().parse(CppStringTokenizer(text))
        blocks.replaceTokens(kernel_token_replacements)
        blocks.optimizeAll(macros)
        blocks.write(out)
        return out.get()

    def test_if1(self):
        text = """\
#if 1
#define GOOD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if0(self):
        text = """\
#if 0
#define SHOULD_SKIP1
#define SHOULD_SKIP2
#endif
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_if1_else(self):
        text = """\
#if 1
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if0_else(self):
        text = """\
#if 0
#define BAD
#else
#define GOOD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1(self):
        text = """\
#if defined(something)
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1_macro(self):
        text = """\
#if defined(something)
#define EXISTS
#elif defined(WILL_BE_ONE)
#define GOOD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)

    def test_if_elif1_else(self):
        text = """\
#if defined(something)
#define EXISTS
#elif 1
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1_else_macro(self):
        text = """\
#if defined(something)
#define EXISTS
#elif defined(WILL_BE_ONE)
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)

    def test_macro_set_to_undefined_single(self):
        text = """\
#if defined(__KERNEL__)
#define BAD_KERNEL
#endif
"""
        expected = ""
        macros = {"__KERNEL__": kCppUndefinedMacro}
        self.assertEqual(self.parse(text, macros), expected)

    def test_macro_set_to_undefined_if(self):
        text = """\
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define CHECK
#endif
"""
        expected = """\
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define CHECK
#endif
"""
        macros = {"__KERNEL__": kCppUndefinedMacro}
        self.assertEqual(self.parse(text, macros), expected)

    def test_endif_comment_removed(self):
        text = """\
#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */
"""
        expected = """\
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_multilevel_if0(self):
        text = """\
#if 0
#if 1
#define BAD_6
#endif
#endif
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)


class FullPathTest(unittest.TestCase):
    """Test of the full path parsing."""

    def parse(self, text, keep=None):
        if not keep:
            keep = set()
        out = utils.StringOutput()
        blocks = BlockParser().parse(CppStringTokenizer(text))
        blocks.removeVarsAndFuncs(keep)
        blocks.replaceTokens(kernel_token_replacements)
        blocks.optimizeAll(None)
        blocks.write(out)
        return out.get()

    def test_function_removed(self):
        text = """\
static inline __u64 function()
{
}
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_function_removed_with_struct(self):
        text = """\
static inline struct something* function()
{
}
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_function_kept(self):
        text = """\
static inline __u64 function()
{
}
"""
        expected = """\
static inline __u64 function() {
}
"""
        self.assertEqual(self.parse(text, set(["function"])), expected)

    def test_var_removed(self):
        text = "__u64 variable;"
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_var_kept(self):
        text = "__u64 variable;"
        expected = "__u64 variable;\n"
        self.assertEqual(self.parse(text, set(["variable"])), expected)

    def test_keep_function_typedef(self):
        text = "typedef void somefunction_t(void);"
        expected = "typedef void somefunction_t(void);\n"
        self.assertEqual(self.parse(text), expected)

    def test_struct_keep_attribute(self):
        text = """\
struct something_s {
  __u32 s1;
  __u32 s2;
} __attribute__((packed));
"""
        expected = """\
struct something_s {
  __u32 s1;
  __u32 s2;
} __attribute__((packed));
"""
        self.assertEqual(self.parse(text), expected)


    def test_function_keep_attribute_structs(self):
        text = """\
static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
}
"""
        expected = """\
static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
}
"""
        self.assertEqual(self.parse(text, set(["function"])), expected)

    def test_struct_after_struct(self):
        text = """\
struct first {
};

struct second {
  unsigned short s1;
#define SOMETHING 8
  unsigned short s2;
};
"""
        expected = """\
struct first {
};
struct second {
  unsigned short s1;
#define SOMETHING 8
  unsigned short s2;
};
"""
        self.assertEqual(self.parse(text), expected)

    def test_other_not_removed(self):
        text = """\
typedef union {
  __u64 tu1;
  __u64 tu2;
} typedef_name;

union {
  __u64 u1;
  __u64 u2;
};

struct {
  __u64 s1;
  __u64 s2;
};

enum {
  ENUM1 = 0,
  ENUM2,
};

__extension__ typedef __signed__ long long __s64;
"""
        expected = """\
typedef union {
  __u64 tu1;
  __u64 tu2;
} typedef_name;
union {
  __u64 u1;
  __u64 u2;
};
struct {
  __u64 s1;
  __u64 s2;
};
enum {
  ENUM1 = 0,
  ENUM2,
};
__extension__ typedef __signed__ long long __s64;
"""
        self.assertEqual(self.parse(text), expected)

    def test_semicolon_after_function(self):
        text = """\
static inline __u64 function()
{
};

struct should_see {
  __u32 field;
};
"""
        expected = """\
struct should_see {
  __u32 field;
};
"""
        self.assertEqual(self.parse(text), expected)

    def test_define_in_middle_keep(self):
        text = """\
enum {
  ENUM0 = 0x10,
  ENUM1 = 0x20,
#define SOMETHING SOMETHING_ELSE
  ENUM2 = 0x40,
};
"""
        expected = """\
enum {
  ENUM0 = 0x10,
  ENUM1 = 0x20,
#define SOMETHING SOMETHING_ELSE
  ENUM2 = 0x40,
};
"""
        self.assertEqual(self.parse(text), expected)

    def test_define_in_middle_remove(self):
        text = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        expected = """\
#define SOMETHING1 SOMETHING_ELSE1
#define SOMETHING2 SOMETHING_ELSE2
"""
        self.assertEqual(self.parse(text), expected)

    def test_define_in_middle_force_keep(self):
        text = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        expected = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        self.assertEqual(self.parse(text, set(["function"])), expected)

    def test_define_before_remove(self):
        text = """\
#define SHOULD_BE_KEPT NOTHING1
#define ANOTHER_TO_KEEP NOTHING2
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        expected = """\
#define SHOULD_BE_KEPT NOTHING1
#define ANOTHER_TO_KEEP NOTHING2
#define SOMETHING1 SOMETHING_ELSE1
#define SOMETHING2 SOMETHING_ELSE2
"""
        self.assertEqual(self.parse(text), expected)

    def test_extern_C(self):
        text = """\
#if defined(__cplusplus)
extern "C" {
#endif

struct something {
};

#if defined(__cplusplus)
}
#endif
"""
        expected = """\
#ifdef __cplusplus
extern "C" {
#endif
struct something {
};
#ifdef __cplusplus
}
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_macro_definition_removed(self):
        text = """\
#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
MACRO_FUNCTION_NO_PARAMS()

#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
MACRO_FUNCTION_PARAMS(a = 1)

something that should still be kept
MACRO_FUNCTION_PARAMS(b)
"""
        expected = """\
#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
something that should still be kept
"""
        self.assertEqual(self.parse(text), expected)


if __name__ == '__main__':
    unittest.main()
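
# A minimal sketch of how this test suite can be invoked directly, assuming the
# environment described at the top of this file (ANDROID_BUILD_TOP and the
# libclang Python bindings) is available; the file name 'cpp.py' below is an
# assumption about where this module lives:
#
#   python cpp.py                              # run every test case
#   python cpp.py OptimizerTests               # run one test class
#   python cpp.py FullPathTest.test_extern_C   # run a single test method
#
# unittest.main() forwards these command-line arguments to the standard
# unittest test loader.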