1#!/usr/bin/env python3 2"""A glorified C pre-processor parser.""" 3 4import ctypes 5import logging 6import os 7import re 8import site 9import unittest 10import utils 11 12top = os.getenv('ANDROID_BUILD_TOP') 13if top is None: 14 utils.panic('ANDROID_BUILD_TOP not set.\n') 15 16# Set up the env vars for libclang. 17site.addsitedir(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/python3/site-packages/')) 18 19import clang.cindex 20from clang.cindex import conf 21from clang.cindex import Cursor 22from clang.cindex import CursorKind 23from clang.cindex import SourceLocation 24from clang.cindex import SourceRange 25from clang.cindex import TokenGroup 26from clang.cindex import TokenKind 27from clang.cindex import TranslationUnit 28 29# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc. 30# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help. 31clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/libclang.so')) 32 33from defaults import * 34 35 36debugBlockParser = False 37debugCppExpr = False 38debugOptimIf01 = False 39 40############################################################################### 41############################################################################### 42##### ##### 43##### C P P T O K E N S ##### 44##### ##### 45############################################################################### 46############################################################################### 47 48# the list of supported C-preprocessor tokens 49# plus a couple of C tokens as well 50tokEOF = "\0" 51tokLN = "\n" 52tokSTRINGIFY = "#" 53tokCONCAT = "##" 54tokLOGICAND = "&&" 55tokLOGICOR = "||" 56tokSHL = "<<" 57tokSHR = ">>" 58tokEQUAL = "==" 59tokNEQUAL = "!=" 60tokLT = "<" 61tokLTE = "<=" 62tokGT = ">" 63tokGTE = ">=" 64tokELLIPSIS = "..." 65tokSPACE = " " 66tokDEFINED = "defined" 67tokLPAREN = "(" 68tokRPAREN = ")" 69tokNOT = "!" 70tokPLUS = "+" 71tokMINUS = "-" 72tokMULTIPLY = "*" 73tokDIVIDE = "/" 74tokMODULUS = "%" 75tokBINAND = "&" 76tokBINOR = "|" 77tokBINXOR = "^" 78tokCOMMA = "," 79tokLBRACE = "{" 80tokRBRACE = "}" 81tokARROW = "->" 82tokINCREMENT = "++" 83tokDECREMENT = "--" 84tokNUMBER = "<number>" 85tokIDENT = "<ident>" 86tokSTRING = "<string>" 87 88 89class Token(clang.cindex.Token): 90 """A class that represents one token after parsing. 91 92 It inherits the class in libclang, with an extra id property to hold the 93 new spelling of the token. The spelling property in the base class is 94 defined as read-only. New names after macro instantiation are saved in 95 their ids now. It also facilitates the renaming of directive optimizations 96 like replacing 'ifndef X' with 'if !defined(X)'. 97 98 It also overrides the cursor property of the base class. Because the one 99 in libclang always queries based on a single token, which usually doesn't 100 hold useful information. The cursor in this class can be set by calling 101 CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the 102 base class. 103 """ 104 105 def __init__(self, tu=None, group=None, int_data=None, ptr_data=None, 106 cursor=None): 107 clang.cindex.Token.__init__(self) 108 self._id = None 109 self._tu = tu 110 self._group = group 111 self._cursor = cursor 112 # self.int_data and self.ptr_data are from the base class. But 113 # self.int_data doesn't accept a None value. 114 if int_data is not None: 115 self.int_data = int_data 116 self.ptr_data = ptr_data 117 118 @property 119 def id(self): 120 """Name of the token.""" 121 if self._id is None: 122 return self.spelling 123 else: 124 return self._id 125 126 @id.setter 127 def id(self, new_id): 128 """Setting name of the token.""" 129 self._id = new_id 130 131 @property 132 def cursor(self): 133 if self._cursor is None: 134 self._cursor = clang.cindex.Token.cursor 135 return self._cursor 136 137 @cursor.setter 138 def cursor(self, new_cursor): 139 self._cursor = new_cursor 140 141 def __repr__(self): 142 if self.id == 'defined': 143 return self.id 144 elif self.kind == TokenKind.IDENTIFIER: 145 return "(ident %s)" % self.id 146 147 return self.id 148 149 def __str__(self): 150 return self.id 151 152 153class BadExpectedToken(Exception): 154 """An exception that will be raised for unexpected tokens.""" 155 pass 156 157 158class UnparseableStruct(Exception): 159 """An exception that will be raised for structs that cannot be parsed.""" 160 pass 161 162 163# The __contains__ function in libclang SourceRange class contains a bug. It 164# gives wrong result when dealing with single line range. 165# Bug filed with upstream: 166# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277 167def SourceRange__contains__(self, other): 168 """Determine if a given location is inside the range.""" 169 if not isinstance(other, SourceLocation): 170 return False 171 if other.file is None and self.start.file is None: 172 pass 173 elif (self.start.file.name != other.file.name or 174 other.file.name != self.end.file.name): 175 # same file name 176 return False 177 # same file, in between lines 178 if self.start.line < other.line < self.end.line: 179 return True 180 # same file, same line 181 elif self.start.line == other.line == self.end.line: 182 if self.start.column <= other.column <= self.end.column: 183 return True 184 elif self.start.line == other.line: 185 # same file first line 186 if self.start.column <= other.column: 187 return True 188 elif other.line == self.end.line: 189 # same file last line 190 if other.column <= self.end.column: 191 return True 192 return False 193 194 195SourceRange.__contains__ = SourceRange__contains__ 196 197 198################################################################################ 199################################################################################ 200##### ##### 201##### C P P T O K E N I Z E R ##### 202##### ##### 203################################################################################ 204################################################################################ 205 206 207class CppTokenizer(object): 208 """A tokenizer that converts some input text into a list of tokens. 209 210 It calls libclang's tokenizer to get the parsed tokens. In addition, it 211 updates the cursor property in each token after parsing, by calling 212 getTokensWithCursors(). 213 """ 214 215 clang_flags = ['-E', '-x', 'c'] 216 options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD 217 218 def __init__(self): 219 """Initialize a new CppTokenizer object.""" 220 self._indexer = clang.cindex.Index.create() 221 self._tu = None 222 self._index = 0 223 self.tokens = None 224 225 def _getTokensWithCursors(self): 226 """Helper method to return all tokens with their cursors. 227 228 The cursor property in a clang Token doesn't provide enough 229 information. Because it is queried based on single token each time 230 without any context, i.e. via calling conf.lib.clang_annotateTokens() 231 with only one token given. So we often see 'INVALID_FILE' in one 232 token's cursor. In this function it passes all the available tokens 233 to get more informative cursors. 234 """ 235 236 tokens_memory = ctypes.POINTER(clang.cindex.Token)() 237 tokens_count = ctypes.c_uint() 238 239 conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent, 240 ctypes.byref(tokens_memory), 241 ctypes.byref(tokens_count)) 242 243 count = int(tokens_count.value) 244 245 # If we get no tokens, no memory was allocated. Be sure not to return 246 # anything and potentially call a destructor on nothing. 247 if count < 1: 248 return 249 250 cursors = (Cursor * count)() 251 cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor)) 252 253 conf.lib.clang_annotateTokens(self._tu, tokens_memory, count, 254 cursors_memory) 255 256 tokens_array = ctypes.cast( 257 tokens_memory, 258 ctypes.POINTER(clang.cindex.Token * count)).contents 259 token_group = TokenGroup(self._tu, tokens_memory, tokens_count) 260 261 tokens = [] 262 for i in range(0, count): 263 token = Token(self._tu, token_group, 264 int_data=tokens_array[i].int_data, 265 ptr_data=tokens_array[i].ptr_data, 266 cursor=cursors[i]) 267 # We only want non-comment tokens. 268 if token.kind != TokenKind.COMMENT: 269 tokens.append(token) 270 271 return tokens 272 273 def parseString(self, lines): 274 """Parse a list of text lines into a BlockList object.""" 275 file_ = 'no-filename-available.c' 276 self._tu = self._indexer.parse(file_, self.clang_flags, 277 unsaved_files=[(file_, lines)], 278 options=self.options) 279 self.tokens = self._getTokensWithCursors() 280 281 def parseFile(self, file_): 282 """Parse a file into a BlockList object.""" 283 self._tu = self._indexer.parse(file_, self.clang_flags, 284 options=self.options) 285 self.tokens = self._getTokensWithCursors() 286 287 def nextToken(self): 288 """Return next token from the list.""" 289 if self._index < len(self.tokens): 290 t = self.tokens[self._index] 291 self._index += 1 292 return t 293 else: 294 return None 295 296 297class CppStringTokenizer(CppTokenizer): 298 """A CppTokenizer derived class that accepts a string of text as input.""" 299 300 def __init__(self, line): 301 CppTokenizer.__init__(self) 302 self.parseString(line) 303 304 305class CppFileTokenizer(CppTokenizer): 306 """A CppTokenizer derived class that accepts a file as input.""" 307 308 def __init__(self, file_): 309 CppTokenizer.__init__(self) 310 self.parseFile(file_) 311 312 313# Unit testing 314# 315class CppTokenizerTests(unittest.TestCase): 316 """CppTokenizer tests.""" 317 318 def get_tokens(self, token_string, line_col=False): 319 tokens = CppStringTokenizer(token_string) 320 token_list = [] 321 while True: 322 token = tokens.nextToken() 323 if not token: 324 break 325 if line_col: 326 token_list.append((token.id, token.location.line, 327 token.location.column)) 328 else: 329 token_list.append(token.id) 330 return token_list 331 332 def test_hash(self): 333 self.assertEqual(self.get_tokens("#an/example && (01923_xy)"), 334 ["#", "an", "/", "example", tokLOGICAND, tokLPAREN, 335 "01923_xy", tokRPAREN]) 336 337 def test_parens(self): 338 self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"), 339 ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND, 340 "defined", tokLPAREN, "BAZ", tokRPAREN]) 341 342 def test_comment(self): 343 self.assertEqual(self.get_tokens("/*\n#\n*/"), []) 344 345 def test_line_cross(self): 346 self.assertEqual(self.get_tokens("first\nsecond"), ["first", "second"]) 347 348 def test_line_cross_line_col(self): 349 self.assertEqual(self.get_tokens("first second\n third", True), 350 [("first", 1, 1), ("second", 1, 7), ("third", 2, 3)]) 351 352 def test_comment_line_col(self): 353 self.assertEqual(self.get_tokens("boo /* what the\nhell */", True), 354 [("boo", 1, 1)]) 355 356 def test_escapes(self): 357 self.assertEqual(self.get_tokens("an \\\n example", True), 358 [("an", 1, 1), ("example", 2, 2)]) 359 360 361################################################################################ 362################################################################################ 363##### ##### 364##### C P P E X P R E S S I O N S ##### 365##### ##### 366################################################################################ 367################################################################################ 368 369 370class CppExpr(object): 371 """A class that models the condition of #if directives into an expr tree. 372 373 Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where 374 "op" is a string describing the operation 375 """ 376 377 unaries = ["!", "~"] 378 binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", 379 "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"] 380 precedences = { 381 "?": 1, ":": 1, 382 "||": 2, 383 "&&": 3, 384 "|": 4, 385 "^": 5, 386 "&": 6, 387 "==": 7, "!=": 7, 388 "<": 8, "<=": 8, ">": 8, ">=": 8, 389 "<<": 9, ">>": 9, 390 "+": 10, "-": 10, 391 "*": 11, "/": 11, "%": 11, 392 "!": 12, "~": 12 393 } 394 395 def __init__(self, tokens): 396 """Initialize a CppExpr. 'tokens' must be a CppToken list.""" 397 self.tokens = tokens 398 self._num_tokens = len(tokens) 399 self._index = 0 400 401 if debugCppExpr: 402 print("CppExpr: trying to parse %s" % repr(tokens)) 403 self.expr = self.parseExpression(0) 404 if debugCppExpr: 405 print("CppExpr: got " + repr(self.expr)) 406 if self._index != self._num_tokens: 407 self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s" 408 % (self._index, self._num_tokens, repr(tokens))) 409 410 def throw(self, exception, msg): 411 if self._index < self._num_tokens: 412 tok = self.tokens[self._index] 413 print("%d:%d: %s" % (tok.location.line, tok.location.column, msg)) 414 else: 415 print("EOF: %s" % msg) 416 raise exception(msg) 417 418 def expectId(self, id): 419 """Check that a given token id is at the current position.""" 420 token = self.tokens[self._index] 421 if self._index >= self._num_tokens or token.id != id: 422 self.throw(BadExpectedToken, 423 "### expecting '%s' in expression, got '%s'" % ( 424 id, token.id)) 425 self._index += 1 426 427 def is_decimal(self): 428 token = self.tokens[self._index].id 429 if token[-1] in "ULul": 430 token = token[:-1] 431 try: 432 val = int(token, 10) 433 self._index += 1 434 return ('int', val) 435 except ValueError: 436 return None 437 438 def is_octal(self): 439 token = self.tokens[self._index].id 440 if token[-1] in "ULul": 441 token = token[:-1] 442 if len(token) < 2 or token[0] != '0': 443 return None 444 try: 445 val = int(token, 8) 446 self._index += 1 447 return ('oct', val) 448 except ValueError: 449 return None 450 451 def is_hexadecimal(self): 452 token = self.tokens[self._index].id 453 if token[-1] in "ULul": 454 token = token[:-1] 455 if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'): 456 return None 457 try: 458 val = int(token, 16) 459 self._index += 1 460 return ('hex', val) 461 except ValueError: 462 return None 463 464 def is_integer(self): 465 if self.tokens[self._index].kind != TokenKind.LITERAL: 466 return None 467 468 c = self.is_hexadecimal() 469 if c: 470 return c 471 472 c = self.is_octal() 473 if c: 474 return c 475 476 c = self.is_decimal() 477 if c: 478 return c 479 480 return None 481 482 def is_number(self): 483 t = self.tokens[self._index] 484 if t.id == tokMINUS and self._index + 1 < self._num_tokens: 485 self._index += 1 486 c = self.is_integer() 487 if c: 488 op, val = c 489 return (op, -val) 490 if t.id == tokPLUS and self._index + 1 < self._num_tokens: 491 self._index += 1 492 c = self.is_integer() 493 if c: 494 return c 495 496 return self.is_integer() 497 498 def is_defined(self): 499 t = self.tokens[self._index] 500 if t.id != tokDEFINED: 501 return None 502 503 # We have the defined keyword, check the rest. 504 self._index += 1 505 used_parens = False 506 if (self._index < self._num_tokens and 507 self.tokens[self._index].id == tokLPAREN): 508 used_parens = True 509 self._index += 1 510 511 if self._index >= self._num_tokens: 512 self.throw(BadExpectedToken, 513 "### 'defined' must be followed by macro name or left " 514 "paren") 515 516 t = self.tokens[self._index] 517 if t.kind != TokenKind.IDENTIFIER: 518 self.throw(BadExpectedToken, 519 "### 'defined' must be followed by macro name") 520 521 self._index += 1 522 if used_parens: 523 self.expectId(tokRPAREN) 524 525 return ("defined", t.id) 526 527 def is_call_or_ident(self): 528 if self._index >= self._num_tokens: 529 return None 530 531 t = self.tokens[self._index] 532 if t.kind != TokenKind.IDENTIFIER: 533 return None 534 535 name = t.id 536 537 self._index += 1 538 if (self._index >= self._num_tokens or 539 self.tokens[self._index].id != tokLPAREN): 540 return ("ident", name) 541 542 params = [] 543 depth = 1 544 self._index += 1 545 j = self._index 546 while self._index < self._num_tokens: 547 id = self.tokens[self._index].id 548 if id == tokLPAREN: 549 depth += 1 550 elif depth == 1 and (id == tokCOMMA or id == tokRPAREN): 551 k = self._index 552 param = self.tokens[j:k] 553 params.append(param) 554 if id == tokRPAREN: 555 break 556 j = self._index + 1 557 elif id == tokRPAREN: 558 depth -= 1 559 self._index += 1 560 561 if self._index >= self._num_tokens: 562 return None 563 564 self._index += 1 565 return ("call", (name, params)) 566 567 # Implements the "precedence climbing" algorithm from 568 # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm. 569 # The "classic" algorithm would be fine if we were using a tool to 570 # generate the parser, but we're not. Dijkstra's "shunting yard" 571 # algorithm hasn't been necessary yet. 572 573 def parseExpression(self, minPrecedence): 574 if self._index >= self._num_tokens: 575 return None 576 577 node = self.parsePrimary() 578 while (self.token() and self.isBinary(self.token()) and 579 self.precedence(self.token()) >= minPrecedence): 580 op = self.token() 581 self.nextToken() 582 rhs = self.parseExpression(self.precedence(op) + 1) 583 node = (op.id, node, rhs) 584 585 return node 586 587 def parsePrimary(self): 588 op = self.token() 589 if self.isUnary(op): 590 self.nextToken() 591 return (op.id, self.parseExpression(self.precedence(op))) 592 593 primary = None 594 if op.id == tokLPAREN: 595 self.nextToken() 596 primary = self.parseExpression(0) 597 self.expectId(tokRPAREN) 598 elif op.id == "?": 599 self.nextToken() 600 primary = self.parseExpression(0) 601 self.expectId(":") 602 elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL: 603 primary = self.is_number() 604 # Checking for 'defined' needs to come first now because 'defined' is 605 # recognized as IDENTIFIER. 606 elif op.id == tokDEFINED: 607 primary = self.is_defined() 608 elif op.kind == TokenKind.IDENTIFIER: 609 primary = self.is_call_or_ident() 610 else: 611 self.throw(BadExpectedToken, 612 "didn't expect to see a %s in factor" % ( 613 self.tokens[self._index].id)) 614 return primary 615 616 def isBinary(self, token): 617 return token.id in self.binaries 618 619 def isUnary(self, token): 620 return token.id in self.unaries 621 622 def precedence(self, token): 623 return self.precedences.get(token.id) 624 625 def token(self): 626 if self._index >= self._num_tokens: 627 return None 628 return self.tokens[self._index] 629 630 def nextToken(self): 631 self._index += 1 632 if self._index >= self._num_tokens: 633 return None 634 return self.tokens[self._index] 635 636 def dump_node(self, e): 637 op = e[0] 638 line = "(" + op 639 if op == "int": 640 line += " %d)" % e[1] 641 elif op == "oct": 642 line += " 0%o)" % e[1] 643 elif op == "hex": 644 line += " 0x%x)" % e[1] 645 elif op == "ident": 646 line += " %s)" % e[1] 647 elif op == "defined": 648 line += " %s)" % e[1] 649 elif op == "call": 650 arg = e[1] 651 line += " %s [" % arg[0] 652 prefix = "" 653 for param in arg[1]: 654 par = "" 655 for tok in param: 656 par += str(tok) 657 line += "%s%s" % (prefix, par) 658 prefix = "," 659 line += "])" 660 elif op in CppExpr.unaries: 661 line += " %s)" % self.dump_node(e[1]) 662 elif op in CppExpr.binaries: 663 line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) 664 else: 665 line += " ?%s)" % repr(e[1]) 666 667 return line 668 669 def __repr__(self): 670 return self.dump_node(self.expr) 671 672 def source_node(self, e): 673 op = e[0] 674 if op == "int": 675 return "%d" % e[1] 676 if op == "hex": 677 return "0x%x" % e[1] 678 if op == "oct": 679 return "0%o" % e[1] 680 if op == "ident": 681 # XXX: should try to expand 682 return e[1] 683 if op == "defined": 684 return "defined(%s)" % e[1] 685 686 prec = CppExpr.precedences.get(op, 1000) 687 arg = e[1] 688 if op in CppExpr.unaries: 689 arg_src = self.source_node(arg) 690 arg_op = arg[0] 691 arg_prec = CppExpr.precedences.get(arg_op, 1000) 692 if arg_prec < prec: 693 return "!(" + arg_src + ")" 694 else: 695 return "!" + arg_src 696 if op in CppExpr.binaries: 697 arg2 = e[2] 698 arg1_op = arg[0] 699 arg2_op = arg2[0] 700 arg1_src = self.source_node(arg) 701 arg2_src = self.source_node(arg2) 702 if CppExpr.precedences.get(arg1_op, 1000) < prec: 703 arg1_src = "(%s)" % arg1_src 704 if CppExpr.precedences.get(arg2_op, 1000) < prec: 705 arg2_src = "(%s)" % arg2_src 706 707 return "%s %s %s" % (arg1_src, op, arg2_src) 708 return "???" 709 710 def __str__(self): 711 return self.source_node(self.expr) 712 713 @staticmethod 714 def int_node(e): 715 if e[0] in ["int", "oct", "hex"]: 716 return e[1] 717 else: 718 return None 719 720 def toInt(self): 721 return self.int_node(self.expr) 722 723 def optimize_node(self, e, macros=None): 724 if macros is None: 725 macros = {} 726 op = e[0] 727 728 if op == "defined": 729 op, name = e 730 if name in macros: 731 if macros[name] == kCppUndefinedMacro: 732 return ("int", 0) 733 else: 734 try: 735 value = int(macros[name]) 736 return ("int", value) 737 except ValueError: 738 return ("defined", macros[name]) 739 740 if kernel_remove_config_macros and name.startswith("CONFIG_"): 741 return ("int", 0) 742 743 return e 744 745 elif op == "ident": 746 op, name = e 747 if name in macros: 748 try: 749 value = int(macros[name]) 750 expanded = ("int", value) 751 except ValueError: 752 expanded = ("ident", macros[name]) 753 return self.optimize_node(expanded, macros) 754 return e 755 756 elif op == "!": 757 op, v = e 758 v = self.optimize_node(v, macros) 759 if v[0] == "int": 760 if v[1] == 0: 761 return ("int", 1) 762 else: 763 return ("int", 0) 764 return ('!', v) 765 766 elif op == "&&": 767 op, l, r = e 768 l = self.optimize_node(l, macros) 769 r = self.optimize_node(r, macros) 770 li = self.int_node(l) 771 ri = self.int_node(r) 772 if li is not None: 773 if li == 0: 774 return ("int", 0) 775 else: 776 return r 777 elif ri is not None: 778 if ri == 0: 779 return ("int", 0) 780 else: 781 return l 782 return (op, l, r) 783 784 elif op == "||": 785 op, l, r = e 786 l = self.optimize_node(l, macros) 787 r = self.optimize_node(r, macros) 788 li = self.int_node(l) 789 ri = self.int_node(r) 790 if li is not None: 791 if li == 0: 792 return r 793 else: 794 return ("int", 1) 795 elif ri is not None: 796 if ri == 0: 797 return l 798 else: 799 return ("int", 1) 800 return (op, l, r) 801 802 else: 803 return e 804 805 def optimize(self, macros=None): 806 if macros is None: 807 macros = {} 808 self.expr = self.optimize_node(self.expr, macros) 809 810class CppExprTest(unittest.TestCase): 811 """CppExpr unit tests.""" 812 813 def get_expr(self, expr): 814 return repr(CppExpr(CppStringTokenizer(expr).tokens)) 815 816 def test_cpp_expr(self): 817 self.assertEqual(self.get_expr("0"), "(int 0)") 818 self.assertEqual(self.get_expr("1"), "(int 1)") 819 self.assertEqual(self.get_expr("-5"), "(int -5)") 820 self.assertEqual(self.get_expr("+1"), "(int 1)") 821 self.assertEqual(self.get_expr("0U"), "(int 0)") 822 self.assertEqual(self.get_expr("015"), "(oct 015)") 823 self.assertEqual(self.get_expr("015l"), "(oct 015)") 824 self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)") 825 self.assertEqual(self.get_expr("(0)"), "(int 0)") 826 self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))") 827 self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))") 828 self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)") 829 self.assertEqual(self.get_expr("EXAMPLE - 3"), 830 "(- (ident EXAMPLE) (int 3))") 831 self.assertEqual(self.get_expr("defined(EXAMPLE)"), 832 "(defined EXAMPLE)") 833 self.assertEqual(self.get_expr("defined ( EXAMPLE ) "), 834 "(defined EXAMPLE)") 835 self.assertEqual(self.get_expr("!defined(EXAMPLE)"), 836 "(! (defined EXAMPLE))") 837 self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"), 838 "(|| (defined ABC) (defined BINGO))") 839 self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])") 840 self.assertEqual(self.get_expr("A == 1 || defined(B)"), 841 "(|| (== (ident A) (int 1)) (defined B))") 842 843 def get_expr_optimize(self, expr, macros=None): 844 if macros is None: 845 macros = {} 846 e = CppExpr(CppStringTokenizer(expr).tokens) 847 e.optimize(macros) 848 return repr(e) 849 850 def test_cpp_expr_optimize(self): 851 self.assertEqual(self.get_expr_optimize("0"), "(int 0)") 852 self.assertEqual(self.get_expr_optimize("1"), "(int 1)") 853 self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)") 854 self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)") 855 self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)") 856 self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)") 857 self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)") 858 self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)") 859 self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)") 860 self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)") 861 self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)") 862 self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)") 863 self.assertEqual(self.get_expr_optimize("A"), "(ident A)") 864 self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)") 865 self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)") 866 self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)") 867 self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}), "(ident B)") 868 self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}), "(ident A)") 869 self.assertEqual(self.get_expr_optimize("A && B"), "(&& (ident A) (ident B))") 870 self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)") 871 self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"), "(- (ident EXAMPLE) (int 3))") 872 self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"), "(defined EXAMPLE)") 873 self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)", 874 {"EXAMPLE": "XOWOE"}), 875 "(defined XOWOE)") 876 self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)", 877 {"EXAMPLE": kCppUndefinedMacro}), 878 "(int 0)") 879 self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"), "(! (defined EXAMPLE))") 880 self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)", 881 {"EXAMPLE": "XOWOE"}), 882 "(! (defined XOWOE))") 883 self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)", 884 {"EXAMPLE": kCppUndefinedMacro}), 885 "(int 1)") 886 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"), 887 "(|| (defined A) (defined B))") 888 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 889 {"A": "1"}), 890 "(int 1)") 891 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 892 {"B": "1"}), 893 "(int 1)") 894 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 895 {"B": kCppUndefinedMacro}), 896 "(defined A)") 897 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 898 {"A": kCppUndefinedMacro, 899 "B": kCppUndefinedMacro}), 900 "(int 0)") 901 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"), 902 "(&& (defined A) (defined B))") 903 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 904 {"A": "1"}), 905 "(defined B)") 906 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 907 {"B": "1"}), 908 "(defined A)") 909 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 910 {"B": kCppUndefinedMacro}), 911 "(int 0)") 912 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 913 {"A": kCppUndefinedMacro}), 914 "(int 0)") 915 self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"), 916 "(|| (== (ident A) (int 1)) (defined B))") 917 self.assertEqual(self.get_expr_optimize( 918 "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", 919 {"__KERNEL__": kCppUndefinedMacro}), 920 "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))") 921 922 def get_expr_string(self, expr): 923 return str(CppExpr(CppStringTokenizer(expr).tokens)) 924 925 def test_cpp_expr_string(self): 926 self.assertEqual(self.get_expr_string("0"), "0") 927 self.assertEqual(self.get_expr_string("1"), "1") 928 self.assertEqual(self.get_expr_string("1 && 1"), "1 && 1") 929 self.assertEqual(self.get_expr_string("1 && 0"), "1 && 0") 930 self.assertEqual(self.get_expr_string("0 && 1"), "0 && 1") 931 self.assertEqual(self.get_expr_string("0 && 0"), "0 && 0") 932 self.assertEqual(self.get_expr_string("1 || 1"), "1 || 1") 933 self.assertEqual(self.get_expr_string("1 || 0"), "1 || 0") 934 self.assertEqual(self.get_expr_string("0 || 1"), "0 || 1") 935 self.assertEqual(self.get_expr_string("0 || 0"), "0 || 0") 936 self.assertEqual(self.get_expr_string("EXAMPLE"), "EXAMPLE") 937 self.assertEqual(self.get_expr_string("EXAMPLE - 3"), "EXAMPLE - 3") 938 self.assertEqual(self.get_expr_string("defined(EXAMPLE)"), "defined(EXAMPLE)") 939 self.assertEqual(self.get_expr_string("defined EXAMPLE"), "defined(EXAMPLE)") 940 self.assertEqual(self.get_expr_string("A == 1 || defined(B)"), "A == 1 || defined(B)") 941 942 943################################################################################ 944################################################################################ 945##### ##### 946##### C P P B L O C K ##### 947##### ##### 948################################################################################ 949################################################################################ 950 951 952class Block(object): 953 """A class used to model a block of input source text. 954 955 There are two block types: 956 - directive blocks: contain the tokens of a single pre-processor 957 directive (e.g. #if) 958 - text blocks, contain the tokens of non-directive blocks 959 960 The cpp parser class below will transform an input source file into a list 961 of Block objects (grouped in a BlockList object for convenience) 962 """ 963 964 def __init__(self, tokens, directive=None, lineno=0, identifier=None): 965 """Initialize a new block, if 'directive' is None, it is a text block. 966 967 NOTE: This automatically converts '#ifdef MACRO' into 968 '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'. 969 """ 970 971 if directive == "ifdef": 972 tok = Token() 973 tok.id = tokDEFINED 974 tokens = [tok] + tokens 975 directive = "if" 976 977 elif directive == "ifndef": 978 tok1 = Token() 979 tok2 = Token() 980 tok1.id = tokNOT 981 tok2.id = tokDEFINED 982 tokens = [tok1, tok2] + tokens 983 directive = "if" 984 985 self.tokens = tokens 986 self.directive = directive 987 self.define_id = identifier 988 if lineno > 0: 989 self.lineno = lineno 990 else: 991 self.lineno = self.tokens[0].location.line 992 993 if self.isIf(): 994 self.expr = CppExpr(self.tokens) 995 996 def isDirective(self): 997 """Return True iff this is a directive block.""" 998 return self.directive is not None 999 1000 def isConditional(self): 1001 """Return True iff this is a conditional directive block.""" 1002 return self.directive in ["if", "ifdef", "ifndef", "else", "elif", 1003 "endif"] 1004 1005 def isDefine(self): 1006 """Return the macro name in a #define directive, or None otherwise.""" 1007 if self.directive != "define": 1008 return None 1009 return self.define_id 1010 1011 def isIf(self): 1012 """Return True iff this is an #if-like directive block.""" 1013 return self.directive in ["if", "ifdef", "ifndef", "elif"] 1014 1015 def isEndif(self): 1016 """Return True iff this is an #endif directive block.""" 1017 return self.directive == "endif" 1018 1019 def isInclude(self): 1020 """Check whether this is a #include directive. 1021 1022 If true, returns the corresponding file name (with brackets or 1023 double-qoutes). None otherwise. 1024 """ 1025 1026 if self.directive != "include": 1027 return None 1028 return ''.join([str(x) for x in self.tokens]) 1029 1030 @staticmethod 1031 def format_blocks(tokens, indent=0): 1032 """Return the formatted lines of strings with proper indentation.""" 1033 newline = True 1034 result = [] 1035 buf = '' 1036 i = 0 1037 while i < len(tokens): 1038 t = tokens[i] 1039 if t.id == '{': 1040 buf += ' {' 1041 result.append(strip_space(buf)) 1042 # Do not indent if this is extern "C" { 1043 if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"': 1044 indent += 2 1045 buf = '' 1046 newline = True 1047 elif t.id == '}': 1048 if indent >= 2: 1049 indent -= 2 1050 if not newline: 1051 result.append(strip_space(buf)) 1052 # Look ahead to determine if it's the end of line. 1053 if (i + 1 < len(tokens) and 1054 (tokens[i+1].id == ';' or 1055 tokens[i+1].id in ['else', '__attribute__', 1056 '__attribute', '__packed'] or 1057 tokens[i+1].kind == TokenKind.IDENTIFIER)): 1058 buf = ' ' * indent + '}' 1059 newline = False 1060 else: 1061 result.append(' ' * indent + '}') 1062 buf = '' 1063 newline = True 1064 elif t.id == ';': 1065 result.append(strip_space(buf) + ';') 1066 buf = '' 1067 newline = True 1068 # We prefer a new line for each constant in enum. 1069 elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL: 1070 result.append(strip_space(buf) + ',') 1071 buf = '' 1072 newline = True 1073 else: 1074 if newline: 1075 buf += ' ' * indent + str(t) 1076 else: 1077 buf += ' ' + str(t) 1078 newline = False 1079 i += 1 1080 1081 if buf: 1082 result.append(strip_space(buf)) 1083 1084 return result, indent 1085 1086 def write(self, out, indent): 1087 """Dump the current block.""" 1088 # removeWhiteSpace() will sometimes creates non-directive blocks 1089 # without any tokens. These come from blocks that only contained 1090 # empty lines and spaces. They should not be printed in the final 1091 # output, and then should not be counted for this operation. 1092 # 1093 if self.directive is None and not self.tokens: 1094 return indent 1095 1096 if self.directive: 1097 out.write(str(self) + '\n') 1098 else: 1099 lines, indent = self.format_blocks(self.tokens, indent) 1100 for line in lines: 1101 out.write(line + '\n') 1102 1103 return indent 1104 1105 def __repr__(self): 1106 """Generate the representation of a given block.""" 1107 if self.directive: 1108 result = "#%s " % self.directive 1109 if self.isIf(): 1110 result += repr(self.expr) 1111 else: 1112 for tok in self.tokens: 1113 result += repr(tok) 1114 else: 1115 result = "" 1116 for tok in self.tokens: 1117 result += repr(tok) 1118 1119 return result 1120 1121 def __str__(self): 1122 """Generate the string representation of a given block.""" 1123 if self.directive: 1124 # "#if" 1125 if self.directive == "if": 1126 # small optimization to re-generate #ifdef and #ifndef 1127 e = self.expr.expr 1128 op = e[0] 1129 if op == "defined": 1130 result = "#ifdef %s" % e[1] 1131 elif op == "!" and e[1][0] == "defined": 1132 result = "#ifndef %s" % e[1][1] 1133 else: 1134 result = "#if " + str(self.expr) 1135 1136 # "#define" 1137 elif self.isDefine(): 1138 result = "#%s %s" % (self.directive, self.define_id) 1139 if self.tokens: 1140 result += " " 1141 expr = strip_space(' '.join([tok.id for tok in self.tokens])) 1142 # remove the space between name and '(' in function call 1143 result += re.sub(r'(\w+) \(', r'\1(', expr) 1144 1145 # "#error" 1146 # Concatenating tokens with a space separator, because they may 1147 # not be quoted and broken into several tokens 1148 elif self.directive == "error": 1149 result = "#error %s" % ' '.join([tok.id for tok in self.tokens]) 1150 1151 else: 1152 result = "#%s" % self.directive 1153 if self.tokens: 1154 result += " " 1155 result += ''.join([tok.id for tok in self.tokens]) 1156 else: 1157 lines, _ = self.format_blocks(self.tokens) 1158 result = '\n'.join(lines) 1159 1160 return result 1161 1162 1163class BlockList(object): 1164 """A convenience class used to hold and process a list of blocks. 1165 1166 It calls the cpp parser to get the blocks. 1167 """ 1168 1169 def __init__(self, blocks): 1170 self.blocks = blocks 1171 1172 def __len__(self): 1173 return len(self.blocks) 1174 1175 def __getitem__(self, n): 1176 return self.blocks[n] 1177 1178 def __repr__(self): 1179 return repr(self.blocks) 1180 1181 def __str__(self): 1182 result = '\n'.join([str(b) for b in self.blocks]) 1183 return result 1184 1185 def dump(self): 1186 """Dump all the blocks in current BlockList.""" 1187 print('##### BEGIN #####') 1188 for i, b in enumerate(self.blocks): 1189 print('### BLOCK %d ###' % i) 1190 print(b) 1191 print('##### END #####') 1192 1193 def optimizeIf01(self): 1194 """Remove the code between #if 0 .. #endif in a BlockList.""" 1195 self.blocks = optimize_if01(self.blocks) 1196 1197 def optimizeMacros(self, macros): 1198 """Remove known defined and undefined macros from a BlockList.""" 1199 for b in self.blocks: 1200 if b.isIf(): 1201 b.expr.optimize(macros) 1202 1203 def removeStructs(self, structs): 1204 """Remove structs.""" 1205 extra_includes = [] 1206 block_num = 0 1207 num_blocks = len(self.blocks) 1208 while block_num < num_blocks: 1209 b = self.blocks[block_num] 1210 block_num += 1 1211 # Have to look in each block for a top-level struct definition. 1212 if b.directive: 1213 continue 1214 num_tokens = len(b.tokens) 1215 # A struct definition usually looks like: 1216 # struct 1217 # ident 1218 # { 1219 # } 1220 # ; 1221 # However, the structure might be spread across multiple blocks 1222 # if the structure looks like this: 1223 # struct ident 1224 # { 1225 # #ifdef VARIABLE 1226 # pid_t pid; 1227 # #endif 1228 # }: 1229 # So the total number of tokens in the block might be less than 1230 # five but assume at least three. 1231 if num_tokens < 3: 1232 continue 1233 1234 # This is a simple struct finder, it might fail if a top-level 1235 # structure has an #if type directives that confuses the algorithm 1236 # for finding the end of the structure. Or if there is another 1237 # structure definition embedded in the structure. 1238 i = 0 1239 while i < num_tokens - 2: 1240 if (b.tokens[i].kind != TokenKind.KEYWORD or 1241 b.tokens[i].id != "struct"): 1242 i += 1 1243 continue 1244 if (b.tokens[i + 1].kind == TokenKind.IDENTIFIER and 1245 b.tokens[i + 2].kind == TokenKind.PUNCTUATION and 1246 b.tokens[i + 2].id == "{" and b.tokens[i + 1].id in structs): 1247 # Add an include for the structure to be removed of the form: 1248 # #include <bits/STRUCT_NAME.h> 1249 struct_token = b.tokens[i + 1] 1250 if not structs[struct_token.id]: 1251 extra_includes.append("<bits/%s.h>" % struct_token.id) 1252 1253 # Search forward for the end of the structure. 1254 # Very simple search, look for } and ; tokens. 1255 # If we hit the end of the block, we'll need to start 1256 # looking at the next block. 1257 j = i + 3 1258 depth = 1 1259 struct_removed = False 1260 while not struct_removed: 1261 while j < num_tokens: 1262 if b.tokens[j].kind == TokenKind.PUNCTUATION: 1263 if b.tokens[j].id == '{': 1264 depth += 1 1265 elif b.tokens[j].id == '}': 1266 depth -= 1 1267 elif b.tokens[j].id == ';' and depth == 0: 1268 b.tokens = b.tokens[0:i] + b.tokens[j + 1:num_tokens] 1269 num_tokens = len(b.tokens) 1270 struct_removed = True 1271 break 1272 j += 1 1273 if not struct_removed: 1274 b.tokens = b.tokens[0:i] 1275 1276 # Skip directive blocks. 1277 start_block = block_num 1278 while block_num < num_blocks: 1279 if not self.blocks[block_num].directive: 1280 break 1281 block_num += 1 1282 if block_num >= num_blocks: 1283 # Unparsable struct, error out. 1284 raise UnparseableStruct("Cannot remove struct %s: %s" % (struct_token.id, struct_token.location)) 1285 self.blocks = self.blocks[0:start_block] + self.blocks[block_num:num_blocks] 1286 num_blocks = len(self.blocks) 1287 b = self.blocks[start_block] 1288 block_num = start_block + 1 1289 num_tokens = len(b.tokens) 1290 i = 0 1291 j = 0 1292 continue 1293 i += 1 1294 1295 for extra_include in extra_includes: 1296 replacement = CppStringTokenizer(extra_include) 1297 self.blocks.insert(2, Block(replacement.tokens, directive='include')) 1298 1299 def optimizeAll(self, macros): 1300 self.optimizeMacros(macros) 1301 self.optimizeIf01() 1302 return 1303 1304 def findIncludes(self): 1305 """Return the list of included files in a BlockList.""" 1306 result = [] 1307 for b in self.blocks: 1308 i = b.isInclude() 1309 if i: 1310 result.append(i) 1311 return result 1312 1313 def write(self, out): 1314 indent = 0 1315 for b in self.blocks: 1316 indent = b.write(out, indent) 1317 1318 def removeVarsAndFuncs(self, keep): 1319 """Remove variable and function declarations. 1320 1321 All extern and static declarations corresponding to variable and 1322 function declarations are removed. We only accept typedefs and 1323 enum/structs/union declarations. 1324 1325 In addition, remove any macros expanding in the headers. Usually, 1326 these macros are static inline functions, which is why they are 1327 removed. 1328 1329 However, we keep the definitions corresponding to the set of known 1330 static inline functions in the set 'keep', which is useful 1331 for optimized byteorder swap functions and stuff like that. 1332 """ 1333 1334 # state = NORMAL => normal (i.e. LN + spaces) 1335 # state = OTHER_DECL => typedef/struct encountered, ends with ";" 1336 # state = VAR_DECL => var declaration encountered, ends with ";" 1337 # state = FUNC_DECL => func declaration encountered, ends with "}" 1338 NORMAL = 0 1339 OTHER_DECL = 1 1340 VAR_DECL = 2 1341 FUNC_DECL = 3 1342 1343 state = NORMAL 1344 depth = 0 1345 blocksToKeep = [] 1346 blocksInProgress = [] 1347 blocksOfDirectives = [] 1348 ident = "" 1349 state_token = "" 1350 macros = set() 1351 for block in self.blocks: 1352 if block.isDirective(): 1353 # Record all macros. 1354 if block.directive == 'define': 1355 macro_name = block.define_id 1356 paren_index = macro_name.find('(') 1357 if paren_index == -1: 1358 macros.add(macro_name) 1359 else: 1360 macros.add(macro_name[0:paren_index]) 1361 blocksInProgress.append(block) 1362 # If this is in a function/variable declaration, we might need 1363 # to emit the directives alone, so save them separately. 1364 blocksOfDirectives.append(block) 1365 continue 1366 1367 numTokens = len(block.tokens) 1368 lastTerminatorIndex = 0 1369 i = 0 1370 while i < numTokens: 1371 token_id = block.tokens[i].id 1372 terminator = False 1373 if token_id == '{': 1374 depth += 1 1375 if (i >= 2 and block.tokens[i-2].id == 'extern' and 1376 block.tokens[i-1].id == '"C"'): 1377 # For an extern "C" { pretend as though this is depth 0. 1378 depth -= 1 1379 elif token_id == '}': 1380 if depth > 0: 1381 depth -= 1 1382 if depth == 0: 1383 if state == OTHER_DECL: 1384 # Loop through until we hit the ';' 1385 i += 1 1386 while i < numTokens: 1387 if block.tokens[i].id == ';': 1388 token_id = ';' 1389 break 1390 i += 1 1391 # If we didn't hit the ';', just consider this the 1392 # terminator any way. 1393 terminator = True 1394 elif depth == 0: 1395 if token_id == ';': 1396 if state == NORMAL: 1397 blocksToKeep.extend(blocksInProgress) 1398 blocksInProgress = [] 1399 blocksOfDirectives = [] 1400 state = FUNC_DECL 1401 terminator = True 1402 elif (state == NORMAL and token_id == '(' and i >= 1 and 1403 block.tokens[i-1].kind == TokenKind.IDENTIFIER and 1404 block.tokens[i-1].id in macros): 1405 # This is a plain macro being expanded in the header 1406 # which needs to be removed. 1407 blocksToKeep.extend(blocksInProgress) 1408 if lastTerminatorIndex < i - 1: 1409 blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1])) 1410 blocksInProgress = [] 1411 blocksOfDirectives = [] 1412 1413 # Skip until we see the terminating ')' 1414 i += 1 1415 paren_depth = 1 1416 while i < numTokens: 1417 if block.tokens[i].id == ')': 1418 paren_depth -= 1 1419 if paren_depth == 0: 1420 break 1421 elif block.tokens[i].id == '(': 1422 paren_depth += 1 1423 i += 1 1424 lastTerminatorIndex = i + 1 1425 elif (state != FUNC_DECL and token_id == '(' and 1426 state_token != 'typedef'): 1427 blocksToKeep.extend(blocksInProgress) 1428 blocksInProgress = [] 1429 blocksOfDirectives = [] 1430 state = VAR_DECL 1431 elif state == NORMAL and token_id in ['struct', 'typedef', 1432 'enum', 'union', 1433 '__extension__']: 1434 state = OTHER_DECL 1435 state_token = token_id 1436 elif block.tokens[i].kind == TokenKind.IDENTIFIER: 1437 if state != VAR_DECL or ident == "": 1438 ident = token_id 1439 1440 if terminator: 1441 if state != VAR_DECL and state != FUNC_DECL or ident in keep: 1442 blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1])) 1443 blocksToKeep.extend(blocksInProgress) 1444 else: 1445 # Only keep the directives found. 1446 blocksToKeep.extend(blocksOfDirectives) 1447 lastTerminatorIndex = i + 1 1448 blocksInProgress = [] 1449 blocksOfDirectives = [] 1450 state = NORMAL 1451 ident = "" 1452 state_token = "" 1453 i += 1 1454 if lastTerminatorIndex < numTokens: 1455 blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens])) 1456 if len(blocksInProgress) > 0: 1457 blocksToKeep.extend(blocksInProgress) 1458 self.blocks = blocksToKeep 1459 1460 def replaceTokens(self, replacements): 1461 """Replace tokens according to the given dict.""" 1462 for b in self.blocks: 1463 made_change = False 1464 if b.isInclude() is None: 1465 i = 0 1466 while i < len(b.tokens): 1467 tok = b.tokens[i] 1468 if tok.kind == TokenKind.IDENTIFIER: 1469 if tok.id in replacements: 1470 tok.id = replacements[tok.id] 1471 made_change = True 1472 i += 1 1473 1474 if b.isDefine() and b.define_id in replacements: 1475 b.define_id = replacements[b.define_id] 1476 made_change = True 1477 1478 if made_change and b.isIf(): 1479 # Keep 'expr' in sync with 'tokens'. 1480 b.expr = CppExpr(b.tokens) 1481 1482 1483 1484def strip_space(s): 1485 """Strip out redundant space in a given string.""" 1486 1487 # NOTE: It ought to be more clever to not destroy spaces in string tokens. 1488 replacements = {' . ': '.', 1489 ' [': '[', 1490 '[ ': '[', 1491 ' ]': ']', 1492 '( ': '(', 1493 ' )': ')', 1494 ' ,': ',', 1495 '# ': '#', 1496 ' ;': ';', 1497 '~ ': '~', 1498 ' -> ': '->'} 1499 result = s 1500 for r in replacements: 1501 result = result.replace(r, replacements[r]) 1502 1503 # Remove the space between function name and the parenthesis. 1504 result = re.sub(r'(\w+) \(', r'\1(', result) 1505 return result 1506 1507 1508class BlockParser(object): 1509 """A class that converts an input source file into a BlockList object.""" 1510 1511 def __init__(self, tokzer=None): 1512 """Initialize a block parser. 1513 1514 The input source is provided through a Tokenizer object. 1515 """ 1516 self._tokzer = tokzer 1517 self._parsed = False 1518 1519 @property 1520 def parsed(self): 1521 return self._parsed 1522 1523 @staticmethod 1524 def _short_extent(extent): 1525 return '%d:%d - %d:%d' % (extent.start.line, extent.start.column, 1526 extent.end.line, extent.end.column) 1527 1528 def getBlocks(self, tokzer=None): 1529 """Return all the blocks parsed.""" 1530 1531 def consume_extent(i, tokens, extent=None, detect_change=False): 1532 """Return tokens that belong to the given extent. 1533 1534 It parses all the tokens that follow tokens[i], until getting out 1535 of the extent. When detect_change is True, it may terminate early 1536 when detecting preprocessing directives inside the extent. 1537 """ 1538 1539 result = [] 1540 if extent is None: 1541 extent = tokens[i].cursor.extent 1542 1543 while i < len(tokens) and tokens[i].location in extent: 1544 t = tokens[i] 1545 if debugBlockParser: 1546 print(' ' * 2, t.id, t.kind, t.cursor.kind) 1547 if (detect_change and t.cursor.extent != extent and 1548 t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE): 1549 break 1550 result.append(t) 1551 i += 1 1552 return (i, result) 1553 1554 def consume_line(i, tokens): 1555 """Return tokens that follow tokens[i] in the same line.""" 1556 result = [] 1557 line = tokens[i].location.line 1558 while i < len(tokens) and tokens[i].location.line == line: 1559 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1560 break 1561 result.append(tokens[i]) 1562 i += 1 1563 return (i, result) 1564 1565 if tokzer is None: 1566 tokzer = self._tokzer 1567 tokens = tokzer.tokens 1568 1569 blocks = [] 1570 buf = [] 1571 i = 0 1572 1573 while i < len(tokens): 1574 t = tokens[i] 1575 cursor = t.cursor 1576 1577 if debugBlockParser: 1578 print ("%d: Processing [%s], kind=[%s], cursor=[%s], " 1579 "extent=[%s]" % (t.location.line, t.spelling, t.kind, 1580 cursor.kind, 1581 self._short_extent(cursor.extent))) 1582 1583 if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1584 if buf: 1585 blocks.append(Block(buf)) 1586 buf = [] 1587 1588 j = i 1589 if j + 1 >= len(tokens): 1590 raise BadExpectedToken("### BAD TOKEN at %s" % (t.location)) 1591 directive = tokens[j+1].id 1592 1593 if directive == 'define': 1594 if i+2 >= len(tokens): 1595 raise BadExpectedToken("### BAD TOKEN at %s" % 1596 (tokens[i].location)) 1597 1598 # Skip '#' and 'define'. 1599 extent = tokens[i].cursor.extent 1600 i += 2 1601 id = '' 1602 # We need to separate the id from the remaining of 1603 # the line, especially for the function-like macro. 1604 if (i + 1 < len(tokens) and tokens[i+1].id == '(' and 1605 (tokens[i].location.column + len(tokens[i].spelling) == 1606 tokens[i+1].location.column)): 1607 while i < len(tokens): 1608 id += tokens[i].id 1609 if tokens[i].spelling == ')': 1610 i += 1 1611 break 1612 i += 1 1613 else: 1614 id += tokens[i].id 1615 # Advance to the next token that follows the macro id 1616 i += 1 1617 1618 (i, ret) = consume_extent(i, tokens, extent=extent) 1619 blocks.append(Block(ret, directive=directive, 1620 lineno=t.location.line, identifier=id)) 1621 1622 else: 1623 (i, ret) = consume_extent(i, tokens) 1624 blocks.append(Block(ret[2:], directive=directive, 1625 lineno=t.location.line)) 1626 1627 elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE: 1628 if buf: 1629 blocks.append(Block(buf)) 1630 buf = [] 1631 directive = tokens[i+1].id 1632 (i, ret) = consume_extent(i, tokens) 1633 1634 blocks.append(Block(ret[2:], directive=directive, 1635 lineno=t.location.line)) 1636 1637 elif cursor.kind == CursorKind.VAR_DECL: 1638 if buf: 1639 blocks.append(Block(buf)) 1640 buf = [] 1641 1642 (i, ret) = consume_extent(i, tokens, detect_change=True) 1643 buf += ret 1644 1645 elif cursor.kind == CursorKind.FUNCTION_DECL: 1646 if buf: 1647 blocks.append(Block(buf)) 1648 buf = [] 1649 1650 (i, ret) = consume_extent(i, tokens, detect_change=True) 1651 buf += ret 1652 1653 else: 1654 (i, ret) = consume_line(i, tokens) 1655 buf += ret 1656 1657 if buf: 1658 blocks.append(Block(buf)) 1659 1660 # _parsed=True indicates a successful parsing, although may result an 1661 # empty BlockList. 1662 self._parsed = True 1663 1664 return BlockList(blocks) 1665 1666 def parse(self, tokzer): 1667 return self.getBlocks(tokzer) 1668 1669 def parseFile(self, path): 1670 return self.getBlocks(CppFileTokenizer(path)) 1671 1672 1673class BlockParserTests(unittest.TestCase): 1674 """BlockParser unit tests.""" 1675 1676 def get_blocks(self, lines): 1677 blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines))) 1678 return list(map(lambda a: str(a), blocks)) 1679 1680 def test_hash(self): 1681 self.assertEqual(self.get_blocks(["#error hello"]), ["#error hello"]) 1682 1683 def test_empty_line(self): 1684 self.assertEqual(self.get_blocks(["foo", "", "bar"]), ["foo bar"]) 1685 1686 def test_hash_with_space(self): 1687 # We currently cannot handle the following case with libclang properly. 1688 # Fortunately it doesn't appear in current headers. 1689 #self.assertEqual(self.get_blocks(["foo", " # ", "bar"]), ["foo", "bar"]) 1690 pass 1691 1692 def test_with_comment(self): 1693 self.assertEqual(self.get_blocks(["foo", 1694 " # /* ahah */ if defined(__KERNEL__) /* more */", 1695 "bar", "#endif"]), 1696 ["foo", "#ifdef __KERNEL__", "bar", "#endif"]) 1697 1698 1699################################################################################ 1700################################################################################ 1701##### ##### 1702##### B L O C K L I S T O P T I M I Z A T I O N ##### 1703##### ##### 1704################################################################################ 1705################################################################################ 1706 1707 1708def find_matching_endif(blocks, i): 1709 """Traverse the blocks to find out the matching #endif.""" 1710 n = len(blocks) 1711 depth = 1 1712 while i < n: 1713 if blocks[i].isDirective(): 1714 dir_ = blocks[i].directive 1715 if dir_ in ["if", "ifndef", "ifdef"]: 1716 depth += 1 1717 elif depth == 1 and dir_ in ["else", "elif"]: 1718 return i 1719 elif dir_ == "endif": 1720 depth -= 1 1721 if depth == 0: 1722 return i 1723 i += 1 1724 return i 1725 1726 1727def optimize_if01(blocks): 1728 """Remove the code between #if 0 .. #endif in a list of CppBlocks.""" 1729 i = 0 1730 n = len(blocks) 1731 result = [] 1732 while i < n: 1733 j = i 1734 while j < n and not blocks[j].isIf(): 1735 j += 1 1736 if j > i: 1737 logging.debug("appending lines %d to %d", blocks[i].lineno, 1738 blocks[j-1].lineno) 1739 result += blocks[i:j] 1740 if j >= n: 1741 break 1742 expr = blocks[j].expr 1743 r = expr.toInt() 1744 if r is None: 1745 result.append(blocks[j]) 1746 i = j + 1 1747 continue 1748 1749 if r == 0: 1750 # if 0 => skip everything until the corresponding #endif 1751 start_dir = blocks[j].directive 1752 j = find_matching_endif(blocks, j + 1) 1753 if j >= n: 1754 # unterminated #if 0, finish here 1755 break 1756 dir_ = blocks[j].directive 1757 if dir_ == "endif": 1758 logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)", 1759 blocks[i].lineno, blocks[j].lineno) 1760 if start_dir == "elif": 1761 # Put an endif since we started with an elif. 1762 result += blocks[j:j+1] 1763 i = j + 1 1764 elif dir_ == "else": 1765 # convert 'else' into 'if 1' 1766 logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d " 1767 "to %d)", blocks[i].lineno, blocks[j-1].lineno) 1768 if start_dir == "elif": 1769 blocks[j].directive = "elif" 1770 else: 1771 blocks[j].directive = "if" 1772 blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens) 1773 i = j 1774 elif dir_ == "elif": 1775 # convert 'elif' into 'if' 1776 logging.debug("convert 'if 0' .. 'elif' into 'if'") 1777 if start_dir == "elif": 1778 blocks[j].directive = "elif" 1779 else: 1780 blocks[j].directive = "if" 1781 i = j 1782 continue 1783 1784 # if 1 => find corresponding endif and remove/transform them 1785 k = find_matching_endif(blocks, j + 1) 1786 if k >= n: 1787 # unterminated #if 1, finish here 1788 logging.debug("unterminated 'if 1'") 1789 result += blocks[j+1:k] 1790 break 1791 1792 start_dir = blocks[j].directive 1793 dir_ = blocks[k].directive 1794 if dir_ == "endif": 1795 logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)", 1796 blocks[j].lineno, blocks[k].lineno) 1797 if start_dir == "elif": 1798 # Add the elif in to the results and convert it to an elif 1. 1799 blocks[j].tokens = CppStringTokenizer("1").tokens 1800 result += blocks[j:j+1] 1801 result += optimize_if01(blocks[j+1:k]) 1802 if start_dir == "elif": 1803 # Add the endif in to the results. 1804 result += blocks[k:k+1] 1805 i = k + 1 1806 elif dir_ == "else": 1807 # convert 'else' into 'if 0' 1808 logging.debug("convert 'if 1' .. 'else' (lines %d to %d)", 1809 blocks[j].lineno, blocks[k].lineno) 1810 if start_dir == "elif": 1811 # Add the elif in to the results and convert it to an elif 1. 1812 blocks[j].tokens = CppStringTokenizer("1").tokens 1813 result += blocks[j:j+1] 1814 result += optimize_if01(blocks[j+1:k]) 1815 if start_dir == "elif": 1816 blocks[k].directive = "elif" 1817 else: 1818 blocks[k].directive = "if" 1819 blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) 1820 i = k 1821 elif dir_ == "elif": 1822 # convert 'elif' into 'if 0' 1823 logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)", 1824 blocks[j].lineno, blocks[k].lineno) 1825 result += optimize_if01(blocks[j+1:k]) 1826 blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) 1827 i = k 1828 return result 1829 1830class OptimizerTests(unittest.TestCase): 1831 def parse(self, text, macros=None): 1832 out = utils.StringOutput() 1833 blocks = BlockParser().parse(CppStringTokenizer(text)) 1834 blocks.optimizeAll(macros) 1835 blocks.write(out) 1836 return out.get() 1837 1838 def test_if1(self): 1839 text = """\ 1840#if 1 1841#define GOOD 1842#endif 1843""" 1844 expected = """\ 1845#define GOOD 1846""" 1847 self.assertEqual(self.parse(text), expected) 1848 1849 def test_if0(self): 1850 text = """\ 1851#if 0 1852#define SHOULD_SKIP1 1853#define SHOULD_SKIP2 1854#endif 1855""" 1856 expected = "" 1857 self.assertEqual(self.parse(text), expected) 1858 1859 def test_if1_else(self): 1860 text = """\ 1861#if 1 1862#define GOOD 1863#else 1864#define BAD 1865#endif 1866""" 1867 expected = """\ 1868#define GOOD 1869""" 1870 self.assertEqual(self.parse(text), expected) 1871 1872 def test_if0_else(self): 1873 text = """\ 1874#if 0 1875#define BAD 1876#else 1877#define GOOD 1878#endif 1879""" 1880 expected = """\ 1881#define GOOD 1882""" 1883 self.assertEqual(self.parse(text), expected) 1884 1885 def test_if_elif1(self): 1886 text = """\ 1887#if defined(something) 1888#define EXISTS 1889#elif 1 1890#define GOOD 1891#endif 1892""" 1893 expected = """\ 1894#ifdef something 1895#define EXISTS 1896#elif 1 1897#define GOOD 1898#endif 1899""" 1900 self.assertEqual(self.parse(text), expected) 1901 1902 def test_if_elif1_macro(self): 1903 text = """\ 1904#if defined(something) 1905#define EXISTS 1906#elif defined(WILL_BE_ONE) 1907#define GOOD 1908#endif 1909""" 1910 expected = """\ 1911#ifdef something 1912#define EXISTS 1913#elif 1 1914#define GOOD 1915#endif 1916""" 1917 self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected) 1918 1919 1920 def test_if_elif1_else(self): 1921 text = """\ 1922#if defined(something) 1923#define EXISTS 1924#elif 1 1925#define GOOD 1926#else 1927#define BAD 1928#endif 1929""" 1930 expected = """\ 1931#ifdef something 1932#define EXISTS 1933#elif 1 1934#define GOOD 1935#endif 1936""" 1937 self.assertEqual(self.parse(text), expected) 1938 1939 def test_if_elif1_else_macro(self): 1940 text = """\ 1941#if defined(something) 1942#define EXISTS 1943#elif defined(WILL_BE_ONE) 1944#define GOOD 1945#else 1946#define BAD 1947#endif 1948""" 1949 expected = """\ 1950#ifdef something 1951#define EXISTS 1952#elif 1 1953#define GOOD 1954#endif 1955""" 1956 self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected) 1957 1958 1959 def test_if_elif1_else_macro(self): 1960 text = """\ 1961#if defined(something) 1962#define EXISTS 1963#elif defined(WILL_BE_ONE) 1964#define GOOD 1965#else 1966#define BAD 1967#endif 1968""" 1969 expected = """\ 1970#ifdef something 1971#define EXISTS 1972#elif 1 1973#define GOOD 1974#endif 1975""" 1976 self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected) 1977 1978 def test_macro_set_to_undefined_single(self): 1979 text = """\ 1980#if defined(__KERNEL__) 1981#define BAD_KERNEL 1982#endif 1983""" 1984 expected = "" 1985 macros = {"__KERNEL__": kCppUndefinedMacro} 1986 self.assertEqual(self.parse(text, macros), expected) 1987 1988 def test_macro_set_to_undefined_if(self): 1989 text = """\ 1990#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) 1991#define CHECK 1992#endif 1993""" 1994 expected = """\ 1995#if !defined(__GLIBC__) || __GLIBC__ < 2 1996#define CHECK 1997#endif 1998""" 1999 macros = {"__KERNEL__": kCppUndefinedMacro} 2000 self.assertEqual(self.parse(text, macros), expected) 2001 2002 def test_endif_comment_removed(self): 2003 text = """\ 2004#ifndef SIGRTMAX 2005#define SIGRTMAX 123 2006#endif /* SIGRTMAX */ 2007""" 2008 expected = """\ 2009#ifndef SIGRTMAX 2010#define SIGRTMAX 123 2011#endif 2012""" 2013 self.assertEqual(self.parse(text), expected) 2014 2015 def test_multilevel_if0(self): 2016 text = """\ 2017#if 0 2018#if 1 2019#define BAD_6 2020#endif 2021#endif 2022""" 2023 expected = "" 2024 self.assertEqual(self.parse(text), expected) 2025 2026class RemoveStructsTests(unittest.TestCase): 2027 def parse(self, text, structs): 2028 out = utils.StringOutput() 2029 blocks = BlockParser().parse(CppStringTokenizer(text)) 2030 blocks.removeStructs(structs) 2031 blocks.write(out) 2032 return out.get() 2033 2034 def test_remove_struct_from_start(self): 2035 text = """\ 2036struct remove { 2037 int val1; 2038 int val2; 2039}; 2040struct something { 2041 struct timeval val1; 2042 struct timeval val2; 2043}; 2044""" 2045 expected = """\ 2046struct something { 2047 struct timeval val1; 2048 struct timeval val2; 2049}; 2050""" 2051 self.assertEqual(self.parse(text, {"remove": True}), expected) 2052 2053 def test_remove_struct_from_end(self): 2054 text = """\ 2055struct something { 2056 struct timeval val1; 2057 struct timeval val2; 2058}; 2059struct remove { 2060 int val1; 2061 int val2; 2062}; 2063""" 2064 expected = """\ 2065struct something { 2066 struct timeval val1; 2067 struct timeval val2; 2068}; 2069""" 2070 self.assertEqual(self.parse(text, {"remove": True}), expected) 2071 2072 def test_remove_minimal_struct(self): 2073 text = """\ 2074struct remove { 2075}; 2076""" 2077 expected = ""; 2078 self.assertEqual(self.parse(text, {"remove": True}), expected) 2079 2080 def test_remove_struct_with_struct_fields(self): 2081 text = """\ 2082struct something { 2083 struct remove val1; 2084 struct remove val2; 2085}; 2086struct remove { 2087 int val1; 2088 struct something val3; 2089 int val2; 2090}; 2091""" 2092 expected = """\ 2093struct something { 2094 struct remove val1; 2095 struct remove val2; 2096}; 2097""" 2098 self.assertEqual(self.parse(text, {"remove": True}), expected) 2099 2100 def test_remove_consecutive_structs(self): 2101 text = """\ 2102struct keep1 { 2103 struct timeval val1; 2104 struct timeval val2; 2105}; 2106struct remove1 { 2107 int val1; 2108 int val2; 2109}; 2110struct remove2 { 2111 int val1; 2112 int val2; 2113 int val3; 2114}; 2115struct keep2 { 2116 struct timeval val1; 2117 struct timeval val2; 2118}; 2119""" 2120 expected = """\ 2121struct keep1 { 2122 struct timeval val1; 2123 struct timeval val2; 2124}; 2125struct keep2 { 2126 struct timeval val1; 2127 struct timeval val2; 2128}; 2129""" 2130 self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected) 2131 2132 def test_remove_multiple_structs(self): 2133 text = """\ 2134struct keep1 { 2135 int val; 2136}; 2137struct remove1 { 2138 int val1; 2139 int val2; 2140}; 2141struct keep2 { 2142 int val; 2143}; 2144struct remove2 { 2145 struct timeval val1; 2146 struct timeval val2; 2147}; 2148struct keep3 { 2149 int val; 2150}; 2151""" 2152 expected = """\ 2153struct keep1 { 2154 int val; 2155}; 2156struct keep2 { 2157 int val; 2158}; 2159struct keep3 { 2160 int val; 2161}; 2162""" 2163 self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected) 2164 2165 def test_remove_struct_with_inline_structs(self): 2166 text = """\ 2167struct remove { 2168 int val1; 2169 int val2; 2170 struct { 2171 int val1; 2172 struct { 2173 int val1; 2174 } level2; 2175 } level1; 2176}; 2177struct something { 2178 struct timeval val1; 2179 struct timeval val2; 2180}; 2181""" 2182 expected = """\ 2183struct something { 2184 struct timeval val1; 2185 struct timeval val2; 2186}; 2187""" 2188 self.assertEqual(self.parse(text, {"remove": True}), expected) 2189 2190 def test_remove_struct_across_blocks(self): 2191 text = """\ 2192struct remove { 2193 int val1; 2194 int val2; 2195#ifdef PARAMETER1 2196 PARAMETER1 2197#endif 2198#ifdef PARAMETER2 2199 PARAMETER2 2200#endif 2201}; 2202struct something { 2203 struct timeval val1; 2204 struct timeval val2; 2205}; 2206""" 2207 expected = """\ 2208struct something { 2209 struct timeval val1; 2210 struct timeval val2; 2211}; 2212""" 2213 self.assertEqual(self.parse(text, {"remove": True}), expected) 2214 2215 def test_remove_struct_across_blocks_multiple_structs(self): 2216 text = """\ 2217struct remove1 { 2218 int val1; 2219 int val2; 2220#ifdef PARAMETER1 2221 PARAMETER1 2222#endif 2223#ifdef PARAMETER2 2224 PARAMETER2 2225#endif 2226}; 2227struct remove2 { 2228}; 2229struct something { 2230 struct timeval val1; 2231 struct timeval val2; 2232}; 2233""" 2234 expected = """\ 2235struct something { 2236 struct timeval val1; 2237 struct timeval val2; 2238}; 2239""" 2240 self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected) 2241 2242 def test_remove_multiple_struct_and_add_includes(self): 2243 text = """\ 2244struct remove1 { 2245 int val1; 2246 int val2; 2247}; 2248struct remove2 { 2249 struct timeval val1; 2250 struct timeval val2; 2251}; 2252""" 2253 expected = """\ 2254#include <bits/remove1.h> 2255#include <bits/remove2.h> 2256""" 2257 self.assertEqual(self.parse(text, {"remove1": False, "remove2": False}), expected) 2258 2259 2260class FullPathTest(unittest.TestCase): 2261 """Test of the full path parsing.""" 2262 2263 def parse(self, text, keep=None): 2264 if not keep: 2265 keep = set() 2266 out = utils.StringOutput() 2267 blocks = BlockParser().parse(CppStringTokenizer(text)) 2268 2269 blocks.removeStructs(kernel_structs_to_remove) 2270 blocks.removeVarsAndFuncs(keep) 2271 blocks.replaceTokens(kernel_token_replacements) 2272 blocks.optimizeAll(None) 2273 2274 blocks.write(out) 2275 return out.get() 2276 2277 def test_function_removed(self): 2278 text = """\ 2279static inline __u64 function() 2280{ 2281} 2282""" 2283 expected = "" 2284 self.assertEqual(self.parse(text), expected) 2285 2286 def test_function_removed_with_struct(self): 2287 text = """\ 2288static inline struct something* function() 2289{ 2290} 2291""" 2292 expected = "" 2293 self.assertEqual(self.parse(text), expected) 2294 2295 def test_function_kept(self): 2296 text = """\ 2297static inline __u64 function() 2298{ 2299} 2300""" 2301 expected = """\ 2302static inline __u64 function() { 2303} 2304""" 2305 self.assertEqual(self.parse(text, set(["function"])), expected) 2306 2307 def test_var_removed(self): 2308 text = "__u64 variable;" 2309 expected = "" 2310 self.assertEqual(self.parse(text), expected) 2311 2312 def test_var_kept(self): 2313 text = "__u64 variable;" 2314 expected = "__u64 variable;\n" 2315 self.assertEqual(self.parse(text, set(["variable"])), expected) 2316 2317 def test_keep_function_typedef(self): 2318 text = "typedef void somefunction_t(void);" 2319 expected = "typedef void somefunction_t(void);\n" 2320 self.assertEqual(self.parse(text), expected) 2321 2322 def test_struct_keep_attribute(self): 2323 text = """\ 2324struct something_s { 2325 __u32 s1; 2326 __u32 s2; 2327} __attribute__((packed)); 2328""" 2329 expected = """\ 2330struct something_s { 2331 __u32 s1; 2332 __u32 s2; 2333} __attribute__((packed)); 2334""" 2335 self.assertEqual(self.parse(text), expected) 2336 2337 def test_function_keep_attribute_structs(self): 2338 text = """\ 2339static __inline__ struct some_struct1 * function(struct some_struct2 * e) { 2340} 2341""" 2342 expected = """\ 2343static __inline__ struct some_struct1 * function(struct some_struct2 * e) { 2344} 2345""" 2346 self.assertEqual(self.parse(text, set(["function"])), expected) 2347 2348 def test_struct_after_struct(self): 2349 text = """\ 2350struct first { 2351}; 2352 2353struct second { 2354 unsigned short s1; 2355#define SOMETHING 8 2356 unsigned short s2; 2357}; 2358""" 2359 expected = """\ 2360struct first { 2361}; 2362struct second { 2363 unsigned short s1; 2364#define SOMETHING 8 2365 unsigned short s2; 2366}; 2367""" 2368 self.assertEqual(self.parse(text), expected) 2369 2370 def test_other_not_removed(self): 2371 text = """\ 2372typedef union { 2373 __u64 tu1; 2374 __u64 tu2; 2375} typedef_name; 2376 2377union { 2378 __u64 u1; 2379 __u64 u2; 2380}; 2381 2382struct { 2383 __u64 s1; 2384 __u64 s2; 2385}; 2386 2387enum { 2388 ENUM1 = 0, 2389 ENUM2, 2390}; 2391 2392__extension__ typedef __signed__ long long __s64; 2393""" 2394 expected = """\ 2395typedef union { 2396 __u64 tu1; 2397 __u64 tu2; 2398} typedef_name; 2399union { 2400 __u64 u1; 2401 __u64 u2; 2402}; 2403struct { 2404 __u64 s1; 2405 __u64 s2; 2406}; 2407enum { 2408 ENUM1 = 0, 2409 ENUM2, 2410}; 2411__extension__ typedef __signed__ long long __s64; 2412""" 2413 2414 self.assertEqual(self.parse(text), expected) 2415 2416 def test_semicolon_after_function(self): 2417 text = """\ 2418static inline __u64 function() 2419{ 2420}; 2421 2422struct should_see { 2423 __u32 field; 2424}; 2425""" 2426 expected = """\ 2427struct should_see { 2428 __u32 field; 2429}; 2430""" 2431 self.assertEqual(self.parse(text), expected) 2432 2433 def test_define_in_middle_keep(self): 2434 text = """\ 2435enum { 2436 ENUM0 = 0x10, 2437 ENUM1 = 0x20, 2438#define SOMETHING SOMETHING_ELSE 2439 ENUM2 = 0x40, 2440}; 2441""" 2442 expected = """\ 2443enum { 2444 ENUM0 = 0x10, 2445 ENUM1 = 0x20, 2446#define SOMETHING SOMETHING_ELSE 2447 ENUM2 = 0x40, 2448}; 2449""" 2450 self.assertEqual(self.parse(text), expected) 2451 2452 def test_define_in_middle_remove(self): 2453 text = """\ 2454static inline function() { 2455#define SOMETHING1 SOMETHING_ELSE1 2456 i = 0; 2457 { 2458 i = 1; 2459 } 2460#define SOMETHING2 SOMETHING_ELSE2 2461} 2462""" 2463 expected = """\ 2464#define SOMETHING1 SOMETHING_ELSE1 2465#define SOMETHING2 SOMETHING_ELSE2 2466""" 2467 self.assertEqual(self.parse(text), expected) 2468 2469 def test_define_in_middle_force_keep(self): 2470 text = """\ 2471static inline function() { 2472#define SOMETHING1 SOMETHING_ELSE1 2473 i = 0; 2474 { 2475 i = 1; 2476 } 2477#define SOMETHING2 SOMETHING_ELSE2 2478} 2479""" 2480 expected = """\ 2481static inline function() { 2482#define SOMETHING1 SOMETHING_ELSE1 2483 i = 0; 2484 { 2485 i = 1; 2486 } 2487#define SOMETHING2 SOMETHING_ELSE2 2488} 2489""" 2490 self.assertEqual(self.parse(text, set(["function"])), expected) 2491 2492 def test_define_before_remove(self): 2493 text = """\ 2494#define SHOULD_BE_KEPT NOTHING1 2495#define ANOTHER_TO_KEEP NOTHING2 2496static inline function() { 2497#define SOMETHING1 SOMETHING_ELSE1 2498 i = 0; 2499 { 2500 i = 1; 2501 } 2502#define SOMETHING2 SOMETHING_ELSE2 2503} 2504""" 2505 expected = """\ 2506#define SHOULD_BE_KEPT NOTHING1 2507#define ANOTHER_TO_KEEP NOTHING2 2508#define SOMETHING1 SOMETHING_ELSE1 2509#define SOMETHING2 SOMETHING_ELSE2 2510""" 2511 self.assertEqual(self.parse(text), expected) 2512 2513 def test_extern_C(self): 2514 text = """\ 2515#if defined(__cplusplus) 2516extern "C" { 2517#endif 2518 2519struct something { 2520}; 2521 2522#if defined(__cplusplus) 2523} 2524#endif 2525""" 2526 expected = """\ 2527#ifdef __cplusplus 2528extern "C" { 2529#endif 2530struct something { 2531}; 2532#ifdef __cplusplus 2533} 2534#endif 2535""" 2536 self.assertEqual(self.parse(text), expected) 2537 2538 def test_macro_definition_removed(self): 2539 text = """\ 2540#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {} 2541MACRO_FUNCTION_NO_PARAMS() 2542 2543#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; } 2544MACRO_FUNCTION_PARAMS(a = 1) 2545 2546something that should still be kept 2547MACRO_FUNCTION_PARAMS(b) 2548""" 2549 expected = """\ 2550#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { } 2551#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; } 2552something that should still be kept 2553""" 2554 self.assertEqual(self.parse(text), expected) 2555 2556 def test_verify_timeval_itemerval(self): 2557 text = """\ 2558struct __kernel_old_timeval { 2559 struct something val; 2560}; 2561struct __kernel_old_itimerval { 2562 struct __kernel_old_timeval val; 2563}; 2564struct fields { 2565 struct __kernel_old_timeval timeval; 2566 struct __kernel_old_itimerval itimerval; 2567}; 2568""" 2569 expected = """\ 2570struct fields { 2571 struct timeval timeval; 2572 struct itimerval itimerval; 2573}; 2574""" 2575 self.assertEqual(self.parse(text), expected) 2576 2577 def test_token_replacement(self): 2578 text = """\ 2579#define SIGRTMIN 32 2580#define SIGRTMAX _NSIG 2581""" 2582 expected = """\ 2583#define __SIGRTMIN 32 2584#define __SIGRTMAX _KERNEL__NSIG 2585""" 2586 self.assertEqual(self.parse(text), expected) 2587 2588 2589if __name__ == '__main__': 2590 unittest.main() 2591