• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2"""A glorified C pre-processor parser."""
3
4import ctypes
5import logging
6import os
7import re
8import site
9import utils
10
11top = os.getenv('ANDROID_BUILD_TOP')
12if top is None:
13    utils.panic('ANDROID_BUILD_TOP not set.\n')
14
15# Set up the env vars for libclang.
16site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))
17
18import clang.cindex
19from clang.cindex import conf
20from clang.cindex import Cursor
21from clang.cindex import CursorKind
22from clang.cindex import SourceLocation
23from clang.cindex import SourceRange
24from clang.cindex import TokenGroup
25from clang.cindex import TokenKind
26from clang.cindex import TranslationUnit
27
28# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
29# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
30clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64/libclang_android.so'))
31
32from defaults import kCppUndefinedMacro
33from defaults import kernel_remove_config_macros
34from defaults import kernel_struct_replacements
35from defaults import kernel_token_replacements
36
37
38debugBlockParser = False
39debugCppExpr = False
40debugOptimIf01 = False
41
42###############################################################################
43###############################################################################
44#####                                                                     #####
45#####           C P P   T O K E N S                                       #####
46#####                                                                     #####
47###############################################################################
48###############################################################################
49
50# the list of supported C-preprocessor tokens
51# plus a couple of C tokens as well
52tokEOF = "\0"
53tokLN = "\n"
54tokSTRINGIFY = "#"
55tokCONCAT = "##"
56tokLOGICAND = "&&"
57tokLOGICOR = "||"
58tokSHL = "<<"
59tokSHR = ">>"
60tokEQUAL = "=="
61tokNEQUAL = "!="
62tokLT = "<"
63tokLTE = "<="
64tokGT = ">"
65tokGTE = ">="
66tokELLIPSIS = "..."
67tokSPACE = " "
68tokDEFINED = "defined"
69tokLPAREN = "("
70tokRPAREN = ")"
71tokNOT = "!"
72tokPLUS = "+"
73tokMINUS = "-"
74tokMULTIPLY = "*"
75tokDIVIDE = "/"
76tokMODULUS = "%"
77tokBINAND = "&"
78tokBINOR = "|"
79tokBINXOR = "^"
80tokCOMMA = ","
81tokLBRACE = "{"
82tokRBRACE = "}"
83tokARROW = "->"
84tokINCREMENT = "++"
85tokDECREMENT = "--"
86tokNUMBER = "<number>"
87tokIDENT = "<ident>"
88tokSTRING = "<string>"
89
90
91class Token(clang.cindex.Token):
92    """A class that represents one token after parsing.
93
94    It inherits the class in libclang, with an extra id property to hold the
95    new spelling of the token. The spelling property in the base class is
96    defined as read-only. New names after macro instantiation are saved in
97    their ids now. It also facilitates the renaming of directive optimizations
98    like replacing 'ifndef X' with 'if !defined(X)'.
99
100    It also overrides the cursor property of the base class. Because the one
101    in libclang always queries based on a single token, which usually doesn't
102    hold useful information. The cursor in this class can be set by calling
103    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
104    base class.
105    """
106
107    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
108                 cursor=None):
109        clang.cindex.Token.__init__(self)
110        self._id = None
111        self._tu = tu
112        self._group = group
113        self._cursor = cursor
114        # self.int_data and self.ptr_data are from the base class. But
115        # self.int_data doesn't accept a None value.
116        if int_data is not None:
117            self.int_data = int_data
118        self.ptr_data = ptr_data
119
120    @property
121    def id(self):
122        """Name of the token."""
123        if self._id is None:
124            return self.spelling
125        else:
126            return self._id
127
128    @id.setter
129    def id(self, new_id):
130        """Setting name of the token."""
131        self._id = new_id
132
133    @property
134    def cursor(self):
135        if self._cursor is None:
136            self._cursor = clang.cindex.Token.cursor
137        return self._cursor
138
139    @cursor.setter
140    def cursor(self, new_cursor):
141        self._cursor = new_cursor
142
143    def __repr__(self):
144        if self.id == 'defined':
145            return self.id
146        elif self.kind == TokenKind.IDENTIFIER:
147            return "(ident %s)" % self.id
148
149        return self.id
150
151    def __str__(self):
152        return self.id
153
154
155class BadExpectedToken(Exception):
156    """An exception that will be raised for unexpected tokens."""
157    pass
158
159
160# The __contains__ function in libclang SourceRange class contains a bug. It
161# gives wrong result when dealing with single line range.
162# Bug filed with upstream:
163# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
164def SourceRange__contains__(self, other):
165    """Determine if a given location is inside the range."""
166    if not isinstance(other, SourceLocation):
167        return False
168    if other.file is None and self.start.file is None:
169        pass
170    elif (self.start.file.name != other.file.name or
171          other.file.name != self.end.file.name):
172        # same file name
173        return False
174    # same file, in between lines
175    if self.start.line < other.line < self.end.line:
176        return True
177    # same file, same line
178    elif self.start.line == other.line == self.end.line:
179        if self.start.column <= other.column <= self.end.column:
180            return True
181    elif self.start.line == other.line:
182        # same file first line
183        if self.start.column <= other.column:
184            return True
185    elif other.line == self.end.line:
186        # same file last line
187        if other.column <= self.end.column:
188            return True
189    return False
190
191
192SourceRange.__contains__ = SourceRange__contains__
193
194
195################################################################################
196################################################################################
197#####                                                                      #####
198#####           C P P   T O K E N I Z E R                                  #####
199#####                                                                      #####
200################################################################################
201################################################################################
202
203
204class CppTokenizer(object):
205    """A tokenizer that converts some input text into a list of tokens.
206
207    It calls libclang's tokenizer to get the parsed tokens. In addition, it
208    updates the cursor property in each token after parsing, by calling
209    getTokensWithCursors().
210    """
211
212    clang_flags = ['-E', '-x', 'c']
213    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
214
215    def __init__(self):
216        """Initialize a new CppTokenizer object."""
217        self._indexer = clang.cindex.Index.create()
218        self._tu = None
219        self._index = 0
220        self.tokens = None
221
222    def _getTokensWithCursors(self):
223        """Helper method to return all tokens with their cursors.
224
225        The cursor property in a clang Token doesn't provide enough
226        information. Because it is queried based on single token each time
227        without any context, i.e. via calling conf.lib.clang_annotateTokens()
228        with only one token given. So we often see 'INVALID_FILE' in one
229        token's cursor. In this function it passes all the available tokens
230        to get more informative cursors.
231        """
232
233        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
234        tokens_count = ctypes.c_uint()
235
236        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
237                                ctypes.byref(tokens_memory),
238                                ctypes.byref(tokens_count))
239
240        count = int(tokens_count.value)
241
242        # If we get no tokens, no memory was allocated. Be sure not to return
243        # anything and potentially call a destructor on nothing.
244        if count < 1:
245            return
246
247        cursors = (Cursor * count)()
248        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))
249
250        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
251                                      cursors_memory)
252
253        tokens_array = ctypes.cast(
254            tokens_memory,
255            ctypes.POINTER(clang.cindex.Token * count)).contents
256        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)
257
258        tokens = []
259        for i in xrange(0, count):
260            token = Token(self._tu, token_group,
261                          int_data=tokens_array[i].int_data,
262                          ptr_data=tokens_array[i].ptr_data,
263                          cursor=cursors[i])
264            # We only want non-comment tokens.
265            if token.kind != TokenKind.COMMENT:
266                tokens.append(token)
267
268        return tokens
269
270    def parseString(self, lines):
271        """Parse a list of text lines into a BlockList object."""
272        file_ = 'dummy.c'
273        self._tu = self._indexer.parse(file_, self.clang_flags,
274                                       unsaved_files=[(file_, lines)],
275                                       options=self.options)
276        self.tokens = self._getTokensWithCursors()
277
278    def parseFile(self, file_):
279        """Parse a file into a BlockList object."""
280        self._tu = self._indexer.parse(file_, self.clang_flags,
281                                       options=self.options)
282        self.tokens = self._getTokensWithCursors()
283
284    def nextToken(self):
285        """Return next token from the list."""
286        if self._index < len(self.tokens):
287            t = self.tokens[self._index]
288            self._index += 1
289            return t
290        else:
291            return None
292
293
294class CppStringTokenizer(CppTokenizer):
295    """A CppTokenizer derived class that accepts a string of text as input."""
296
297    def __init__(self, line):
298        CppTokenizer.__init__(self)
299        self.parseString(line)
300
301
302class CppFileTokenizer(CppTokenizer):
303    """A CppTokenizer derived class that accepts a file as input."""
304
305    def __init__(self, file_):
306        CppTokenizer.__init__(self)
307        self.parseFile(file_)
308
309
310# Unit testing
311#
312class CppTokenizerTester(object):
313    """A class used to test CppTokenizer classes."""
314
315    def __init__(self, tokenizer=None):
316        self._tokenizer = tokenizer
317        self._token = None
318
319    def setTokenizer(self, tokenizer):
320        self._tokenizer = tokenizer
321
322    def expect(self, id):
323        self._token = self._tokenizer.nextToken()
324        if self._token is None:
325            tokid = ''
326        else:
327            tokid = self._token.id
328        if tokid == id:
329            return
330        raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (
331            tokid, id))
332
333    def expectToken(self, id, line, col):
334        self.expect(id)
335        if self._token.location.line != line:
336            raise BadExpectedToken(
337                "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (
338                    id, self._token.lineno, line))
339        if self._token.location.column != col:
340            raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (
341                self._token.colno, col))
342
343    def expectTokens(self, tokens):
344        for id, line, col in tokens:
345            self.expectToken(id, line, col)
346
347    def expectList(self, list_):
348        for item in list_:
349            self.expect(item)
350
351
352def test_CppTokenizer():
353    tester = CppTokenizerTester()
354
355    tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
356    tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
357                       "01923_xy", tokRPAREN])
358
359    tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
360    tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
361                       "defined", tokLPAREN, "BAZ", tokRPAREN])
362
363    tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
364    tester.expectList([])
365
366    tester.setTokenizer(CppStringTokenizer("first\nsecond"))
367    tester.expectList(["first", "second"])
368
369    tester.setTokenizer(CppStringTokenizer("first second\n  third"))
370    tester.expectTokens([("first", 1, 1),
371                         ("second", 1, 7),
372                         ("third", 2, 3)])
373
374    tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
375    tester.expectTokens([("boo", 1, 1)])
376
377    tester.setTokenizer(CppStringTokenizer("an \\\n example"))
378    tester.expectTokens([("an", 1, 1),
379                         ("example", 2, 2)])
380    return True
381
382
383################################################################################
384################################################################################
385#####                                                                      #####
386#####           C P P   E X P R E S S I O N S                              #####
387#####                                                                      #####
388################################################################################
389################################################################################
390
391
392class CppExpr(object):
393    """A class that models the condition of #if directives into an expr tree.
394
395    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
396    "op" is a string describing the operation
397    """
398
399    unaries = ["!", "~"]
400    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
401                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
402    precedences = {
403        "?": 1, ":": 1,
404        "||": 2,
405        "&&": 3,
406        "|": 4,
407        "^": 5,
408        "&": 6,
409        "==": 7, "!=": 7,
410        "<": 8, "<=": 8, ">": 8, ">=": 8,
411        "<<": 9, ">>": 9,
412        "+": 10, "-": 10,
413        "*": 11, "/": 11, "%": 11,
414        "!": 12, "~": 12
415    }
416
417    def __init__(self, tokens):
418        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
419        self.tokens = tokens
420        self._num_tokens = len(tokens)
421        self._index = 0
422
423        if debugCppExpr:
424            print "CppExpr: trying to parse %s" % repr(tokens)
425        self.expr = self.parseExpression(0)
426        if debugCppExpr:
427            print "CppExpr: got " + repr(self.expr)
428        if self._index != self._num_tokens:
429            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
430                       % (self._index, self._num_tokens, repr(tokens)))
431
432    def throw(self, exception, msg):
433        if self._index < self._num_tokens:
434            tok = self.tokens[self._index]
435            print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
436        else:
437            print "EOF: %s" % msg
438        raise exception(msg)
439
440    def expectId(self, id):
441        """Check that a given token id is at the current position."""
442        token = self.tokens[self._index]
443        if self._index >= self._num_tokens or token.id != id:
444            self.throw(BadExpectedToken,
445                       "### expecting '%s' in expression, got '%s'" % (
446                           id, token.id))
447        self._index += 1
448
449    def is_decimal(self):
450        token = self.tokens[self._index].id
451        if token[-1] in "ULul":
452            token = token[:-1]
453        try:
454            val = int(token, 10)
455            self._index += 1
456            return ('int', val)
457        except ValueError:
458            return None
459
460    def is_octal(self):
461        token = self.tokens[self._index].id
462        if token[-1] in "ULul":
463            token = token[:-1]
464        if len(token) < 2 or token[0] != '0':
465            return None
466        try:
467            val = int(token, 8)
468            self._index += 1
469            return ('oct', val)
470        except ValueError:
471            return None
472
473    def is_hexadecimal(self):
474        token = self.tokens[self._index].id
475        if token[-1] in "ULul":
476            token = token[:-1]
477        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
478            return None
479        try:
480            val = int(token, 16)
481            self._index += 1
482            return ('hex', val)
483        except ValueError:
484            return None
485
486    def is_integer(self):
487        if self.tokens[self._index].kind != TokenKind.LITERAL:
488            return None
489
490        c = self.is_hexadecimal()
491        if c:
492            return c
493
494        c = self.is_octal()
495        if c:
496            return c
497
498        c = self.is_decimal()
499        if c:
500            return c
501
502        return None
503
504    def is_number(self):
505        t = self.tokens[self._index]
506        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
507            self._index += 1
508            c = self.is_integer()
509            if c:
510                op, val = c
511                return (op, -val)
512        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
513            self._index += 1
514            c = self.is_integer()
515            if c:
516                return c
517
518        return self.is_integer()
519
520    def is_defined(self):
521        t = self.tokens[self._index]
522        if t.id != tokDEFINED:
523            return None
524
525        # We have the defined keyword, check the rest.
526        self._index += 1
527        used_parens = False
528        if (self._index < self._num_tokens and
529            self.tokens[self._index].id == tokLPAREN):
530            used_parens = True
531            self._index += 1
532
533        if self._index >= self._num_tokens:
534            self.throw(BadExpectedToken,
535                       "### 'defined' must be followed by macro name or left "
536                       "paren")
537
538        t = self.tokens[self._index]
539        if t.kind != TokenKind.IDENTIFIER:
540            self.throw(BadExpectedToken,
541                       "### 'defined' must be followed by macro name")
542
543        self._index += 1
544        if used_parens:
545            self.expectId(tokRPAREN)
546
547        return ("defined", t.id)
548
549    def is_call_or_ident(self):
550        if self._index >= self._num_tokens:
551            return None
552
553        t = self.tokens[self._index]
554        if t.kind != TokenKind.IDENTIFIER:
555            return None
556
557        name = t.id
558
559        self._index += 1
560        if (self._index >= self._num_tokens or
561            self.tokens[self._index].id != tokLPAREN):
562            return ("ident", name)
563
564        params = []
565        depth = 1
566        self._index += 1
567        j = self._index
568        while self._index < self._num_tokens:
569            id = self.tokens[self._index].id
570            if id == tokLPAREN:
571                depth += 1
572            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
573                k = self._index
574                param = self.tokens[j:k]
575                params.append(param)
576                if id == tokRPAREN:
577                    break
578                j = self._index + 1
579            elif id == tokRPAREN:
580                depth -= 1
581            self._index += 1
582
583        if self._index >= self._num_tokens:
584            return None
585
586        self._index += 1
587        return ("call", (name, params))
588
589    # Implements the "precedence climbing" algorithm from
590    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
591    # The "classic" algorithm would be fine if we were using a tool to
592    # generate the parser, but we're not. Dijkstra's "shunting yard"
593    # algorithm hasn't been necessary yet.
594
595    def parseExpression(self, minPrecedence):
596        if self._index >= self._num_tokens:
597            return None
598
599        node = self.parsePrimary()
600        while (self.token() and self.isBinary(self.token()) and
601               self.precedence(self.token()) >= minPrecedence):
602            op = self.token()
603            self.nextToken()
604            rhs = self.parseExpression(self.precedence(op) + 1)
605            node = (op.id, node, rhs)
606
607        return node
608
609    def parsePrimary(self):
610        op = self.token()
611        if self.isUnary(op):
612            self.nextToken()
613            return (op.id, self.parseExpression(self.precedence(op)))
614
615        primary = None
616        if op.id == tokLPAREN:
617            self.nextToken()
618            primary = self.parseExpression(0)
619            self.expectId(tokRPAREN)
620        elif op.id == "?":
621            self.nextToken()
622            primary = self.parseExpression(0)
623            self.expectId(":")
624        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
625            primary = self.is_number()
626        # Checking for 'defined' needs to come first now because 'defined' is
627        # recognized as IDENTIFIER.
628        elif op.id == tokDEFINED:
629            primary = self.is_defined()
630        elif op.kind == TokenKind.IDENTIFIER:
631            primary = self.is_call_or_ident()
632        else:
633            self.throw(BadExpectedToken,
634                       "didn't expect to see a %s in factor" % (
635                           self.tokens[self._index].id))
636        return primary
637
638    def isBinary(self, token):
639        return token.id in self.binaries
640
641    def isUnary(self, token):
642        return token.id in self.unaries
643
644    def precedence(self, token):
645        return self.precedences.get(token.id)
646
647    def token(self):
648        if self._index >= self._num_tokens:
649            return None
650        return self.tokens[self._index]
651
652    def nextToken(self):
653        self._index += 1
654        if self._index >= self._num_tokens:
655            return None
656        return self.tokens[self._index]
657
658    def dump_node(self, e):
659        op = e[0]
660        line = "(" + op
661        if op == "int":
662            line += " %d)" % e[1]
663        elif op == "oct":
664            line += " 0%o)" % e[1]
665        elif op == "hex":
666            line += " 0x%x)" % e[1]
667        elif op == "ident":
668            line += " %s)" % e[1]
669        elif op == "defined":
670            line += " %s)" % e[1]
671        elif op == "call":
672            arg = e[1]
673            line += " %s [" % arg[0]
674            prefix = ""
675            for param in arg[1]:
676                par = ""
677                for tok in param:
678                    par += str(tok)
679                line += "%s%s" % (prefix, par)
680                prefix = ","
681            line += "])"
682        elif op in CppExpr.unaries:
683            line += " %s)" % self.dump_node(e[1])
684        elif op in CppExpr.binaries:
685            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
686        else:
687            line += " ?%s)" % repr(e[1])
688
689        return line
690
691    def __repr__(self):
692        return self.dump_node(self.expr)
693
694    def source_node(self, e):
695        op = e[0]
696        if op == "int":
697            return "%d" % e[1]
698        if op == "hex":
699            return "0x%x" % e[1]
700        if op == "oct":
701            return "0%o" % e[1]
702        if op == "ident":
703            # XXX: should try to expand
704            return e[1]
705        if op == "defined":
706            return "defined(%s)" % e[1]
707
708        prec = CppExpr.precedences.get(op, 1000)
709        arg = e[1]
710        if op in CppExpr.unaries:
711            arg_src = self.source_node(arg)
712            arg_op = arg[0]
713            arg_prec = CppExpr.precedences.get(arg_op, 1000)
714            if arg_prec < prec:
715                return "!(" + arg_src + ")"
716            else:
717                return "!" + arg_src
718        if op in CppExpr.binaries:
719            arg2 = e[2]
720            arg1_op = arg[0]
721            arg2_op = arg2[0]
722            arg1_src = self.source_node(arg)
723            arg2_src = self.source_node(arg2)
724            if CppExpr.precedences.get(arg1_op, 1000) < prec:
725                arg1_src = "(%s)" % arg1_src
726            if CppExpr.precedences.get(arg2_op, 1000) < prec:
727                arg2_src = "(%s)" % arg2_src
728
729            return "%s %s %s" % (arg1_src, op, arg2_src)
730        return "???"
731
732    def __str__(self):
733        return self.source_node(self.expr)
734
735    @staticmethod
736    def int_node(e):
737        if e[0] in ["int", "oct", "hex"]:
738            return e[1]
739        else:
740            return None
741
742    def toInt(self):
743        return self.int_node(self.expr)
744
745    def optimize_node(self, e, macros=None):
746        if macros is None:
747            macros = {}
748        op = e[0]
749
750        if op == "defined":
751            op, name = e
752            if macros.has_key(name):
753                if macros[name] == kCppUndefinedMacro:
754                    return ("int", 0)
755                else:
756                    try:
757                        value = int(macros[name])
758                        return ("int", value)
759                    except ValueError:
760                        return ("defined", macros[name])
761
762            if kernel_remove_config_macros and name.startswith("CONFIG_"):
763                return ("int", 0)
764
765            return e
766
767        elif op == "ident":
768            op, name = e
769            if macros.has_key(name):
770                try:
771                    value = int(macros[name])
772                    expanded = ("int", value)
773                except ValueError:
774                    expanded = ("ident", macros[name])
775                return self.optimize_node(expanded, macros)
776            return e
777
778        elif op == "!":
779            op, v = e
780            v = self.optimize_node(v, macros)
781            if v[0] == "int":
782                if v[1] == 0:
783                    return ("int", 1)
784                else:
785                    return ("int", 0)
786            return ('!', v)
787
788        elif op == "&&":
789            op, l, r = e
790            l = self.optimize_node(l, macros)
791            r = self.optimize_node(r, macros)
792            li = self.int_node(l)
793            ri = self.int_node(r)
794            if li is not None:
795                if li == 0:
796                    return ("int", 0)
797                else:
798                    return r
799            elif ri is not None:
800                if ri == 0:
801                    return ("int", 0)
802                else:
803                    return l
804            return (op, l, r)
805
806        elif op == "||":
807            op, l, r = e
808            l = self.optimize_node(l, macros)
809            r = self.optimize_node(r, macros)
810            li = self.int_node(l)
811            ri = self.int_node(r)
812            if li is not None:
813                if li == 0:
814                    return r
815                else:
816                    return ("int", 1)
817            elif ri is not None:
818                if ri == 0:
819                    return l
820                else:
821                    return ("int", 1)
822            return (op, l, r)
823
824        else:
825            return e
826
827    def optimize(self, macros=None):
828        if macros is None:
829            macros = {}
830        self.expr = self.optimize_node(self.expr, macros)
831
832
833def test_cpp_expr(expr, expected):
834    e = CppExpr(CppStringTokenizer(expr).tokens)
835    s1 = repr(e)
836    if s1 != expected:
837        print ("[FAIL]: expression '%s' generates '%s', should be "
838               "'%s'" % (expr, s1, expected))
839        global failure_count
840        failure_count += 1
841
842
843def test_cpp_expr_optim(expr, expected, macros=None):
844    if macros is None:
845        macros = {}
846    e = CppExpr(CppStringTokenizer(expr).tokens)
847    e.optimize(macros)
848    s1 = repr(e)
849    if s1 != expected:
850        print ("[FAIL]: optimized expression '%s' generates '%s' with "
851               "macros %s, should be '%s'" % (expr, s1, macros, expected))
852        global failure_count
853        failure_count += 1
854
855
856def test_cpp_expr_source(expr, expected):
857    e = CppExpr(CppStringTokenizer(expr).tokens)
858    s1 = str(e)
859    if s1 != expected:
860        print ("[FAIL]: source expression '%s' generates '%s', should "
861               "be '%s'" % (expr, s1, expected))
862        global failure_count
863        failure_count += 1
864
865
866def test_CppExpr():
867    test_cpp_expr("0", "(int 0)")
868    test_cpp_expr("1", "(int 1)")
869    test_cpp_expr("-5", "(int -5)")
870    test_cpp_expr("+1", "(int 1)")
871    test_cpp_expr("0U", "(int 0)")
872    test_cpp_expr("015", "(oct 015)")
873    test_cpp_expr("015l", "(oct 015)")
874    test_cpp_expr("0x3e", "(hex 0x3e)")
875    test_cpp_expr("(0)", "(int 0)")
876    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
877    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
878    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
879    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
880    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
881    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
882    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
883    test_cpp_expr("defined(ABC) || defined(BINGO)",
884                  "(|| (defined ABC) (defined BINGO))")
885    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
886    test_cpp_expr("A == 1 || defined(B)",
887                  "(|| (== (ident A) (int 1)) (defined B))")
888
889    test_cpp_expr_optim("0", "(int 0)")
890    test_cpp_expr_optim("1", "(int 1)")
891    test_cpp_expr_optim("1 && 1", "(int 1)")
892    test_cpp_expr_optim("1 && +1", "(int 1)")
893    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
894    test_cpp_expr_optim("1 && 0", "(int 0)")
895    test_cpp_expr_optim("0 && 1", "(int 0)")
896    test_cpp_expr_optim("0 && 0", "(int 0)")
897    test_cpp_expr_optim("1 || 1", "(int 1)")
898    test_cpp_expr_optim("1 || 0", "(int 1)")
899    test_cpp_expr_optim("0 || 1", "(int 1)")
900    test_cpp_expr_optim("0 || 0", "(int 0)")
901    test_cpp_expr_optim("A", "(ident A)")
902    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
903    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
904    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
905    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
906    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
907    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
908    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
909    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
910    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
911    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
912                        {"EXAMPLE": "XOWOE"})
913    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
914                        {"EXAMPLE": kCppUndefinedMacro})
915    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
916    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
917                        {"EXAMPLE": "XOWOE"})
918    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
919                        {"EXAMPLE": kCppUndefinedMacro})
920    test_cpp_expr_optim("defined(A) || defined(B)",
921                        "(|| (defined A) (defined B))")
922    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
923    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
924    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
925                        {"B": kCppUndefinedMacro})
926    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
927                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
928    test_cpp_expr_optim("defined(A) && defined(B)",
929                        "(&& (defined A) (defined B))")
930    test_cpp_expr_optim("defined(A) && defined(B)",
931                        "(defined B)", {"A": "1"})
932    test_cpp_expr_optim("defined(A) && defined(B)",
933                        "(defined A)", {"B": "1"})
934    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
935                        {"B": kCppUndefinedMacro})
936    test_cpp_expr_optim("defined(A) && defined(B)",
937                        "(int 0)", {"A": kCppUndefinedMacro})
938    test_cpp_expr_optim("A == 1 || defined(B)",
939                        "(|| (== (ident A) (int 1)) (defined B))")
940    test_cpp_expr_optim(
941        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
942        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
943        {"__KERNEL__": kCppUndefinedMacro})
944
945    test_cpp_expr_source("0", "0")
946    test_cpp_expr_source("1", "1")
947    test_cpp_expr_source("1 && 1", "1 && 1")
948    test_cpp_expr_source("1 && 0", "1 && 0")
949    test_cpp_expr_source("0 && 1", "0 && 1")
950    test_cpp_expr_source("0 && 0", "0 && 0")
951    test_cpp_expr_source("1 || 1", "1 || 1")
952    test_cpp_expr_source("1 || 0", "1 || 0")
953    test_cpp_expr_source("0 || 1", "0 || 1")
954    test_cpp_expr_source("0 || 0", "0 || 0")
955    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
956    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
957    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
958    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
959    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
960
961
962################################################################################
963################################################################################
964#####                                                                      #####
965#####          C P P   B L O C K                                           #####
966#####                                                                      #####
967################################################################################
968################################################################################
969
970
971class Block(object):
972    """A class used to model a block of input source text.
973
974    There are two block types:
975      - directive blocks: contain the tokens of a single pre-processor
976        directive (e.g. #if)
977      - text blocks, contain the tokens of non-directive blocks
978
979    The cpp parser class below will transform an input source file into a list
980    of Block objects (grouped in a BlockList object for convenience)
981    """
982
983    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
984        """Initialize a new block, if 'directive' is None, it is a text block.
985
986        NOTE: This automatically converts '#ifdef MACRO' into
987        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
988        """
989
990        if directive == "ifdef":
991            tok = Token()
992            tok.id = tokDEFINED
993            tokens = [tok] + tokens
994            directive = "if"
995
996        elif directive == "ifndef":
997            tok1 = Token()
998            tok2 = Token()
999            tok1.id = tokNOT
1000            tok2.id = tokDEFINED
1001            tokens = [tok1, tok2] + tokens
1002            directive = "if"
1003
1004        self.tokens = tokens
1005        self.directive = directive
1006        self.define_id = identifier
1007        if lineno > 0:
1008            self.lineno = lineno
1009        else:
1010            self.lineno = self.tokens[0].location.line
1011
1012        if self.isIf():
1013            self.expr = CppExpr(self.tokens)
1014
1015    def isDirective(self):
1016        """Return True iff this is a directive block."""
1017        return self.directive is not None
1018
1019    def isConditional(self):
1020        """Return True iff this is a conditional directive block."""
1021        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
1022                                  "endif"]
1023
1024    def isDefine(self):
1025        """Return the macro name in a #define directive, or None otherwise."""
1026        if self.directive != "define":
1027            return None
1028        return self.define_id
1029
1030    def isIf(self):
1031        """Return True iff this is an #if-like directive block."""
1032        return self.directive in ["if", "ifdef", "ifndef", "elif"]
1033
1034    def isEndif(self):
1035        """Return True iff this is an #endif directive block."""
1036        return self.directive == "endif"
1037
1038    def isInclude(self):
1039        """Check whether this is a #include directive.
1040
1041        If true, returns the corresponding file name (with brackets or
1042        double-qoutes). None otherwise.
1043        """
1044
1045        if self.directive != "include":
1046            return None
1047        return ''.join([str(x) for x in self.tokens])
1048
1049    @staticmethod
1050    def format_blocks(tokens, indent=0):
1051        """Return the formatted lines of strings with proper indentation."""
1052        newline = True
1053        result = []
1054        buf = ''
1055        i = 0
1056        while i < len(tokens):
1057            t = tokens[i]
1058            if t.id == '{':
1059                buf += ' {'
1060                result.append(strip_space(buf))
1061                indent += 2
1062                buf = ''
1063                newline = True
1064            elif t.id == '}':
1065                indent -= 2
1066                if not newline:
1067                    result.append(strip_space(buf))
1068                # Look ahead to determine if it's the end of line.
1069                if (i + 1 < len(tokens) and
1070                    (tokens[i+1].id == ';' or
1071                     tokens[i+1].id in ['else', '__attribute__',
1072                                        '__attribute', '__packed'] or
1073                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
1074                    buf = ' ' * indent + '}'
1075                    newline = False
1076                else:
1077                    result.append(' ' * indent + '}')
1078                    buf = ''
1079                    newline = True
1080            elif t.id == ';':
1081                result.append(strip_space(buf) + ';')
1082                buf = ''
1083                newline = True
1084            # We prefer a new line for each constant in enum.
1085            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
1086                result.append(strip_space(buf) + ',')
1087                buf = ''
1088                newline = True
1089            else:
1090                if newline:
1091                    buf += ' ' * indent + str(t)
1092                else:
1093                    buf += ' ' + str(t)
1094                newline = False
1095            i += 1
1096
1097        if buf:
1098            result.append(strip_space(buf))
1099
1100        return result, indent
1101
1102    def write(self, out, indent):
1103        """Dump the current block."""
1104        # removeWhiteSpace() will sometimes creates non-directive blocks
1105        # without any tokens. These come from blocks that only contained
1106        # empty lines and spaces. They should not be printed in the final
1107        # output, and then should not be counted for this operation.
1108        #
1109        if self.directive is None and not self.tokens:
1110            return indent
1111
1112        if self.directive:
1113            out.write(str(self) + '\n')
1114        else:
1115            lines, indent = self.format_blocks(self.tokens, indent)
1116            for line in lines:
1117                out.write(line + '\n')
1118
1119        return indent
1120
1121    def __repr__(self):
1122        """Generate the representation of a given block."""
1123        if self.directive:
1124            result = "#%s " % self.directive
1125            if self.isIf():
1126                result += repr(self.expr)
1127            else:
1128                for tok in self.tokens:
1129                    result += repr(tok)
1130        else:
1131            result = ""
1132            for tok in self.tokens:
1133                result += repr(tok)
1134
1135        return result
1136
1137    def __str__(self):
1138        """Generate the string representation of a given block."""
1139        if self.directive:
1140            # "#if"
1141            if self.directive == "if":
1142                # small optimization to re-generate #ifdef and #ifndef
1143                e = self.expr.expr
1144                op = e[0]
1145                if op == "defined":
1146                    result = "#ifdef %s" % e[1]
1147                elif op == "!" and e[1][0] == "defined":
1148                    result = "#ifndef %s" % e[1][1]
1149                else:
1150                    result = "#if " + str(self.expr)
1151
1152            # "#define"
1153            elif self.isDefine():
1154                result = "#%s %s" % (self.directive, self.define_id)
1155                if self.tokens:
1156                    result += " "
1157                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
1158                # remove the space between name and '(' in function call
1159                result += re.sub(r'(\w+) \(', r'\1(', expr)
1160
1161            # "#error"
1162            # Concatenating tokens with a space separator, because they may
1163            # not be quoted and broken into several tokens
1164            elif self.directive == "error":
1165                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])
1166
1167            else:
1168                result = "#%s" % self.directive
1169                if self.tokens:
1170                    result += " "
1171                result += ''.join([tok.id for tok in self.tokens])
1172        else:
1173            lines, _ = self.format_blocks(self.tokens)
1174            result = '\n'.join(lines)
1175
1176        return result
1177
1178
1179class BlockList(object):
1180    """A convenience class used to hold and process a list of blocks.
1181
1182    It calls the cpp parser to get the blocks.
1183    """
1184
1185    def __init__(self, blocks):
1186        self.blocks = blocks
1187
1188    def __len__(self):
1189        return len(self.blocks)
1190
1191    def __getitem__(self, n):
1192        return self.blocks[n]
1193
1194    def __repr__(self):
1195        return repr(self.blocks)
1196
1197    def __str__(self):
1198        result = '\n'.join([str(b) for b in self.blocks])
1199        return result
1200
1201    def dump(self):
1202        """Dump all the blocks in current BlockList."""
1203        print '##### BEGIN #####'
1204        for i, b in enumerate(self.blocks):
1205            print '### BLOCK %d ###' % i
1206            print b
1207        print '##### END #####'
1208
1209    def optimizeIf01(self):
1210        """Remove the code between #if 0 .. #endif in a BlockList."""
1211        self.blocks = optimize_if01(self.blocks)
1212
1213    def optimizeMacros(self, macros):
1214        """Remove known defined and undefined macros from a BlockList."""
1215        for b in self.blocks:
1216            if b.isIf():
1217                b.expr.optimize(macros)
1218
1219    def optimizeAll(self, macros):
1220        self.optimizeMacros(macros)
1221        self.optimizeIf01()
1222        return
1223
1224    def findIncludes(self):
1225        """Return the list of included files in a BlockList."""
1226        result = []
1227        for b in self.blocks:
1228            i = b.isInclude()
1229            if i:
1230                result.append(i)
1231        return result
1232
1233    def write(self, out):
1234        indent = 0
1235        for b in self.blocks:
1236            indent = b.write(out, indent)
1237
1238    def removeVarsAndFuncs(self, keep):
1239        """Remove variable and function declarations.
1240
1241        All extern and static declarations corresponding to variable and
1242        function declarations are removed. We only accept typedefs and
1243        enum/structs/union declarations.
1244
1245        However, we keep the definitions corresponding to the set of known
1246        static inline functions in the set 'keep', which is useful
1247        for optimized byteorder swap functions and stuff like that.
1248        """
1249
1250        # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
1251        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
1252        # It could be problematic when we have function-like macros but without
1253        # '}' following them. It will skip all the tokens/blocks until seeing a
1254        # '}' as the function end. Fortunately we don't have such cases in the
1255        # current kernel headers.
1256
1257        # state = 0 => normal (i.e. LN + spaces)
1258        # state = 1 => typedef/struct encountered, ends with ";"
1259        # state = 2 => var declaration encountered, ends with ";"
1260        # state = 3 => func declaration encountered, ends with "}"
1261
1262        state = 0
1263        depth = 0
1264        blocks2 = []
1265        skipTokens = False
1266        for b in self.blocks:
1267            if b.isDirective():
1268                blocks2.append(b)
1269            else:
1270                n = len(b.tokens)
1271                i = 0
1272                if skipTokens:
1273                    first = n
1274                else:
1275                    first = 0
1276                while i < n:
1277                    tok = b.tokens[i]
1278                    tokid = tok.id
1279                    # If we are not looking for the start of a new
1280                    # type/var/func, then skip over tokens until
1281                    # we find our terminator, managing the depth of
1282                    # accolades as we go.
1283                    if state > 0:
1284                        terminator = False
1285                        if tokid == '{':
1286                            depth += 1
1287                        elif tokid == '}':
1288                            if depth > 0:
1289                                depth -= 1
1290                            if (depth == 0) and (state == 3):
1291                                terminator = True
1292                        elif tokid == ';' and depth == 0:
1293                            terminator = True
1294
1295                        if terminator:
1296                            # we found the terminator
1297                            state = 0
1298                            if skipTokens:
1299                                skipTokens = False
1300                                first = i + 1
1301
1302                        i += 1
1303                        continue
1304
1305                    # Is it a new type definition, then start recording it
1306                    if tok.id in ['struct', 'typedef', 'enum', 'union',
1307                                  '__extension__']:
1308                        state = 1
1309                        i += 1
1310                        continue
1311
1312                    # Is it a variable or function definition. If so, first
1313                    # try to determine which type it is, and also extract
1314                    # its name.
1315                    #
1316                    # We're going to parse the next tokens of the same block
1317                    # until we find a semicolon or a left parenthesis.
1318                    #
1319                    # The semicolon corresponds to a variable definition,
1320                    # the left-parenthesis to a function definition.
1321                    #
1322                    # We also assume that the var/func name is the last
1323                    # identifier before the terminator.
1324                    #
1325                    j = i + 1
1326                    ident = ""
1327                    while j < n:
1328                        tokid = b.tokens[j].id
1329                        if tokid == '(':  # a function declaration
1330                            state = 3
1331                            break
1332                        elif tokid == ';':  # a variable declaration
1333                            state = 2
1334                            break
1335                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
1336                            ident = b.tokens[j].id
1337                        j += 1
1338
1339                    if j >= n:
1340                        # This can only happen when the declaration
1341                        # does not end on the current block (e.g. with
1342                        # a directive mixed inside it.
1343                        #
1344                        # We will treat it as malformed because
1345                        # it's very hard to recover from this case
1346                        # without making our parser much more
1347                        # complex.
1348                        #
1349                        logging.debug("### skip unterminated static '%s'",
1350                                      ident)
1351                        break
1352
1353                    if ident in keep:
1354                        logging.debug("### keep var/func '%s': %s", ident,
1355                                      repr(b.tokens[i:j]))
1356                    else:
1357                        # We're going to skip the tokens for this declaration
1358                        logging.debug("### skip var/func '%s': %s", ident,
1359                                      repr(b.tokens[i:j]))
1360                        if i > first:
1361                            blocks2.append(Block(b.tokens[first:i]))
1362                        skipTokens = True
1363                        first = n
1364
1365                    i += 1
1366
1367                if i > first:
1368                    #print "### final '%s'" % repr(b.tokens[first:i])
1369                    blocks2.append(Block(b.tokens[first:i]))
1370
1371        self.blocks = blocks2
1372
1373    def replaceTokens(self, replacements):
1374        """Replace tokens according to the given dict."""
1375        extra_includes = []
1376        for b in self.blocks:
1377            made_change = False
1378            if b.isInclude() is None:
1379                i = 0
1380                while i < len(b.tokens):
1381                    tok = b.tokens[i]
1382                    if (tok.kind == TokenKind.KEYWORD and tok.id == 'struct'
1383                        and (i + 2) < len(b.tokens) and b.tokens[i + 2].id == '{'):
1384                        struct_name = b.tokens[i + 1].id
1385                        if struct_name in kernel_struct_replacements:
1386                            extra_includes.append("<bits/%s.h>" % struct_name)
1387                            end = i + 2
1388                            while end < len(b.tokens) and b.tokens[end].id != '}':
1389                                end += 1
1390                            end += 1 # Swallow '}'
1391                            while end < len(b.tokens) and b.tokens[end].id != ';':
1392                                end += 1
1393                            end += 1 # Swallow ';'
1394                            # Remove these tokens. We'll replace them later with a #include block.
1395                            b.tokens[i:end] = []
1396                            made_change = True
1397                            # We've just modified b.tokens, so revisit the current offset.
1398                            continue
1399                    if tok.kind == TokenKind.IDENTIFIER:
1400                        if tok.id in replacements:
1401                            tok.id = replacements[tok.id]
1402                            made_change = True
1403                    i += 1
1404
1405                if b.isDefine() and b.define_id in replacements:
1406                    b.define_id = replacements[b.define_id]
1407                    made_change = True
1408
1409            if made_change and b.isIf():
1410                # Keep 'expr' in sync with 'tokens'.
1411                b.expr = CppExpr(b.tokens)
1412
1413        for extra_include in extra_includes:
1414            replacement = CppStringTokenizer(extra_include)
1415            self.blocks.insert(2, Block(replacement.tokens, directive='include'))
1416
1417
1418
1419def strip_space(s):
1420    """Strip out redundant space in a given string."""
1421
1422    # NOTE: It ought to be more clever to not destroy spaces in string tokens.
1423    replacements = {' . ': '.',
1424                    ' [': '[',
1425                    '[ ': '[',
1426                    ' ]': ']',
1427                    '( ': '(',
1428                    ' )': ')',
1429                    ' ,': ',',
1430                    '# ': '#',
1431                    ' ;': ';',
1432                    '~ ': '~',
1433                    ' -> ': '->'}
1434    result = s
1435    for r in replacements:
1436        result = result.replace(r, replacements[r])
1437
1438    # Remove the space between function name and the parenthesis.
1439    result = re.sub(r'(\w+) \(', r'\1(', result)
1440    return result
1441
1442
1443class BlockParser(object):
1444    """A class that converts an input source file into a BlockList object."""
1445
1446    def __init__(self, tokzer=None):
1447        """Initialize a block parser.
1448
1449        The input source is provided through a Tokenizer object.
1450        """
1451        self._tokzer = tokzer
1452        self._parsed = False
1453
1454    @property
1455    def parsed(self):
1456        return self._parsed
1457
1458    @staticmethod
1459    def _short_extent(extent):
1460        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
1461                                  extent.end.line, extent.end.column)
1462
1463    def getBlocks(self, tokzer=None):
1464        """Return all the blocks parsed."""
1465
1466        def consume_extent(i, tokens, extent=None, detect_change=False):
1467            """Return tokens that belong to the given extent.
1468
1469            It parses all the tokens that follow tokens[i], until getting out
1470            of the extent. When detect_change is True, it may terminate early
1471            when detecting preprocessing directives inside the extent.
1472            """
1473
1474            result = []
1475            if extent is None:
1476                extent = tokens[i].cursor.extent
1477
1478            while i < len(tokens) and tokens[i].location in extent:
1479                t = tokens[i]
1480                if debugBlockParser:
1481                    print ' ' * 2, t.id, t.kind, t.cursor.kind
1482                if (detect_change and t.cursor.extent != extent and
1483                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
1484                    break
1485                result.append(t)
1486                i += 1
1487            return (i, result)
1488
1489        def consume_line(i, tokens):
1490            """Return tokens that follow tokens[i] in the same line."""
1491            result = []
1492            line = tokens[i].location.line
1493            while i < len(tokens) and tokens[i].location.line == line:
1494                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
1495                    break
1496                result.append(tokens[i])
1497                i += 1
1498            return (i, result)
1499
1500        if tokzer is None:
1501            tokzer = self._tokzer
1502        tokens = tokzer.tokens
1503
1504        blocks = []
1505        buf = []
1506        i = 0
1507
1508        while i < len(tokens):
1509            t = tokens[i]
1510            cursor = t.cursor
1511
1512            if debugBlockParser:
1513                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
1514                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
1515                                        cursor.kind,
1516                                        self._short_extent(cursor.extent)))
1517
1518            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
1519                if buf:
1520                    blocks.append(Block(buf))
1521                    buf = []
1522
1523                j = i
1524                if j + 1 >= len(tokens):
1525                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
1526                directive = tokens[j+1].id
1527
1528                if directive == 'define':
1529                    if i+2 >= len(tokens):
1530                        raise BadExpectedToken("### BAD TOKEN at %s" %
1531                                               (tokens[i].location))
1532
1533                    # Skip '#' and 'define'.
1534                    extent = tokens[i].cursor.extent
1535                    i += 2
1536                    id = ''
1537                    # We need to separate the id from the remaining of
1538                    # the line, especially for the function-like macro.
1539                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
1540                        (tokens[i].location.column + len(tokens[i].spelling) ==
1541                         tokens[i+1].location.column)):
1542                        while i < len(tokens):
1543                            id += tokens[i].id
1544                            if tokens[i].spelling == ')':
1545                                i += 1
1546                                break
1547                            i += 1
1548                    else:
1549                        id += tokens[i].id
1550                        # Advance to the next token that follows the macro id
1551                        i += 1
1552
1553                    (i, ret) = consume_extent(i, tokens, extent=extent)
1554                    blocks.append(Block(ret, directive=directive,
1555                                        lineno=t.location.line, identifier=id))
1556
1557                else:
1558                    (i, ret) = consume_extent(i, tokens)
1559                    blocks.append(Block(ret[2:], directive=directive,
1560                                        lineno=t.location.line))
1561
1562            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
1563                if buf:
1564                    blocks.append(Block(buf))
1565                    buf = []
1566                directive = tokens[i+1].id
1567                (i, ret) = consume_extent(i, tokens)
1568
1569                blocks.append(Block(ret[2:], directive=directive,
1570                                    lineno=t.location.line))
1571
1572            elif cursor.kind == CursorKind.VAR_DECL:
1573                if buf:
1574                    blocks.append(Block(buf))
1575                    buf = []
1576
1577                (i, ret) = consume_extent(i, tokens, detect_change=True)
1578                buf += ret
1579
1580            elif cursor.kind == CursorKind.FUNCTION_DECL:
1581                if buf:
1582                    blocks.append(Block(buf))
1583                    buf = []
1584
1585                (i, ret) = consume_extent(i, tokens, detect_change=True)
1586                buf += ret
1587
1588            else:
1589                (i, ret) = consume_line(i, tokens)
1590                buf += ret
1591
1592        if buf:
1593            blocks.append(Block(buf))
1594
1595        # _parsed=True indicates a successful parsing, although may result an
1596        # empty BlockList.
1597        self._parsed = True
1598
1599        return BlockList(blocks)
1600
1601    def parse(self, tokzer):
1602        return self.getBlocks(tokzer)
1603
1604    def parseFile(self, path):
1605        return self.getBlocks(CppFileTokenizer(path))
1606
1607
1608def test_block_parsing(lines, expected):
1609    """Helper method to test the correctness of BlockParser.parse."""
1610    blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
1611    if len(blocks) != len(expected):
1612        raise BadExpectedToken("BlockParser.parse() returned '%s' expecting "
1613                               "'%s'" % (str(blocks), repr(expected)))
1614    for n in range(len(blocks)):
1615        if str(blocks[n]) != expected[n]:
1616            raise BadExpectedToken("BlockParser.parse()[%d] is '%s', "
1617                                   "expecting '%s'" % (n, str(blocks[n]),
1618                                                       expected[n]))
1619
1620
1621def test_BlockParser():
1622    test_block_parsing(["#error hello"], ["#error hello"])
1623    test_block_parsing(["foo", "", "bar"], ["foo bar"])
1624
1625    # We currently cannot handle the following case with libclang properly.
1626    # Fortunately it doesn't appear in current headers.
1627    # test_block_parsing(["foo", "  #  ", "bar"], ["foo", "bar"])
1628
1629    test_block_parsing(["foo",
1630                        "  #  /* ahah */ if defined(__KERNEL__) /* more */",
1631                        "bar", "#endif"],
1632                       ["foo", "#ifdef __KERNEL__", "bar", "#endif"])
1633
1634
1635################################################################################
1636################################################################################
1637#####                                                                      #####
1638#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
1639#####                                                                      #####
1640################################################################################
1641################################################################################
1642
1643
1644def find_matching_endif(blocks, i):
1645    """Traverse the blocks to find out the matching #endif."""
1646    n = len(blocks)
1647    depth = 1
1648    while i < n:
1649        if blocks[i].isDirective():
1650            dir_ = blocks[i].directive
1651            if dir_ in ["if", "ifndef", "ifdef"]:
1652                depth += 1
1653            elif depth == 1 and dir_ in ["else", "elif"]:
1654                return i
1655            elif dir_ == "endif":
1656                depth -= 1
1657                if depth == 0:
1658                    return i
1659        i += 1
1660    return i
1661
1662
1663def optimize_if01(blocks):
1664    """Remove the code between #if 0 .. #endif in a list of CppBlocks."""
1665    i = 0
1666    n = len(blocks)
1667    result = []
1668    while i < n:
1669        j = i
1670        while j < n and not blocks[j].isIf():
1671            j += 1
1672        if j > i:
1673            logging.debug("appending lines %d to %d", blocks[i].lineno,
1674                          blocks[j-1].lineno)
1675            result += blocks[i:j]
1676        if j >= n:
1677            break
1678        expr = blocks[j].expr
1679        r = expr.toInt()
1680        if r is None:
1681            result.append(blocks[j])
1682            i = j + 1
1683            continue
1684
1685        if r == 0:
1686            # if 0 => skip everything until the corresponding #endif
1687            j = find_matching_endif(blocks, j + 1)
1688            if j >= n:
1689                # unterminated #if 0, finish here
1690                break
1691            dir_ = blocks[j].directive
1692            if dir_ == "endif":
1693                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
1694                              blocks[i].lineno, blocks[j].lineno)
1695                i = j + 1
1696            elif dir_ == "else":
1697                # convert 'else' into 'if 1'
1698                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
1699                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
1700                blocks[j].directive = "if"
1701                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
1702                i = j
1703            elif dir_ == "elif":
1704                # convert 'elif' into 'if'
1705                logging.debug("convert 'if 0' .. 'elif' into 'if'")
1706                blocks[j].directive = "if"
1707                i = j
1708            continue
1709
1710        # if 1 => find corresponding endif and remove/transform them
1711        k = find_matching_endif(blocks, j + 1)
1712        if k >= n:
1713            # unterminated #if 1, finish here
1714            logging.debug("unterminated 'if 1'")
1715            result += blocks[j+1:k]
1716            break
1717
1718        dir_ = blocks[k].directive
1719        if dir_ == "endif":
1720            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
1721                          blocks[j].lineno, blocks[k].lineno)
1722            result += optimize_if01(blocks[j+1:k])
1723            i = k + 1
1724        elif dir_ == "else":
1725            # convert 'else' into 'if 0'
1726            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
1727                          blocks[j].lineno, blocks[k].lineno)
1728            result += optimize_if01(blocks[j+1:k])
1729            blocks[k].directive = "if"
1730            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
1731            i = k
1732        elif dir_ == "elif":
1733            # convert 'elif' into 'if 0'
1734            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
1735                          blocks[j].lineno, blocks[k].lineno)
1736            result += optimize_if01(blocks[j+1:k])
1737            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
1738            i = k
1739    return result
1740
1741
1742def test_optimizeAll():
1743    text = """\
1744#if 1
1745#define  GOOD_1
1746#endif
1747#if 0
1748#define  BAD_2
1749#define  BAD_3
1750#endif
1751
1752#if 1
1753#define  GOOD_2
1754#else
1755#define  BAD_4
1756#endif
1757
1758#if 0
1759#define  BAD_5
1760#else
1761#define  GOOD_3
1762#endif
1763
1764#if defined(__KERNEL__)
1765#define BAD_KERNEL
1766#endif
1767
1768#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
1769#define X
1770#endif
1771
1772#ifndef SIGRTMAX
1773#define SIGRTMAX 123
1774#endif /* SIGRTMAX */
1775
1776#if 0
1777#if 1
1778#define  BAD_6
1779#endif
1780#endif\
1781"""
1782
1783    expected = """\
1784#define GOOD_1
1785#define GOOD_2
1786#define GOOD_3
1787#if !defined(__GLIBC__) || __GLIBC__ < 2
1788#define X
1789#endif
1790#ifndef __SIGRTMAX
1791#define __SIGRTMAX 123
1792#endif
1793"""
1794
1795    out = utils.StringOutput()
1796    blocks = BlockParser().parse(CppStringTokenizer(text))
1797    blocks.replaceTokens(kernel_token_replacements)
1798    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
1799    blocks.write(out)
1800    if out.get() != expected:
1801        print "[FAIL]: macro optimization failed\n"
1802        print "<<<< expecting '",
1803        print expected,
1804        print "'\n>>>> result '",
1805        print out.get(),
1806        print "'\n----"
1807        global failure_count
1808        failure_count += 1
1809
1810
1811def runUnitTests():
1812    """Always run all unit tests for this program."""
1813    test_CppTokenizer()
1814    test_CppExpr()
1815    test_optimizeAll()
1816    test_BlockParser()
1817
1818
1819failure_count = 0
1820runUnitTests()
1821if failure_count != 0:
1822    utils.panic("Unit tests failed in cpp.py.\n")
1823