1#!/usr/bin/env python
2"""A glorified C pre-processor parser."""
3
4import ctypes
5import logging
6import os
7import re
8import site
9import unittest
10import utils
11
# Everything below resolves libclang paths relative to the Android source
# tree root, so abort immediately if the environment isn't set up.
top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Set up the env vars for libclang.
site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))
18
19import clang.cindex
20from clang.cindex import conf
21from clang.cindex import Cursor
22from clang.cindex import CursorKind
23from clang.cindex import SourceLocation
24from clang.cindex import SourceRange
25from clang.cindex import TokenGroup
26from clang.cindex import TokenKind
27from clang.cindex import TranslationUnit
28
# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
# Point the clang bindings at the prebuilt shared library explicitly instead.
clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64/libclang_android.so'))
32
33from defaults import kCppUndefinedMacro
34from defaults import kernel_remove_config_macros
35from defaults import kernel_struct_replacements
36from defaults import kernel_token_replacements
37
38
# Module-level debug switches; flip to True to trace the corresponding
# stage on stdout.
debugBlockParser = False  # trace block parsing
debugCppExpr = False      # trace #if expression parsing
debugOptimIf01 = False    # trace #if 0 / #if 1 optimization
42
43###############################################################################
44###############################################################################
45#####                                                                     #####
46#####           C P P   T O K E N S                                       #####
47#####                                                                     #####
48###############################################################################
49###############################################################################
50
# the list of supported C-preprocessor tokens
# plus a couple of C tokens as well
tokEOF = "\0"          # end of input
tokLN = "\n"           # end of line
tokSTRINGIFY = "#"     # stringification operator
tokCONCAT = "##"       # token-pasting operator
# Logical / comparison operators.
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
# Arithmetic / bitwise operators.
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# Pseudo-token names used to classify literals rather than match exact text.
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
90
91
class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class. Because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be set by calling
    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    base class.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        clang.cindex.Token.__init__(self)
        self._id = None          # Overriding spelling, set via the id setter.
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token: the overridden spelling when set, else the
        original spelling from libclang."""
        if self._id is None:
            return self.spelling
        else:
            return self._id

    @id.setter
    def id(self, new_id):
        """Setting name of the token (the base spelling is read-only)."""
        self._id = new_id

    @property
    def cursor(self):
        """Cursor for this token; lazily falls back to the base class's."""
        if self._cursor is None:
            # BUG FIX: the original assigned the property descriptor object
            # (clang.cindex.Token.cursor) itself instead of invoking the base
            # class getter. Call fget(self) to obtain the actual cursor.
            self._cursor = clang.cindex.Token.cursor.fget(self)
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

    def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

        return self.id

    def __str__(self):
        return self.id
154
155
class BadExpectedToken(Exception):
    """Raised when the parser encounters a token it did not expect."""
159
160
161# The __contains__ function in libclang SourceRange class contains a bug. It
162# gives wrong result when dealing with single line range.
163# Bug filed with upstream:
164# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
# The __contains__ function in libclang SourceRange class contains a bug. It
# gives wrong result when dealing with single line range.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range."""
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        # Both locations are file-less; skip the file-name comparison and
        # fall through to the line/column checks below.
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # Locations refer to different files, so 'other' cannot be inside.
        # NOTE(review): if exactly one side has file == None, the .name
        # dereference above raises AttributeError instead of returning
        # False — presumably callers never mix file-backed and file-less
        # locations; confirm before relying on it.
        return False
    # same file, in between lines
    if self.start.line < other.line < self.end.line:
        return True
    # same file, same line
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # same file first line
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # same file last line
        if other.column <= self.end.column:
            return True
    return False


# Replace libclang's buggy implementation with the fixed one above.
SourceRange.__contains__ = SourceRange__contains__
194
195
196################################################################################
197################################################################################
198#####                                                                      #####
199#####           C P P   T O K E N I Z E R                                  #####
200#####                                                                      #####
201################################################################################
202################################################################################
203
204
class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

    clang_flags = ['-E', '-x', 'c']  # Preprocess-only, treat input as C.
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None      # TranslationUnit from the last parse*() call.
        self._index = 0      # Read position for nextToken().
        self.tokens = None   # Token list produced by parse*().

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.
        """

        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        # TokenGroup keeps the libclang-allocated token memory alive for the
        # lifetime of the Token objects built below.
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        # PORTABILITY FIX: xrange is Python-2-only; range is behaviorally
        # identical here and works on both Python 2 and 3.
        for i in range(count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Parse a string of text and store the token list in self.tokens."""
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def parseFile(self, file_):
        """Parse a file and store the token list in self.tokens."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def nextToken(self):
        """Return next token from the list, or None when exhausted."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None
293
294
class CppStringTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a string of text as input."""

    def __init__(self, line):
        super(CppStringTokenizer, self).__init__()
        self.parseString(line)
301
302
class CppFileTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a file as input."""

    def __init__(self, file_):
        super(CppFileTokenizer, self).__init__()
        self.parseFile(file_)
309
310
311# Unit testing
312#
class CppTokenizerTests(unittest.TestCase):
    """CppTokenizer tests."""

    def get_tokens(self, token_string, line_col=False):
        """Tokenize token_string; return token ids, optionally with their
        (line, column) positions."""
        tokenizer = CppStringTokenizer(token_string)
        collected = []
        tok = tokenizer.nextToken()
        while tok:
            if line_col:
                collected.append((tok.id, tok.location.line,
                                  tok.location.column))
            else:
                collected.append(tok.id)
            tok = tokenizer.nextToken()
        return collected

    def test_hash(self):
        expected = ["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                    "01923_xy", tokRPAREN]
        self.assertEqual(self.get_tokens("#an/example  && (01923_xy)"),
                         expected)

    def test_parens(self):
        expected = ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                    "defined", tokLPAREN, "BAZ", tokRPAREN]
        self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"),
                         expected)

    def test_comment(self):
        # A comment spanning lines yields no tokens at all.
        self.assertEqual(self.get_tokens("/*\n#\n*/"), [])

    def test_line_cross(self):
        self.assertEqual(self.get_tokens("first\nsecond"), ["first", "second"])

    def test_line_cross_line_col(self):
        expected = [("first", 1, 1), ("second", 1, 7), ("third", 2, 3)]
        self.assertEqual(self.get_tokens("first second\n  third", True),
                         expected)

    def test_comment_line_col(self):
        self.assertEqual(self.get_tokens("boo /* what the\nhell */", True),
                         [("boo", 1, 1)])

    def test_escapes(self):
        # A backslash-newline continuation joins the logical line but keeps
        # the physical locations.
        self.assertEqual(self.get_tokens("an \\\n example", True),
                         [("an", 1, 1), ("example", 2, 2)])
357
358
359################################################################################
360################################################################################
361#####                                                                      #####
362#####           C P P   E X P R E S S I O N S                              #####
363#####                                                                      #####
364################################################################################
365################################################################################
366
367
class CppExpr(object):
    """A class that models the condition of #if directives into an expr tree.

    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    "op" is a string describing the operation
    """

    unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

    def __init__(self, tokens):
        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

        # print() call form works on both Python 2 and 3; the original used
        # Python-2-only print statements.
        if debugCppExpr:
            print("CppExpr: trying to parse %s" % repr(tokens))
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print("CppExpr: got " + repr(self.expr))
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

    def throw(self, exception, msg):
        """Print a diagnostic at the current token and raise 'exception'."""
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print("%d:%d: %s" % (tok.location.line, tok.location.column, msg))
        else:
            print("EOF: %s" % msg)
        raise exception(msg)

    def expectId(self, id):
        """Check that a given token id is at the current position."""
        # BUG FIX: check bounds *before* indexing. The original fetched
        # self.tokens[self._index] first, raising a bare IndexError at end
        # of input instead of the intended BadExpectedToken.
        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got end of input"
                       % id)
        token = self.tokens[self._index]
        if token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

    @staticmethod
    def _remove_int_suffix(token):
        """Strip C integer suffixes (any combination of u/U/l/L, e.g. UL,
        ll, ULL) from a literal string.

        GENERALIZATION: the original stripped only a single suffix
        character, rejecting common literals such as '1UL' or '0x1ULL'.
        """
        return token.rstrip("ULul")

    def is_decimal(self):
        """Try to consume a decimal literal; return ('int', value) or None."""
        token = self._remove_int_suffix(self.tokens[self._index].id)
        try:
            val = int(token, 10)
            self._index += 1
            return ('int', val)
        except ValueError:
            return None

    def is_octal(self):
        """Try to consume an octal literal; return ('oct', value) or None."""
        token = self._remove_int_suffix(self.tokens[self._index].id)
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
            self._index += 1
            return ('oct', val)
        except ValueError:
            return None

    def is_hexadecimal(self):
        """Try to consume a hex literal; return ('hex', value) or None."""
        token = self._remove_int_suffix(self.tokens[self._index].id)
        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
            return None
        try:
            val = int(token, 16)
            self._index += 1
            return ('hex', val)
        except ValueError:
            return None

    def is_integer(self):
        """Try to consume an integer literal of any base."""
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

        # Try hex before octal before decimal: "0x.." would parse as octal 0
        # otherwise.
        c = self.is_hexadecimal()
        if c:
            return c

        c = self.is_octal()
        if c:
            return c

        c = self.is_decimal()
        if c:
            return c

        return None

    def is_number(self):
        """Try to consume an optionally-signed integer literal."""
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

        # NOTE(review): if a sign was consumed but no integer followed, the
        # index is not rewound before this final attempt — preserved as-is
        # from the original; confirm callers never hit that path.
        return self.is_integer()

    def is_defined(self):
        """Try to consume a defined(X) / defined X clause."""
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

        # We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
            self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

        self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

        return ("defined", t.id)

    def is_call_or_ident(self):
        """Try to consume an identifier or a macro call FOO(a, b, ...)."""
        if self._index >= self._num_tokens:
            return None

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

        name = t.id

        self._index += 1
        if (self._index >= self._num_tokens or
            self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

        params = []
        depth = 1
        self._index += 1
        j = self._index  # Start of the current parameter's token slice.
        while self._index < self._num_tokens:
            tok_id = self.tokens[self._index].id
            if tok_id == tokLPAREN:
                depth += 1
            elif depth == 1 and (tok_id == tokCOMMA or tok_id == tokRPAREN):
                # Top-level comma or closing paren terminates a parameter.
                k = self._index
                param = self.tokens[j:k]
                params.append(param)
                if tok_id == tokRPAREN:
                    break
                j = self._index + 1
            elif tok_id == tokRPAREN:
                depth -= 1
            self._index += 1

        if self._index >= self._num_tokens:
            # Unterminated call: no closing paren found.
            return None

        self._index += 1
        return ("call", (name, params))

    # Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    # The "classic" algorithm would be fine if we were using a tool to
    # generate the parser, but we're not. Dijkstra's "shunting yard"
    # algorithm hasn't been necessary yet.

    def parseExpression(self, minPrecedence):
        """Parse a (sub-)expression whose operators bind at least as tightly
        as minPrecedence; return the expression tree node."""
        if self._index >= self._num_tokens:
            return None

        node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

        return node

    def parsePrimary(self):
        """Parse a primary expression: literal, ident, call, defined, unary
        or parenthesized sub-expression."""
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

        primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

    def isBinary(self, token):
        return token.id in self.binaries

    def isUnary(self, token):
        return token.id in self.unaries

    def precedence(self, token):
        return self.precedences.get(token.id)

    def token(self):
        """Return the current token, or None at end of input."""
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def nextToken(self):
        """Advance and return the new current token, or None at end."""
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def dump_node(self, e):
        """Render node 'e' as a debug s-expression string."""
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        return self.dump_node(self.expr)

    def source_node(self, e):
        """Render node 'e' back into C source text, adding parentheses only
        where operator precedence requires them."""
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            # BUG FIX: emit the actual operator; the original hard-coded "!"
            # here, which mis-rendered "~" expressions as "!".
            if arg_prec < prec:
                return op + "(" + arg_src + ")"
            else:
                return op + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_op = arg[0]
            arg2_op = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"

    def __str__(self):
        return self.source_node(self.expr)

    @staticmethod
    def int_node(e):
        """Return the value of an integer-literal node, else None."""
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        else:
            return None

    def toInt(self):
        return self.int_node(self.expr)

    def optimize_node(self, e, macros=None):
        """Constant-fold node 'e' given a dict of known macro values;
        returns a (possibly) simplified node."""
        if macros is None:
            macros = {}
        op = e[0]

        if op == "defined":
            op, name = e
            # IDIOM FIX: dict.has_key() is removed in Python 3; use 'in'.
            if name in macros:
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                else:
                    try:
                        value = int(macros[name])
                        return ("int", value)
                    except ValueError:
                        return ("defined", macros[name])

            if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

            return e

        elif op == "ident":
            op, name = e
            if name in macros:
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                return self.optimize_node(expanded, macros)
            return e

        elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            if v[0] == "int":
                if v[1] == 0:
                    return ("int", 1)
                else:
                    return ("int", 0)
            return ('!', v)

        elif op == "&&":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            # 0 && x -> 0; 1 && x -> x (and symmetrically for the right side).
            if li is not None:
                if li == 0:
                    return ("int", 0)
                else:
                    return r
            elif ri is not None:
                if ri == 0:
                    return ("int", 0)
                else:
                    return l
            return (op, l, r)

        elif op == "||":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            # 0 || x -> x; 1 || x -> 1 (and symmetrically for the right side).
            if li is not None:
                if li == 0:
                    return r
                else:
                    return ("int", 1)
            elif ri is not None:
                if ri == 0:
                    return l
                else:
                    return ("int", 1)
            return (op, l, r)

        else:
            return e

    def optimize(self, macros=None):
        """Constant-fold self.expr in place using the given macro dict."""
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)
807
808class CppExprTest(unittest.TestCase):
809    """CppExpr unit tests."""
810
811    def get_expr(self, expr):
812        return repr(CppExpr(CppStringTokenizer(expr).tokens))
813
814    def test_cpp_expr(self):
815        self.assertEqual(self.get_expr("0"), "(int 0)")
816        self.assertEqual(self.get_expr("1"), "(int 1)")
817        self.assertEqual(self.get_expr("-5"), "(int -5)")
818        self.assertEqual(self.get_expr("+1"), "(int 1)")
819        self.assertEqual(self.get_expr("0U"), "(int 0)")
820        self.assertEqual(self.get_expr("015"), "(oct 015)")
821        self.assertEqual(self.get_expr("015l"), "(oct 015)")
822        self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)")
823        self.assertEqual(self.get_expr("(0)"), "(int 0)")
824        self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))")
825        self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))")
826        self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)")
827        self.assertEqual(self.get_expr("EXAMPLE - 3"),
828                         "(- (ident EXAMPLE) (int 3))")
829        self.assertEqual(self.get_expr("defined(EXAMPLE)"),
830                         "(defined EXAMPLE)")
831        self.assertEqual(self.get_expr("defined ( EXAMPLE ) "),
832                         "(defined EXAMPLE)")
833        self.assertEqual(self.get_expr("!defined(EXAMPLE)"),
834                         "(! (defined EXAMPLE))")
835        self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"),
836                         "(|| (defined ABC) (defined BINGO))")
837        self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])")
838        self.assertEqual(self.get_expr("A == 1 || defined(B)"),
839                         "(|| (== (ident A) (int 1)) (defined B))")
840
841    def get_expr_optimize(self, expr, macros=None):
842        if macros is None:
843            macros = {}
844        e = CppExpr(CppStringTokenizer(expr).tokens)
845        e.optimize(macros)
846        return repr(e)
847
848    def test_cpp_expr_optimize(self):
849        self.assertEqual(self.get_expr_optimize("0"), "(int 0)")
850        self.assertEqual(self.get_expr_optimize("1"), "(int 1)")
851        self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)")
852        self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)")
853        self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)")
854        self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)")
855        self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)")
856        self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)")
857        self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)")
858        self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)")
859        self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)")
860        self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)")
861        self.assertEqual(self.get_expr_optimize("A"), "(ident A)")
862        self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)")
863        self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)")
864        self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)")
865        self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}), "(ident B)")
866        self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}), "(ident A)")
867        self.assertEqual(self.get_expr_optimize("A && B"), "(&& (ident A) (ident B))")
868        self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)")
869        self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"), "(- (ident EXAMPLE) (int 3))")
870        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"), "(defined EXAMPLE)")
871        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
872                                                {"EXAMPLE": "XOWOE"}),
873                         "(defined XOWOE)")
874        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
875                                                {"EXAMPLE": kCppUndefinedMacro}),
876                         "(int 0)")
877        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"), "(! (defined EXAMPLE))")
878        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
879                                                {"EXAMPLE": "XOWOE"}),
880                         "(! (defined XOWOE))")
881        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
882                                                {"EXAMPLE": kCppUndefinedMacro}),
883                         "(int 1)")
884        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"),
885                        "(|| (defined A) (defined B))")
886        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
887                                                {"A": "1"}),
888                         "(int 1)")
889        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
890                                                {"B": "1"}),
891                         "(int 1)")
892        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
893                                                {"B": kCppUndefinedMacro}),
894                         "(defined A)")
895        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
896                                                {"A": kCppUndefinedMacro,
897                                                 "B": kCppUndefinedMacro}),
898                         "(int 0)")
899        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"),
900                         "(&& (defined A) (defined B))")
901        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
902                                                {"A": "1"}),
903                         "(defined B)")
904        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
905                                                {"B": "1"}),
906                         "(defined A)")
907        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
908                                                {"B": kCppUndefinedMacro}),
909                        "(int 0)")
910        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
911                                                {"A": kCppUndefinedMacro}),
912                        "(int 0)")
913        self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"),
914                         "(|| (== (ident A) (int 1)) (defined B))")
915        self.assertEqual(self.get_expr_optimize(
916              "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
917              {"__KERNEL__": kCppUndefinedMacro}),
918              "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))")
919
920    def get_expr_string(self, expr):
921        return str(CppExpr(CppStringTokenizer(expr).tokens))
922
923    def test_cpp_expr_string(self):
924        self.assertEqual(self.get_expr_string("0"), "0")
925        self.assertEqual(self.get_expr_string("1"), "1")
926        self.assertEqual(self.get_expr_string("1 && 1"), "1 && 1")
927        self.assertEqual(self.get_expr_string("1 && 0"), "1 && 0")
928        self.assertEqual(self.get_expr_string("0 && 1"), "0 && 1")
929        self.assertEqual(self.get_expr_string("0 && 0"), "0 && 0")
930        self.assertEqual(self.get_expr_string("1 || 1"), "1 || 1")
931        self.assertEqual(self.get_expr_string("1 || 0"), "1 || 0")
932        self.assertEqual(self.get_expr_string("0 || 1"), "0 || 1")
933        self.assertEqual(self.get_expr_string("0 || 0"), "0 || 0")
934        self.assertEqual(self.get_expr_string("EXAMPLE"), "EXAMPLE")
935        self.assertEqual(self.get_expr_string("EXAMPLE - 3"), "EXAMPLE - 3")
936        self.assertEqual(self.get_expr_string("defined(EXAMPLE)"), "defined(EXAMPLE)")
937        self.assertEqual(self.get_expr_string("defined EXAMPLE"), "defined(EXAMPLE)")
938        self.assertEqual(self.get_expr_string("A == 1 || defined(B)"), "A == 1 || defined(B)")
939
940
941################################################################################
942################################################################################
943#####                                                                      #####
944#####          C P P   B L O C K                                           #####
945#####                                                                      #####
946################################################################################
947################################################################################
948
949
class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.

        Args:
          tokens: the list of tokens making up this block.
          directive: the directive name (e.g. 'if'), or None for a text block.
          lineno: the source line number, or 0 to use the first token's line.
          identifier: the macro name for a '#define' directive, or None.
        """

        # Rewrite '#ifdef MACRO' as '#if defined MACRO' by prepending a
        # synthetic 'defined' token, so every conditional goes through the
        # same CppExpr parsing path.
        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        # Likewise, rewrite '#ifndef MACRO' as '#if ! defined MACRO'.
        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        # Macro name for '#define' blocks; None otherwise.
        self.define_id = identifier
        # Fall back to the first token's location when no explicit line
        # number was supplied by the caller.
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].location.line

        # Pre-parse the conditional expression of #if/#elif blocks once, so
        # later optimization passes can work on 'self.expr' directly.
        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation.

        Args:
          tokens: the tokens of a text block to pretty-print.
          indent: the starting indentation level, in spaces.

        Returns:
          A (lines, indent) tuple: the formatted lines and the indentation
          level after the last token (braces may have changed it).
        """
        # 'newline' is True when 'buf' is empty and the next token starts a
        # fresh output line; 'buf' accumulates the current line's text.
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                # An opening brace ends the current line and increases the
                # indentation for what follows.
                buf += ' {'
                result.append(strip_space(buf))
                # Do not indent if this is extern "C" {
                if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
                    indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                if indent >= 2:
                    indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                # A trailing ';', attribute, or identifier (e.g. a typedef
                # name) keeps the '}' on the same line as what follows.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                # A semicolon always terminates the current output line.
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                # Ordinary token: indent at the start of a line, otherwise
                # separate from the previous token with a single space.
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        # Flush any partial line left in the buffer.
        if buf:
            result.append(strip_space(buf))

        return result, indent

    def write(self, out, indent):
        """Dump the current block.

        Args:
          out: a file-like object to write the block to.
          indent: the current indentation level, in spaces.

        Returns:
          The indentation level after writing this block.
        """
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return indent

        if self.directive:
            out.write(str(self) + '\n')
        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')

        return indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                # Conditionals carry a parsed expression; use its repr.
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            # Text block: pretty-print the tokens starting at indent 0.
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result
1159
1160
class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        # blocks: a list of Block objects, in source order.
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self, n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def optimizeAll(self, macros):
        """Optimize conditionals against 'macros', then strip #if 0 ranges."""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        """Dump all blocks to 'out', threading the indent level through."""
        indent = 0
        for b in self.blocks:
            indent = b.write(out, indent)

    def removeVarsAndFuncs(self, keep):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        In addition, remove any macros expanding in the headers. Usually,
        these macros are static inline functions, which is why they are
        removed.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'keep', which is useful
        for optimized byteorder swap functions and stuff like that.
        """

        # state = NORMAL => normal (i.e. LN + spaces)
        # state = OTHER_DECL => typedef/struct encountered, ends with ";"
        # state = VAR_DECL => var declaration encountered, ends with ";"
        # state = FUNC_DECL => func declaration encountered, ends with "}"
        NORMAL = 0
        OTHER_DECL = 1
        VAR_DECL = 2
        FUNC_DECL = 3

        state = NORMAL
        depth = 0                  # current brace nesting depth
        blocksToKeep = []          # final output blocks
        blocksInProgress = []      # blocks of the declaration being scanned
        blocksOfDirectives = []    # directives seen inside that declaration
        ident = ""                 # last identifier seen at depth 0
        state_token = ""           # keyword that moved us into OTHER_DECL
        macros = set()             # names of all #define'd macros seen so far
        for block in self.blocks:
            if block.isDirective():
                # Record all macros.
                if block.directive == 'define':
                    macro_name = block.define_id
                    paren_index = macro_name.find('(')
                    if paren_index == -1:
                        macros.add(macro_name)
                    else:
                        # Function-like macro: record only the bare name.
                        macros.add(macro_name[0:paren_index])
                blocksInProgress.append(block)
                # If this is in a function/variable declaration, we might need
                # to emit the directives alone, so save them separately.
                blocksOfDirectives.append(block)
                continue

            numTokens = len(block.tokens)
            lastTerminatorIndex = 0
            i = 0
            while i < numTokens:
                token_id = block.tokens[i].id
                terminator = False
                if token_id == '{':
                    depth += 1
                    if (i >= 2 and block.tokens[i-2].id == 'extern' and
                        block.tokens[i-1].id == '"C"'):
                        # For an extern "C" { pretend as though this is depth 0.
                        depth -= 1
                elif token_id == '}':
                    if depth > 0:
                        depth -= 1
                    if depth == 0:
                        if state == OTHER_DECL:
                            # Loop through until we hit the ';'
                            i += 1
                            while i < numTokens:
                                if block.tokens[i].id == ';':
                                    token_id = ';'
                                    break
                                i += 1
                            # If we didn't hit the ';', just consider this the
                            # terminator anyway.
                        terminator = True
                elif depth == 0:
                    if token_id == ';':
                        if state == NORMAL:
                            # A ';' with no recognized declaration keyword:
                            # treat what we scanned as a function declaration.
                            blocksToKeep.extend(blocksInProgress)
                            blocksInProgress = []
                            blocksOfDirectives = []
                            state = FUNC_DECL
                        terminator = True
                    elif (state == NORMAL and token_id == '(' and i >= 1 and
                          block.tokens[i-1].kind == TokenKind.IDENTIFIER and
                          block.tokens[i-1].id in macros):
                        # This is a plain macro being expanded in the header
                        # which needs to be removed.
                        blocksToKeep.extend(blocksInProgress)
                        if lastTerminatorIndex < i - 1:
                            blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1]))
                        blocksInProgress = []
                        blocksOfDirectives = []

                        # Skip until we see the terminating ')'
                        i += 1
                        paren_depth = 1
                        while i < numTokens:
                            if block.tokens[i].id == ')':
                                paren_depth -= 1
                                if paren_depth == 0:
                                    break
                            elif block.tokens[i].id == '(':
                                paren_depth += 1
                            i += 1
                        lastTerminatorIndex = i + 1
                    elif (state != FUNC_DECL and token_id == '(' and
                          state_token != 'typedef'):
                        # A '(' outside a typedef means a variable or function
                        # declarator follows; mark it for removal.
                        blocksToKeep.extend(blocksInProgress)
                        blocksInProgress = []
                        blocksOfDirectives = []
                        state = VAR_DECL
                    elif state == NORMAL and token_id in ['struct', 'typedef',
                                                          'enum', 'union',
                                                          '__extension__']:
                        state = OTHER_DECL
                        state_token = token_id
                    elif block.tokens[i].kind == TokenKind.IDENTIFIER:
                        # Remember the first identifier of a var declaration
                        # so it can be checked against 'keep' below.
                        if state != VAR_DECL or ident == "":
                            ident = token_id

                if terminator:
                    # Declaration complete: keep it unless it is a var/func
                    # declaration whose identifier is not in 'keep'.
                    if state != VAR_DECL and state != FUNC_DECL or ident in keep:
                        blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1]))
                        blocksToKeep.extend(blocksInProgress)
                    else:
                        # Only keep the directives found.
                        blocksToKeep.extend(blocksOfDirectives)
                    lastTerminatorIndex = i + 1
                    blocksInProgress = []
                    blocksOfDirectives = []
                    state = NORMAL
                    ident = ""
                    state_token = ""
                i += 1
            # Carry any unterminated tail tokens into the next block.
            if lastTerminatorIndex < numTokens:
                blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens]))
        if len(blocksInProgress) > 0:
            blocksToKeep.extend(blocksInProgress)
        self.blocks = blocksToKeep

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict."""
        extra_includes = []
        for b in self.blocks:
            made_change = False
            # #include blocks are left untouched.
            if b.isInclude() is None:
                i = 0
                while i < len(b.tokens):
                    tok = b.tokens[i]
                    # A 'struct NAME {' definition listed in
                    # kernel_struct_replacements is replaced wholesale by a
                    # '#include <bits/NAME.h>' added at the end of this pass.
                    if (tok.kind == TokenKind.KEYWORD and tok.id == 'struct'
                        and (i + 2) < len(b.tokens) and b.tokens[i + 2].id == '{'):
                        struct_name = b.tokens[i + 1].id
                        if struct_name in kernel_struct_replacements:
                            extra_includes.append("<bits/%s.h>" % struct_name)
                            end = i + 2
                            while end < len(b.tokens) and b.tokens[end].id != '}':
                                end += 1
                            end += 1 # Swallow '}'
                            while end < len(b.tokens) and b.tokens[end].id != ';':
                                end += 1
                            end += 1 # Swallow ';'
                            # Remove these tokens. We'll replace them later with a #include block.
                            b.tokens[i:end] = []
                            made_change = True
                            # We've just modified b.tokens, so revisit the current offset.
                            continue
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True
                    i += 1

                # #define'd names are also subject to replacement.
                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)

        # Insert the replacement #include blocks after the leading blocks
        # (typically the license header and guard).
        for extra_include in extra_includes:
            replacement = CppStringTokenizer(extra_include)
            self.blocks.insert(2, Block(replacement.tokens, directive='include'))
1405
1406
1407
def strip_space(s):
    """Strip out redundant space in a given string."""

    # NOTE: It ought to be more clever to not destroy spaces in string tokens.
    # The pairs are applied in order; each one removes the space around a
    # single punctuation form.
    substitutions = (
        (' . ', '.'),
        (' [', '['),
        ('[ ', '['),
        (' ]', ']'),
        ('( ', '('),
        (' )', ')'),
        (' ,', ','),
        ('# ', '#'),
        (' ;', ';'),
        ('~ ', '~'),
        (' -> ', '->'),
    )
    text = s
    for old, new in substitutions:
        text = text.replace(old, new)

    # Remove the space between function name and the parenthesis.
    return re.sub(r'(\w+) \(', r'\1(', text)
1430
1431
class BlockParser(object):
    """A class that converts an input source file into a BlockList object."""

    def __init__(self, tokzer=None):
        """Initialize a block parser.

        The input source is provided through a Tokenizer object.
        """
        self._tokzer = tokzer
        self._parsed = False

    @property
    def parsed(self):
        # True once getBlocks() has run to completion.
        return self._parsed

    @staticmethod
    def _short_extent(extent):
        """Return a compact 'line:col - line:col' string for a clang extent."""
        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
                                  extent.end.line, extent.end.column)

    def getBlocks(self, tokzer=None):
        """Return all the blocks parsed.

        Args:
          tokzer: an optional tokenizer; defaults to the one given at
            construction time.

        Returns:
          A BlockList holding the parsed Block objects.

        Raises:
          BadExpectedToken: if a '#' token is not followed by a directive name.
        """

        def consume_extent(i, tokens, extent=None, detect_change=False):
            """Return tokens that belong to the given extent.

            It parses all the tokens that follow tokens[i], until getting out
            of the extent. When detect_change is True, it may terminate early
            when detecting preprocessing directives inside the extent.
            """

            result = []
            if extent is None:
                extent = tokens[i].cursor.extent

            while i < len(tokens) and tokens[i].location in extent:
                t = tokens[i]
                if debugBlockParser:
                    print ' ' * 2, t.id, t.kind, t.cursor.kind
                if (detect_change and t.cursor.extent != extent and
                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
                    break
                result.append(t)
                i += 1
            return (i, result)

        def consume_line(i, tokens):
            """Return tokens that follow tokens[i] in the same line."""
            result = []
            line = tokens[i].location.line
            while i < len(tokens) and tokens[i].location.line == line:
                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                    break
                result.append(tokens[i])
                i += 1
            return (i, result)

        if tokzer is None:
            tokzer = self._tokzer
        tokens = tokzer.tokens

        blocks = []
        buf = []     # accumulates text (non-directive) tokens
        i = 0

        while i < len(tokens):
            t = tokens[i]
            cursor = t.cursor

            if debugBlockParser:
                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
                                        cursor.kind,
                                        self._short_extent(cursor.extent)))

            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                # Flush any pending text tokens as a text block first.
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                j = i
                if j + 1 >= len(tokens):
                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
                directive = tokens[j+1].id

                if directive == 'define':
                    if i+2 >= len(tokens):
                        raise BadExpectedToken("### BAD TOKEN at %s" %
                                               (tokens[i].location))

                    # Skip '#' and 'define'.
                    extent = tokens[i].cursor.extent
                    i += 2
                    id = ''
                    # We need to separate the id from the remaining of
                    # the line, especially for the function-like macro.
                    # A '(' immediately after the macro name (no space in
                    # between, checked via column arithmetic) means a
                    # function-like macro: the id includes the parameter list.
                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
                        (tokens[i].location.column + len(tokens[i].spelling) ==
                         tokens[i+1].location.column)):
                        while i < len(tokens):
                            id += tokens[i].id
                            if tokens[i].spelling == ')':
                                i += 1
                                break
                            i += 1
                    else:
                        id += tokens[i].id
                        # Advance to the next token that follows the macro id
                        i += 1

                    # The remainder of the directive's extent is the macro body.
                    (i, ret) = consume_extent(i, tokens, extent=extent)
                    blocks.append(Block(ret, directive=directive,
                                        lineno=t.location.line, identifier=id))

                else:
                    # Other directives: drop the leading '#' and the directive
                    # name (ret[2:]) and keep the rest as the block's tokens.
                    (i, ret) = consume_extent(i, tokens)
                    blocks.append(Block(ret[2:], directive=directive,
                                        lineno=t.location.line))

            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []
                directive = tokens[i+1].id
                (i, ret) = consume_extent(i, tokens)

                # ret[2:] skips the '#' and 'include' tokens.
                blocks.append(Block(ret[2:], directive=directive,
                                    lineno=t.location.line))

            elif cursor.kind == CursorKind.VAR_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                # Consume the whole declaration, but stop early on an
                # embedded preprocessing directive.
                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            elif cursor.kind == CursorKind.FUNCTION_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            else:
                # Anything else is collected line by line into the current
                # text block.
                (i, ret) = consume_line(i, tokens)
                buf += ret

        if buf:
            blocks.append(Block(buf))

        # _parsed=True indicates a successful parsing, although may result an
        # empty BlockList.
        self._parsed = True

        return BlockList(blocks)

    def parse(self, tokzer):
        """Parse the given tokenizer's tokens into a BlockList."""
        return self.getBlocks(tokzer)

    def parseFile(self, path):
        """Tokenize the file at 'path' and parse it into a BlockList."""
        return self.getBlocks(CppFileTokenizer(path))
1595
1596
class BlockParserTests(unittest.TestCase):
    """Unit tests for the BlockParser class."""

    def get_blocks(self, lines):
        """Parse the given source lines and return each block as a string."""
        tokenizer = CppStringTokenizer('\n'.join(lines))
        return [str(block) for block in BlockParser().parse(tokenizer)]

    def test_hash(self):
        self.assertEqual(self.get_blocks(["#error hello"]), ["#error hello"])

    def test_empty_line(self):
        source = ["foo", "", "bar"]
        self.assertEqual(self.get_blocks(source), ["foo bar"])

    def test_hash_with_space(self):
        # We currently cannot handle the following case with libclang properly.
        # Fortunately it doesn't appear in current headers.
        #self.assertEqual(self.get_blocks(["foo", "  #  ", "bar"]), ["foo", "bar"])
        pass

    def test_with_comment(self):
        source = ["foo",
                  "  #  /* ahah */ if defined(__KERNEL__) /* more */",
                  "bar",
                  "#endif"]
        expected = ["foo", "#ifdef __KERNEL__", "bar", "#endif"]
        self.assertEqual(self.get_blocks(source), expected)
1621
1622
1623################################################################################
1624################################################################################
1625#####                                                                      #####
1626#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
1627#####                                                                      #####
1628################################################################################
1629################################################################################
1630
1631
def find_matching_endif(blocks, i):
    """Scan forward from index i for the directive closing the conditional.

    Returns the index of the matching #endif, or of an #else/#elif block at
    the same nesting level, or len(blocks) if no terminator is found.
    """
    depth = 1
    for pos in range(i, len(blocks)):
        block = blocks[pos]
        if not block.isDirective():
            continue
        directive = block.directive
        if directive in ("if", "ifndef", "ifdef"):
            # A nested conditional opens; its #endif must not match ours.
            depth += 1
        elif directive == "endif":
            depth -= 1
            if depth == 0:
                return pos
        elif depth == 1 and directive in ("else", "elif"):
            return pos
    return len(blocks)
1649
1650
def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks.

    Conditional directives whose expression constant-folds to 0 have their
    body dropped; those folding to 1 have the surrounding directives removed
    (recursing into the body).  Expressions that cannot be folded to an
    integer are passed through untouched.

    NOTE(review): this mutates the directive/expr/tokens fields of the input
    blocks in place while building the returned list.
    """
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # Advance j past the run of non-#if blocks starting at i and copy
        # that run to the output verbatim.
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        # blocks[j] is an #if/#elif; try to fold its expression to an int.
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            # Not a constant expression: keep the directive as-is.
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            start_dir = blocks[j].directive
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                if start_dir == "elif":
                    # Put an endif since we started with an elif.
                    result += blocks[j:j+1]
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                # Re-process the rewritten directive on the next iteration.
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        start_dir = blocks[j].directive
        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            # Recurse into the body so nested constant conditionals are
            # optimized too.
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                # Add the endif in to the results.
                result += blocks[k:k+1]
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                blocks[k].directive = "elif"
            else:
                blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result
1753
class OptimizerTests(unittest.TestCase):
    """Tests for the block-list optimization passes (#if folding etc.)."""

    def parse(self, text, macros=None):
        """Parse |text|, apply token replacements and optimizations.

        Args:
            text: the source text to process.
            macros: optional dict mapping macro names to replacement values.

        Returns:
            The optimized output as a string.
        """
        out = utils.StringOutput()
        blocks = BlockParser().parse(CppStringTokenizer(text))
        blocks.replaceTokens(kernel_token_replacements)
        blocks.optimizeAll(macros)
        blocks.write(out)
        return out.get()

    def test_if1(self):
        text = """\
#if 1
#define  GOOD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if0(self):
        text = """\
#if 0
#define  SHOULD_SKIP1
#define  SHOULD_SKIP2
#endif
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_if1_else(self):
        text = """\
#if 1
#define  GOOD
#else
#define  BAD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if0_else(self):
        text = """\
#if 0
#define  BAD
#else
#define  GOOD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1(self):
        text = """\
#if defined(something)
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1_macro(self):
        text = """\
#if defined(something)
#define EXISTS
#elif defined(WILL_BE_ONE)
#define GOOD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)

    def test_if_elif1_else(self):
        text = """\
#if defined(something)
#define EXISTS
#elif 1
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text), expected)

    # NOTE: this method was previously defined twice with identical bodies;
    # the duplicate silently shadowed the first definition and has been
    # removed.
    def test_if_elif1_else_macro(self):
        text = """\
#if defined(something)
#define EXISTS
#elif defined(WILL_BE_ONE)
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)

    def test_macro_set_to_undefined_single(self):
        text = """\
#if defined(__KERNEL__)
#define BAD_KERNEL
#endif
"""
        expected = ""
        macros = {"__KERNEL__": kCppUndefinedMacro}
        self.assertEqual(self.parse(text, macros), expected)

    def test_macro_set_to_undefined_if(self):
        text = """\
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define CHECK
#endif
"""
        expected = """\
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define CHECK
#endif
"""
        macros = {"__KERNEL__": kCppUndefinedMacro}
        self.assertEqual(self.parse(text, macros), expected)

    def test_endif_comment_removed(self):
        text = """\
#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */
"""
        expected = """\
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_multilevel_if0(self):
        text = """\
#if 0
#if 1
#define  BAD_6
#endif
#endif
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)
1950
class FullPathTest(unittest.TestCase):
    """Test of the full path parsing."""

    def parse(self, text, keep=None):
        """Run the full pipeline on |text|: remove vars/funcs, replace tokens,
        optimize, and return the rendered result.

        Args:
            text: the source text to process.
            keep: optional set of variable/function names to preserve.

        Returns:
            The processed output as a string.
        """
        if not keep:
            keep = set()
        out = utils.StringOutput()
        blocks = BlockParser().parse(CppStringTokenizer(text))
        blocks.removeVarsAndFuncs(keep)
        blocks.replaceTokens(kernel_token_replacements)
        blocks.optimizeAll(None)
        blocks.write(out)
        return out.get()

    def test_function_removed(self):
        text = """\
static inline __u64 function()
{
}
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_function_removed_with_struct(self):
        text = """\
static inline struct something* function()
{
}
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_function_kept(self):
        text = """\
static inline __u64 function()
{
}
"""
        expected = """\
static inline __u64 function() {
}
"""
        self.assertEqual(self.parse(text, set(["function"])), expected)

    def test_var_removed(self):
        text = "__u64 variable;"
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_var_kept(self):
        text = "__u64 variable;"
        expected = "__u64 variable;\n"
        self.assertEqual(self.parse(text, set(["variable"])), expected)

    def test_keep_function_typedef(self):
        text = "typedef void somefunction_t(void);"
        expected = "typedef void somefunction_t(void);\n"
        self.assertEqual(self.parse(text), expected)

    def test_struct_keep_attribute(self):
        text = """\
struct something_s {
  __u32 s1;
  __u32 s2;
} __attribute__((packed));
"""
        expected = """\
struct something_s {
  __u32 s1;
  __u32 s2;
} __attribute__((packed));
"""
        self.assertEqual(self.parse(text), expected)

    def test_function_keep_attribute_structs(self):
        text = """\
static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
}
"""
        expected = """\
static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
}
"""
        self.assertEqual(self.parse(text, set(["function"])), expected)

    def test_struct_after_struct(self):
        text = """\
struct first {
};

struct second {
  unsigned short s1;
#define SOMETHING 8
  unsigned short s2;
};
"""
        expected = """\
struct first {
};
struct second {
  unsigned short s1;
#define SOMETHING 8
  unsigned short s2;
};
"""
        self.assertEqual(self.parse(text), expected)

    def test_other_not_removed(self):
        text = """\
typedef union {
  __u64 tu1;
  __u64 tu2;
} typedef_name;

union {
  __u64 u1;
  __u64 u2;
};

struct {
  __u64 s1;
  __u64 s2;
};

enum {
  ENUM1 = 0,
  ENUM2,
};

__extension__ typedef __signed__ long long __s64;
"""
        expected = """\
typedef union {
  __u64 tu1;
  __u64 tu2;
} typedef_name;
union {
  __u64 u1;
  __u64 u2;
};
struct {
  __u64 s1;
  __u64 s2;
};
enum {
  ENUM1 = 0,
  ENUM2,
};
__extension__ typedef __signed__ long long __s64;
"""

        self.assertEqual(self.parse(text), expected)

    def test_semicolon_after_function(self):
        text = """\
static inline __u64 function()
{
};

struct should_see {
        __u32                           field;
};
"""
        expected = """\
struct should_see {
  __u32 field;
};
"""
        self.assertEqual(self.parse(text), expected)

    def test_define_in_middle_keep(self):
        text = """\
enum {
  ENUM0 = 0x10,
  ENUM1 = 0x20,
#define SOMETHING SOMETHING_ELSE
  ENUM2 = 0x40,
};
"""
        expected = """\
enum {
  ENUM0 = 0x10,
  ENUM1 = 0x20,
#define SOMETHING SOMETHING_ELSE
  ENUM2 = 0x40,
};
"""
        self.assertEqual(self.parse(text), expected)

    def test_define_in_middle_remove(self):
        text = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        expected = """\
#define SOMETHING1 SOMETHING_ELSE1
#define SOMETHING2 SOMETHING_ELSE2
"""
        self.assertEqual(self.parse(text), expected)

    def test_define_in_middle_force_keep(self):
        text = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        expected = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
 {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        self.assertEqual(self.parse(text, set(["function"])), expected)

    def test_define_before_remove(self):
        text = """\
#define SHOULD_BE_KEPT NOTHING1
#define ANOTHER_TO_KEEP NOTHING2
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        expected = """\
#define SHOULD_BE_KEPT NOTHING1
#define ANOTHER_TO_KEEP NOTHING2
#define SOMETHING1 SOMETHING_ELSE1
#define SOMETHING2 SOMETHING_ELSE2
"""
        self.assertEqual(self.parse(text), expected)

    def test_extern_C(self):
        text = """\
#if defined(__cplusplus)
extern "C" {
#endif

struct something {
};

#if defined(__cplusplus)
}
#endif
"""
        expected = """\
#ifdef __cplusplus
extern "C" {
#endif
struct something {
};
#ifdef __cplusplus
}
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_macro_definition_removed(self):
        text = """\
#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
MACRO_FUNCTION_NO_PARAMS()

#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
MACRO_FUNCTION_PARAMS(a = 1)

something that should still be kept
MACRO_FUNCTION_PARAMS(b)
"""
        expected = """\
#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
something that should still be kept
"""
        self.assertEqual(self.parse(text), expected)
2243
2244
# Run the unit tests when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
2247