• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2"""A glorified C pre-processor parser."""
3
4import ctypes
5import logging
6import os
7import re
8import site
9import unittest
10import utils
11
# Value of the ANDROID_BUILD_TOP environment variable; the prebuilt clang
# paths used below are resolved relative to it.
top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Add the prebuilt clang site-packages directory to the module search path.
# This has to run before the `import clang.cindex` that follows.
site.addsitedir(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/python3/site-packages/'))
18
19import clang.cindex
20from clang.cindex import conf
21from clang.cindex import Cursor
22from clang.cindex import CursorKind
23from clang.cindex import SourceLocation
24from clang.cindex import SourceRange
25from clang.cindex import TokenGroup
26from clang.cindex import TokenKind
27from clang.cindex import TranslationUnit
28
# Point the clang bindings directly at the prebuilt libclang.so instead of
# relying on LD_LIBRARY_PATH.
# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/libclang.so'))
32
33from defaults import *
34
35
# Module-level debug switches; all of them are off by default.
# debugCppExpr makes CppExpr print the tokens it is about to parse and the
# resulting expression tree.
# NOTE(review): debugBlockParser and debugOptimIf01 are not read in this part
# of the file; presumably they gate tracing in the block parser and in the
# "#if 0/1" optimization pass. Confirm against their users.
debugBlockParser = False
debugCppExpr = False
debugOptimIf01 = False
39
40###############################################################################
41###############################################################################
42#####                                                                     #####
43#####           C P P   T O K E N S                                       #####
44#####                                                                     #####
45###############################################################################
46###############################################################################
47
# The list of supported C-preprocessor tokens, plus a couple of C tokens as
# well. Each constant holds a token spelling; the expression parser compares
# these against Token.id.
tokEOF = "\0"
tokLN = "\n"
tokSTRINGIFY = "#"
tokCONCAT = "##"
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# NOTE(review): the three values below are placeholder ids rather than real
# spellings; their users are not visible in this part of the file.
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
87
88
class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class, because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be supplied at
    construction time (CppTokenizer._getTokensWithCursors() does so) or
    assigned later. Otherwise the value computed by the base class is used.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        """Create a token.

        Args:
            tu: translation unit the token belongs to.
            group: token group object kept referenced by this token.
            int_data: raw libclang token data; left untouched when None.
            ptr_data: raw libclang token data.
            cursor: pre-computed cursor for this token, or None to fall back
                to the base-class cursor on first access.
        """
        super().__init__()
        self._id = None
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token: the overriding id if set, else the spelling."""
        if self._id is None:
            return self.spelling
        return self._id

    @id.setter
    def id(self, new_id):
        """Setting name of the token."""
        self._id = new_id

    @property
    def cursor(self):
        """Cursor of the token; computed by the base class when not set."""
        if self._cursor is None:
            # Evaluate the base-class property for *this* instance. Referring
            # to clang.cindex.Token.cursor on the class would yield the
            # property object itself rather than a cursor.
            self._cursor = super().cursor
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        """Override the cursor associated with this token."""
        self._cursor = new_cursor

    def __repr__(self):
        # 'defined' is tokenized as an identifier but is printed bare.
        if self.id != 'defined' and self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id
        return self.id

    def __str__(self):
        return self.id
151
152
class BadExpectedToken(Exception):
    """Raised when a token other than the expected one is encountered."""
156
157
class UnparseableStruct(Exception):
    """Raised when a struct cannot be parsed."""
161
162
163# The __contains__ function in libclang SourceRange class contains a bug. It
164# gives wrong result when dealing with single line range.
165# Bug filed with upstream:
166# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range.

    Args:
        other: the object to test; anything that is not a SourceLocation is
            never contained.

    Returns:
        True if `other` lies between the start and end of this range
        (both ends inclusive) in the same file, False otherwise.
    """
    if not isinstance(other, SourceLocation):
        return False

    start, end = self.start, self.end
    if other.file is None and start.file is None:
        # Neither side is tied to a file: compare positions only.
        pass
    elif other.file is None or start.file is None or end.file is None:
        # Only some of the locations have a file, so they cannot refer to
        # the same one. Without this guard, reading `.name` on the missing
        # file would raise AttributeError.
        return False
    elif (start.file.name != other.file.name or
          other.file.name != end.file.name):
        # different files
        return False

    # same file, strictly between the first and last line
    if start.line < other.line < end.line:
        return True
    # same file, single-line range
    if start.line == other.line == end.line:
        return start.column <= other.column <= end.column
    # same file, on the first line of a multi-line range
    if start.line == other.line:
        return start.column <= other.column
    # same file, on the last line of a multi-line range
    if other.line == end.line:
        return other.column <= end.column
    return False
193
194
# Replace libclang's SourceRange.__contains__ with the function defined above,
# so `location in source_range` uses it.
SourceRange.__contains__ = SourceRange__contains__
196
197
198################################################################################
199################################################################################
200#####                                                                      #####
201#####           C P P   T O K E N I Z E R                                  #####
202#####                                                                      #####
203################################################################################
204################################################################################
205
206
class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    _getTokensWithCursors().

    After parseString() or parseFile() the result is available both as the
    `tokens` list and, one token at a time, through nextToken().
    """

    clang_flags = ['-E', '-x', 'c']
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None
        # Position of the next token handed out by nextToken().
        self._index = 0
        # None until something has been parsed, then a list of Token.
        self.tokens = None

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.

        Returns:
            A list of Token objects with comment tokens left out. The list
            is empty when the translation unit yields no tokens.
        """

        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Return an empty list
        # (not None) so that callers can still use len() and iterate, and do
        # not build a TokenGroup around nothing.
        if count < 1:
            return []

        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        # NOTE(review): presumably the TokenGroup ties the lifetime of the
        # libclang token buffer to the tokens that reference it; confirm
        # against the clang.cindex implementation.
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        for i in range(count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Tokenize in-memory source text and store the result in `tokens`.

        The text is handed to libclang as an unsaved file under a
        placeholder file name.
        """
        file_ = 'no-filename-available.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()
        # Start reading from the first token of the new input.
        self._index = 0

    def parseFile(self, file_):
        """Tokenize the file at path `file_` and store the result in `tokens`."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()
        # Start reading from the first token of the new input.
        self._index = 0

    def nextToken(self):
        """Return next token from the list, or None when there is none left.

        None is also returned when nothing has been parsed yet.
        """
        if self.tokens is None or self._index >= len(self.tokens):
            return None
        t = self.tokens[self._index]
        self._index += 1
        return t
295
296
class CppStringTokenizer(CppTokenizer):
    """CppTokenizer variant whose input is a string of source text.

    The text is tokenized immediately on construction.
    """

    def __init__(self, line):
        super().__init__()
        self.parseString(line)
303
304
class CppFileTokenizer(CppTokenizer):
    """CppTokenizer variant whose input is a file path.

    The file is tokenized immediately on construction.
    """

    def __init__(self, file_):
        super().__init__()
        self.parseFile(file_)
311
312
313# Unit testing
314#
class CppTokenizerTests(unittest.TestCase):
    """Unit tests for CppStringTokenizer."""

    def get_tokens(self, token_string, line_col=False):
        """Tokenize token_string and collect the resulting token ids.

        With line_col set, every entry is an (id, line, column) tuple instead
        of the bare id.
        """
        tokenizer = CppStringTokenizer(token_string)
        collected = []
        token = tokenizer.nextToken()
        while token:
            if line_col:
                entry = (token.id, token.location.line, token.location.column)
            else:
                entry = token.id
            collected.append(entry)
            token = tokenizer.nextToken()
        return collected

    def test_hash(self):
        """Punctuation, identifiers and a malformed number are split apart."""
        expected = ["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                    "01923_xy", tokRPAREN]
        self.assertEqual(self.get_tokens("#an/example  && (01923_xy)"),
                         expected)

    def test_parens(self):
        """Parentheses are separate tokens; 'defined' is an ordinary id."""
        expected = ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                    "defined", tokLPAREN, "BAZ", tokRPAREN]
        self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"),
                         expected)

    def test_comment(self):
        """A multi-line comment produces no tokens."""
        self.assertEqual(self.get_tokens("/*\n#\n*/"), [])

    def test_line_cross(self):
        """Tokens on separate lines are returned in order."""
        expected = ["first", "second"]
        self.assertEqual(self.get_tokens("first\nsecond"), expected)

    def test_line_cross_line_col(self):
        """Line and column numbers are reported for each token."""
        expected = [("first", 1, 1), ("second", 1, 7), ("third", 2, 3)]
        self.assertEqual(self.get_tokens("first second\n  third", True),
                         expected)

    def test_comment_line_col(self):
        """A trailing multi-line comment is dropped from the output."""
        expected = [("boo", 1, 1)]
        self.assertEqual(self.get_tokens("boo /* what the\nhell */", True),
                         expected)

    def test_escapes(self):
        """A backslash-newline continuation does not yield a token."""
        expected = [("an", 1, 1), ("example", 2, 2)]
        self.assertEqual(self.get_tokens("an \\\n example", True),
                         expected)
359
360
361################################################################################
362################################################################################
363#####                                                                      #####
364#####           C P P   E X P R E S S I O N S                              #####
365#####                                                                      #####
366################################################################################
367################################################################################
368
369
class CppExpr(object):
    """A class that models the condition of #if directives into an expr tree.

    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    "op" is a string describing the operation. Leaf nodes use the ops "int",
    "oct", "hex", "ident", "defined" and "call"; the remaining ops are the
    entries of `unaries` and `binaries`.
    """

    unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    # Operator precedences; a larger number binds tighter.
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

    def __init__(self, tokens):
        """Initialize a CppExpr by parsing 'tokens', a list of Token objects.

        Raises:
            BadExpectedToken: if the tokens do not form a valid expression or
                if tokens are left over after the expression.
        """
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

        if debugCppExpr:
            print("CppExpr: trying to parse %s" % repr(tokens))
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print("CppExpr: got " + repr(self.expr))
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

    def throw(self, exception, msg):
        """Print msg prefixed with the current position, then raise."""
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print("%d:%d: %s" % (tok.location.line, tok.location.column, msg))
        else:
            print("EOF: %s" % msg)
        raise exception(msg)

    def expectId(self, id):
        """Check that a given token id is at the current position.

        Consumes the token on success.

        Raises:
            BadExpectedToken: if the current token has another id, or if the
                end of the input has been reached.
        """
        # Test for end of input before indexing; otherwise a missing token
        # would surface as IndexError instead of a parse error.
        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got end of input"
                       % id)
        token = self.tokens[self._index]
        if token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

    @staticmethod
    def _strip_int_suffix(text):
        """Return text without its trailing integer-suffix letters (U/L)."""
        # All suffix letters are removed, so "10UL" or "1ull" are handled as
        # well as the single-letter forms.
        return text.rstrip("ULul")

    def is_decimal(self):
        """Parse the current token as a decimal integer.

        Returns ('int', value) and consumes the token, or returns None and
        leaves the position untouched.
        """
        token = self._strip_int_suffix(self.tokens[self._index].id)
        try:
            val = int(token, 10)
        except ValueError:
            return None
        self._index += 1
        return ('int', val)

    def is_octal(self):
        """Parse the current token as an octal integer (leading '0').

        Returns ('oct', value) and consumes the token, or returns None and
        leaves the position untouched.
        """
        token = self._strip_int_suffix(self.tokens[self._index].id)
        # A lone "0" is left to is_decimal().
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
        except ValueError:
            return None
        self._index += 1
        return ('oct', val)

    def is_hexadecimal(self):
        """Parse the current token as a hexadecimal integer ('0x' prefix).

        Returns ('hex', value) and consumes the token, or returns None and
        leaves the position untouched.
        """
        token = self._strip_int_suffix(self.tokens[self._index].id)
        if len(token) < 3 or token[:2] not in ('0x', '0X'):
            return None
        try:
            val = int(token, 16)
        except ValueError:
            return None
        self._index += 1
        return ('hex', val)

    def is_integer(self):
        """Parse the current literal token as a hex, octal or decimal integer.

        Returns the matching node, or None if the token is not a literal or
        is not an integer in any of the three notations.
        """
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

        # Order matters: "0x..." and "0..." would otherwise be tried as
        # decimal first.
        for parse in (self.is_hexadecimal, self.is_octal, self.is_decimal):
            c = parse()
            if c:
                return c

        return None

    def is_number(self):
        """Parse an integer with an optional leading '+' or '-' sign.

        Returns an integer node (negated for '-'), or None. When a sign is
        not followed by an integer the sign stays consumed.
        """
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

        return self.is_integer()

    def is_defined(self):
        """Parse 'defined NAME' or 'defined(NAME)'.

        Returns ("defined", NAME), or None if the current token is not the
        'defined' keyword.

        Raises:
            BadExpectedToken: if 'defined' is not followed by a macro name or
                if the closing parenthesis is missing.
        """
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

        # We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
                self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

        self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

        return ("defined", t.id)

    def is_call_or_ident(self):
        """Parse an identifier or a macro call such as FOO(a, b).

        Returns ("ident", name) for a plain identifier, or
        ("call", (name, params)) where params is a list of token lists, one
        per argument. Returns None if the current token is not an identifier
        or if the call's closing parenthesis is never found.
        """
        if self._index >= self._num_tokens:
            return None

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

        name = t.id

        self._index += 1
        if (self._index >= self._num_tokens or
                self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

        params = []
        depth = 1
        self._index += 1
        # First token of the argument currently being collected.
        arg_start = self._index
        while self._index < self._num_tokens:
            tok_id = self.tokens[self._index].id
            if tok_id == tokLPAREN:
                depth += 1
            elif depth == 1 and tok_id in (tokCOMMA, tokRPAREN):
                # End of a top-level argument.
                params.append(self.tokens[arg_start:self._index])
                if tok_id == tokRPAREN:
                    break
                arg_start = self._index + 1
            elif tok_id == tokRPAREN:
                depth -= 1
            self._index += 1

        if self._index >= self._num_tokens:
            return None

        self._index += 1
        return ("call", (name, params))

    # Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    # The "classic" algorithm would be fine if we were using a tool to
    # generate the parser, but we're not. Dijkstra's "shunting yard"
    # algorithm hasn't been necessary yet.

    def parseExpression(self, minPrecedence):
        """Parse an expression whose binary operators all have a precedence
        of at least minPrecedence.

        Returns the expression node, or None at the end of the input.
        """
        if self._index >= self._num_tokens:
            return None

        node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

        return node

    def parsePrimary(self):
        """Parse a unary expression, a parenthesized expression or a leaf.

        Raises:
            BadExpectedToken: if the current token cannot start a primary.
        """
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

        primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

    def isBinary(self, token):
        """Return True if token is a binary operator."""
        return token.id in self.binaries

    def isUnary(self, token):
        """Return True if token is a unary operator."""
        return token.id in self.unaries

    def precedence(self, token):
        """Return the precedence of an operator token (None if unknown)."""
        return self.precedences.get(token.id)

    def token(self):
        """Return the current token, or None at the end of the input."""
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def nextToken(self):
        """Advance by one token and return the new current token or None."""
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def dump_node(self, e):
        """Return the parenthesized debug representation of node e."""
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            name, params = e[1]
            # Each parameter is a token list; its tokens are concatenated
            # without separators.
            rendered = ["".join(str(tok) for tok in param)
                        for param in params]
            line += " %s [%s])" % (name, ",".join(rendered))
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        return self.dump_node(self.expr)

    def source_node(self, e):
        """Return node e rendered as C preprocessor source text.

        Sub-expressions are parenthesized only where their operator binds
        less tightly than the enclosing one. Nodes that cannot be rendered
        (e.g. "call") come out as "???".
        """
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        # Leaves are given a very high precedence so they are never wrapped.
        prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            if CppExpr.precedences.get(arg[0], 1000) < prec:
                arg_src = "(%s)" % arg_src
            # Emit the actual operator, so that "~" is not turned into "!".
            return op + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg[0], 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2[0], 1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"

    def __str__(self):
        return self.source_node(self.expr)

    @staticmethod
    def int_node(e):
        """Return the value of an integer node, or None for other nodes."""
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        return None

    def toInt(self):
        """Return the expression's value if it is a single integer node."""
        return self.int_node(self.expr)

    def optimize_node(self, e, macros=None):
        """Return a simplified copy of node e.

        Known macros are substituted and "!", "&&" and "||" are folded where
        an operand is a constant. Other nodes are returned unchanged.

        Args:
            e: the node to simplify.
            macros: dict mapping macro names to their values; the value
                kCppUndefinedMacro marks a macro as known to be undefined.
        """
        if macros is None:
            macros = {}
        op = e[0]

        if op == "defined":
            op, name = e
            if name in macros:
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                try:
                    value = int(macros[name])
                    return ("int", value)
                except ValueError:
                    return ("defined", macros[name])

            if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

            return e

        elif op == "ident":
            op, name = e
            if name in macros:
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                return self.optimize_node(expanded, macros)
            return e

        elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            # Use int_node() so that octal and hexadecimal constants are
            # folded too, like the "&&" and "||" cases below do.
            vi = self.int_node(v)
            if vi is not None:
                return ("int", 1) if vi == 0 else ("int", 0)
            return ('!', v)

        elif op == "&&":
            op, lhs, rhs = e
            lhs = self.optimize_node(lhs, macros)
            rhs = self.optimize_node(rhs, macros)
            li = self.int_node(lhs)
            ri = self.int_node(rhs)
            if li is not None:
                # 0 && x -> 0 ; nonzero && x -> x
                return ("int", 0) if li == 0 else rhs
            elif ri is not None:
                # x && 0 -> 0 ; x && nonzero -> x
                return ("int", 0) if ri == 0 else lhs
            return (op, lhs, rhs)

        elif op == "||":
            op, lhs, rhs = e
            lhs = self.optimize_node(lhs, macros)
            rhs = self.optimize_node(rhs, macros)
            li = self.int_node(lhs)
            ri = self.int_node(rhs)
            if li is not None:
                # 0 || x -> x ; nonzero || x -> 1
                return rhs if li == 0 else ("int", 1)
            elif ri is not None:
                # x || 0 -> x ; x || nonzero -> 1
                return lhs if ri == 0 else ("int", 1)
            return (op, lhs, rhs)

        else:
            return e

    def optimize(self, macros=None):
        """Simplify the whole expression in place; see optimize_node()."""
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)
809
810class CppExprTest(unittest.TestCase):
811    """CppExpr unit tests."""
812
813    def get_expr(self, expr):
814        return repr(CppExpr(CppStringTokenizer(expr).tokens))
815
816    def test_cpp_expr(self):
817        self.assertEqual(self.get_expr("0"), "(int 0)")
818        self.assertEqual(self.get_expr("1"), "(int 1)")
819        self.assertEqual(self.get_expr("-5"), "(int -5)")
820        self.assertEqual(self.get_expr("+1"), "(int 1)")
821        self.assertEqual(self.get_expr("0U"), "(int 0)")
822        self.assertEqual(self.get_expr("015"), "(oct 015)")
823        self.assertEqual(self.get_expr("015l"), "(oct 015)")
824        self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)")
825        self.assertEqual(self.get_expr("(0)"), "(int 0)")
826        self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))")
827        self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))")
828        self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)")
829        self.assertEqual(self.get_expr("EXAMPLE - 3"),
830                         "(- (ident EXAMPLE) (int 3))")
831        self.assertEqual(self.get_expr("defined(EXAMPLE)"),
832                         "(defined EXAMPLE)")
833        self.assertEqual(self.get_expr("defined ( EXAMPLE ) "),
834                         "(defined EXAMPLE)")
835        self.assertEqual(self.get_expr("!defined(EXAMPLE)"),
836                         "(! (defined EXAMPLE))")
837        self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"),
838                         "(|| (defined ABC) (defined BINGO))")
839        self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])")
840        self.assertEqual(self.get_expr("A == 1 || defined(B)"),
841                         "(|| (== (ident A) (int 1)) (defined B))")
842
843    def get_expr_optimize(self, expr, macros=None):
844        if macros is None:
845            macros = {}
846        e = CppExpr(CppStringTokenizer(expr).tokens)
847        e.optimize(macros)
848        return repr(e)
849
850    def test_cpp_expr_optimize(self):
851        self.assertEqual(self.get_expr_optimize("0"), "(int 0)")
852        self.assertEqual(self.get_expr_optimize("1"), "(int 1)")
853        self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)")
854        self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)")
855        self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)")
856        self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)")
857        self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)")
858        self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)")
859        self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)")
860        self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)")
861        self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)")
862        self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)")
863        self.assertEqual(self.get_expr_optimize("A"), "(ident A)")
864        self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)")
865        self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)")
866        self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)")
867        self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}), "(ident B)")
868        self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}), "(ident A)")
869        self.assertEqual(self.get_expr_optimize("A && B"), "(&& (ident A) (ident B))")
870        self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)")
871        self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"), "(- (ident EXAMPLE) (int 3))")
872        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"), "(defined EXAMPLE)")
873        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
874                                                {"EXAMPLE": "XOWOE"}),
875                         "(defined XOWOE)")
876        self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
877                                                {"EXAMPLE": kCppUndefinedMacro}),
878                         "(int 0)")
879        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"), "(! (defined EXAMPLE))")
880        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
881                                                {"EXAMPLE": "XOWOE"}),
882                         "(! (defined XOWOE))")
883        self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
884                                                {"EXAMPLE": kCppUndefinedMacro}),
885                         "(int 1)")
886        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"),
887                        "(|| (defined A) (defined B))")
888        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
889                                                {"A": "1"}),
890                         "(int 1)")
891        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
892                                                {"B": "1"}),
893                         "(int 1)")
894        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
895                                                {"B": kCppUndefinedMacro}),
896                         "(defined A)")
897        self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
898                                                {"A": kCppUndefinedMacro,
899                                                 "B": kCppUndefinedMacro}),
900                         "(int 0)")
901        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"),
902                         "(&& (defined A) (defined B))")
903        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
904                                                {"A": "1"}),
905                         "(defined B)")
906        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
907                                                {"B": "1"}),
908                         "(defined A)")
909        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
910                                                {"B": kCppUndefinedMacro}),
911                        "(int 0)")
912        self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
913                                                {"A": kCppUndefinedMacro}),
914                        "(int 0)")
915        self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"),
916                         "(|| (== (ident A) (int 1)) (defined B))")
917        self.assertEqual(self.get_expr_optimize(
918              "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
919              {"__KERNEL__": kCppUndefinedMacro}),
920              "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))")
921
922    def get_expr_string(self, expr):
923        return str(CppExpr(CppStringTokenizer(expr).tokens))
924
925    def test_cpp_expr_string(self):
926        self.assertEqual(self.get_expr_string("0"), "0")
927        self.assertEqual(self.get_expr_string("1"), "1")
928        self.assertEqual(self.get_expr_string("1 && 1"), "1 && 1")
929        self.assertEqual(self.get_expr_string("1 && 0"), "1 && 0")
930        self.assertEqual(self.get_expr_string("0 && 1"), "0 && 1")
931        self.assertEqual(self.get_expr_string("0 && 0"), "0 && 0")
932        self.assertEqual(self.get_expr_string("1 || 1"), "1 || 1")
933        self.assertEqual(self.get_expr_string("1 || 0"), "1 || 0")
934        self.assertEqual(self.get_expr_string("0 || 1"), "0 || 1")
935        self.assertEqual(self.get_expr_string("0 || 0"), "0 || 0")
936        self.assertEqual(self.get_expr_string("EXAMPLE"), "EXAMPLE")
937        self.assertEqual(self.get_expr_string("EXAMPLE - 3"), "EXAMPLE - 3")
938        self.assertEqual(self.get_expr_string("defined(EXAMPLE)"), "defined(EXAMPLE)")
939        self.assertEqual(self.get_expr_string("defined EXAMPLE"), "defined(EXAMPLE)")
940        self.assertEqual(self.get_expr_string("A == 1 || defined(B)"), "A == 1 || defined(B)")
941
942
943################################################################################
944################################################################################
945#####                                                                      #####
946#####          C P P   B L O C K                                           #####
947#####                                                                      #####
948################################################################################
949################################################################################
950
951
class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks: contain the tokens of non-directive source text

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.

        Args:
            tokens: the list of tokens that make up the block (for a
                directive block, the tokens that follow the directive name).
            directive: the directive name without the leading '#', or None
                for a text block.
            lineno: the line number of the block. When it is not positive,
                the line of the first token is used; a block without any
                token gets line 0.
            identifier: the macro name of a '#define' block, stored as
                'define_id'.
        """

        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        elif self.tokens:
            self.lineno = self.tokens[0].location.line
        else:
            # A block may legitimately have no tokens (write() skips empty
            # text blocks), so there is no first token to take the line from.
            self.lineno = 0

        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ("if", "ifdef", "ifndef", "else", "elif",
                                  "endif")

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ("if", "ifdef", "ifndef", "elif")

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation.

        Args:
            tokens: the tokens of a text block.
            indent: the number of spaces of indentation in effect before the
                first token.

        Returns:
            A (lines, indent) tuple: the list of formatted lines and the
            indentation in effect after the last token, so that the caller
            can carry it over to the next block.
        """
        newline = True
        result = []
        buf = ''
        for i, t in enumerate(tokens):
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                # Do not indent if this is extern "C" {
                if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
                    indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                if indent >= 2:
                    indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    # Something follows on the same line: keep the '}' in
                    # the buffer instead of emitting it right away.
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                # The first token of a line carries the indentation, the
                # following ones are separated by a single space.
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False

        if buf:
            result.append(strip_space(buf))

        return result, indent

    def write(self, out, indent):
        """Dump the current block.

        Args:
            out: an object with a write() method that receives the text.
            indent: the indentation in effect before this block.

        Returns:
            The indentation in effect after this block.
        """
        # removeWhiteSpace() will sometimes create non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return indent

        if self.directive:
            out.write(str(self) + '\n')
        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')

        return indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                result += ''.join([repr(tok) for tok in self.tokens])
        else:
            result = ''.join([repr(tok) for tok in self.tokens])

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            # Any other directive: the tokens are glued together without
            # any separator.
            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result
1161
1162
class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    The blocks themselves are supplied by the caller (see __init__); the
    methods below rewrite the 'blocks' attribute in place.
    """

    def __init__(self, blocks):
        """Wrap 'blocks', a list of Block objects (stored without copying)."""
        self.blocks = blocks

    def __len__(self):
        """Return the number of blocks."""
        return len(self.blocks)

    def __getitem__(self, n):
        """Return the block at index 'n'."""
        return self.blocks[n]

    def __repr__(self):
        """Return the repr() of the underlying list of blocks."""
        return repr(self.blocks)

    def __str__(self):
        """Return the str() of every block, one per line."""
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print('##### BEGIN #####')
        for i, b in enumerate(self.blocks):
            print('### BLOCK %d ###' % i)
            print(b)
        print('##### END #####')

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList.

        'macros' is passed unchanged to the optimize() method of the
        expression of every #if-like block.
        """
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeStructs(self, structs):
        """Remove structs.

        Args:
            structs: a mapping keyed by the names of the structures to
                remove. When the value for a removed structure is false, an
                '#include <bits/NAME.h>' block is added to the list.

        Raises:
            UnparseableStruct: if the end of a structure to remove cannot be
                found before the end of the block list.
        """
        extra_includes = []
        block_num = 0
        num_blocks = len(self.blocks)
        while block_num < num_blocks:
            b = self.blocks[block_num]
            # From here on, block_num is the index of the block after 'b'.
            block_num += 1
            # Have to look in each block for a top-level struct definition.
            if b.directive:
                continue
            num_tokens = len(b.tokens)
            # A struct definition usually looks like:
            #   struct
            #   ident
            #   {
            #   }
            #   ;
            # However, the structure might be spread across multiple blocks
            # if the structure looks like this:
            #   struct ident
            #   {
            #   #ifdef VARIABLE
            #     pid_t pid;
            #   #endif
            #   };
            # So the total number of tokens in the block might be less than
            # five but assume at least three.
            if num_tokens < 3:
                continue

            # This is a simple struct finder, it might fail if a top-level
            # structure has an #if type directives that confuses the algorithm
            # for finding the end of the structure. Or if there is another
            # structure definition embedded in the structure.
            i = 0
            while i < num_tokens - 2:
                if (b.tokens[i].kind != TokenKind.KEYWORD or
                    b.tokens[i].id != "struct"):
                    i += 1
                    continue
                if (b.tokens[i + 1].kind == TokenKind.IDENTIFIER and
                    b.tokens[i + 2].kind == TokenKind.PUNCTUATION and
                    b.tokens[i + 2].id == "{" and b.tokens[i + 1].id in structs):
                    # Add an include for the structure to be removed of the form:
                    #  #include <bits/STRUCT_NAME.h>
                    struct_token = b.tokens[i + 1]
                    if not structs[struct_token.id]:
                        extra_includes.append("<bits/%s.h>" % struct_token.id)

                    # Search forward for the end of the structure.
                    # Very simple search, look for } and ; tokens.
                    # If we hit the end of the block, we'll need to start
                    # looking at the next block.
                    # j starts just after the opening '{', hence depth 1.
                    j = i + 3
                    depth = 1
                    struct_removed = False
                    while not struct_removed:
                        while j < num_tokens:
                            if b.tokens[j].kind == TokenKind.PUNCTUATION:
                                if b.tokens[j].id == '{':
                                    depth += 1
                                elif b.tokens[j].id == '}':
                                    depth -= 1
                                elif b.tokens[j].id == ';' and depth == 0:
                                    # End of the structure: drop tokens i..j.
                                    b.tokens = b.tokens[0:i] + b.tokens[j + 1:num_tokens]
                                    num_tokens = len(b.tokens)
                                    struct_removed = True
                                    break
                            j += 1
                        if not struct_removed:
                            # The structure continues past this block: drop
                            # everything from 'struct' to the end of it.
                            b.tokens = b.tokens[0:i]

                            # Skip directive blocks.
                            start_block = block_num
                            while block_num < num_blocks:
                                if not self.blocks[block_num].directive:
                                    break
                                block_num += 1
                            if block_num >= num_blocks:
                                # Unparsable struct, error out.
                                raise UnparseableStruct("Cannot remove struct %s: %s" % (struct_token.id, struct_token.location))
                            # Delete the skipped directive blocks and carry
                            # on with the next text block, from its first
                            # token ('depth' is kept across blocks).
                            self.blocks = self.blocks[0:start_block] + self.blocks[block_num:num_blocks]
                            num_blocks = len(self.blocks)
                            b = self.blocks[start_block]
                            block_num = start_block + 1
                            num_tokens = len(b.tokens)
                            i = 0
                            j = 0
                    # Re-examine position i, which now holds whatever
                    # followed the removed structure.
                    continue
                i += 1

        # NOTE(review): the includes are inserted at index 2, presumably to
        # land right after the include guard of the header -- confirm.
        for extra_include in extra_includes:
            replacement = CppStringTokenizer(extra_include)
            self.blocks.insert(2, Block(replacement.tokens, directive='include'))

    def optimizeAll(self, macros):
        """Run optimizeMacros(macros) followed by optimizeIf01()."""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        """Write every block to 'out', carrying the indentation across blocks."""
        indent = 0
        for b in self.blocks:
            indent = b.write(out, indent)

    def removeVarsAndFuncs(self, keep):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        In addition, remove any macros expanding in the headers. Usually,
        these macros are static inline functions, which is why they are
        removed.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'keep', which is useful
        for optimized byteorder swap functions and stuff like that.
        """

        # state = NORMAL => normal (i.e. LN + spaces)
        # state = OTHER_DECL => typedef/struct encountered, ends with ";"
        # state = VAR_DECL => var declaration encountered, ends with ";"
        # state = FUNC_DECL => func declaration encountered, ends with "}"
        NORMAL = 0
        OTHER_DECL = 1
        VAR_DECL = 2
        FUNC_DECL = 3

        state = NORMAL
        depth = 0
        # blocksToKeep: the final result.
        # blocksInProgress: blocks seen since the last decision, kept only
        #   if the declaration being scanned is kept.
        # blocksOfDirectives: the directive blocks among them, which are
        #   kept even when the declaration itself is dropped.
        blocksToKeep = []
        blocksInProgress = []
        blocksOfDirectives = []
        ident = ""
        state_token = ""
        macros = set()
        for block in self.blocks:
            if block.isDirective():
                # Record all macros.
                if block.directive == 'define':
                    # For a function-like macro only the name before '(' is
                    # recorded.
                    macro_name = block.define_id
                    paren_index = macro_name.find('(')
                    if paren_index == -1:
                        macros.add(macro_name)
                    else:
                        macros.add(macro_name[0:paren_index])
                blocksInProgress.append(block)
                # If this is in a function/variable declaration, we might need
                # to emit the directives alone, so save them separately.
                blocksOfDirectives.append(block)
                continue

            numTokens = len(block.tokens)
            # Index of the first token that has not been emitted or dropped.
            lastTerminatorIndex = 0
            i = 0
            while i < numTokens:
                token_id = block.tokens[i].id
                terminator = False
                if token_id == '{':
                    depth += 1
                    if (i >= 2 and block.tokens[i-2].id == 'extern' and
                        block.tokens[i-1].id == '"C"'):
                        # For an extern "C" { pretend as though this is depth 0.
                        depth -= 1
                elif token_id == '}':
                    if depth > 0:
                        depth -= 1
                    if depth == 0:
                        if state == OTHER_DECL:
                            # Loop through until we hit the ';'
                            i += 1
                            while i < numTokens:
                                if block.tokens[i].id == ';':
                                    token_id = ';'
                                    break
                                i += 1
                            # If we didn't hit the ';', just consider this the
                            # terminator any way.
                        terminator = True
                elif depth == 0:
                    if token_id == ';':
                        if state == NORMAL:
                            # A statement that ends without any of the
                            # keywords below: flush what came before it and
                            # mark it so that the terminator code drops it
                            # (unless its identifier is in 'keep').
                            blocksToKeep.extend(blocksInProgress)
                            blocksInProgress = []
                            blocksOfDirectives = []
                            state = FUNC_DECL
                        terminator = True
                    elif (state == NORMAL and token_id == '(' and i >= 1 and
                          block.tokens[i-1].kind == TokenKind.IDENTIFIER and
                          block.tokens[i-1].id in macros):
                        # This is a plain macro being expanded in the header
                        # which needs to be removed.
                        blocksToKeep.extend(blocksInProgress)
                        # Keep the tokens that precede the macro name.
                        if lastTerminatorIndex < i - 1:
                            blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1]))
                        blocksInProgress = []
                        blocksOfDirectives = []

                        # Skip until we see the terminating ')'
                        i += 1
                        paren_depth = 1
                        while i < numTokens:
                            if block.tokens[i].id == ')':
                                paren_depth -= 1
                                if paren_depth == 0:
                                    break
                            elif block.tokens[i].id == '(':
                                paren_depth += 1
                            i += 1
                        lastTerminatorIndex = i + 1
                    elif (state != FUNC_DECL and token_id == '(' and
                          state_token != 'typedef'):
                        # A '(' outside of a typedef: flush what came before
                        # and mark the statement for removal.
                        blocksToKeep.extend(blocksInProgress)
                        blocksInProgress = []
                        blocksOfDirectives = []
                        state = VAR_DECL
                    elif state == NORMAL and token_id in ['struct', 'typedef',
                                                          'enum', 'union',
                                                          '__extension__']:
                        state = OTHER_DECL
                        state_token = token_id
                    elif block.tokens[i].kind == TokenKind.IDENTIFIER:
                        # Track the most recent identifier; once in VAR_DECL
                        # only the first one seen is retained.
                        if state != VAR_DECL or ident == "":
                            ident = token_id

                if terminator:
                    if state != VAR_DECL and state != FUNC_DECL or ident in keep:
                        blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1]))
                        blocksToKeep.extend(blocksInProgress)
                    else:
                        # Only keep the directives found.
                        blocksToKeep.extend(blocksOfDirectives)
                    lastTerminatorIndex = i + 1
                    blocksInProgress = []
                    blocksOfDirectives = []
                    state = NORMAL
                    ident = ""
                    state_token = ""
                i += 1
            # Tokens left after the last terminator stay pending until a
            # later block settles their fate.
            if lastTerminatorIndex < numTokens:
                blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens]))
        if len(blocksInProgress) > 0:
            blocksToKeep.extend(blocksInProgress)
        self.blocks = blocksToKeep

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict.

        Identifier tokens (and the macro name of #define blocks) that are
        keys of 'replacements' are renamed to the corresponding value.
        #include blocks are left untouched.
        """
        for b in self.blocks:
            made_change = False
            if b.isInclude() is None:
                i = 0
                while i < len(b.tokens):
                    tok = b.tokens[i]
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True
                    i += 1

                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)
1481
1482
1483
def strip_space(s):
    """Strip out redundant space in a given string.

    The spaces around a fixed set of punctuation marks are removed, then
    the single space between a word and a following '(' is removed too.
    """

    # NOTE: It ought to be more clever to not destroy spaces in string tokens.
    # The substitutions are applied in this order, each one working on the
    # result of the previous one.
    substitutions = (
        (' . ', '.'),
        (' [', '['),
        ('[ ', '['),
        (' ]', ']'),
        ('( ', '('),
        (' )', ')'),
        (' ,', ','),
        ('# ', '#'),
        (' ;', ';'),
        ('~ ', '~'),
        (' -> ', '->'),
    )
    for old, new in substitutions:
        s = s.replace(old, new)

    # Remove the space between function name and the parenthesis.
    return re.sub(r'(\w+) \(', r'\1(', s)
1506
1507
1508class BlockParser(object):
1509    """A class that converts an input source file into a BlockList object."""
1510
1511    def __init__(self, tokzer=None):
1512        """Initialize a block parser.
1513
1514        The input source is provided through a Tokenizer object.
1515        """
1516        self._tokzer = tokzer
1517        self._parsed = False
1518
    @property
    def parsed(self):
        """Return the internal '_parsed' flag (False right after __init__)."""
        return self._parsed
1522
1523    @staticmethod
1524    def _short_extent(extent):
1525        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
1526                                  extent.end.line, extent.end.column)
1527
1528    def getBlocks(self, tokzer=None):
1529        """Return all the blocks parsed."""
1530
1531        def consume_extent(i, tokens, extent=None, detect_change=False):
1532            """Return tokens that belong to the given extent.
1533
1534            It parses all the tokens that follow tokens[i], until getting out
1535            of the extent. When detect_change is True, it may terminate early
1536            when detecting preprocessing directives inside the extent.
1537            """
1538
1539            result = []
1540            if extent is None:
1541                extent = tokens[i].cursor.extent
1542
1543            while i < len(tokens) and tokens[i].location in extent:
1544                t = tokens[i]
1545                if debugBlockParser:
1546                    print(' ' * 2, t.id, t.kind, t.cursor.kind)
1547                if (detect_change and t.cursor.extent != extent and
1548                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
1549                    break
1550                result.append(t)
1551                i += 1
1552            return (i, result)
1553
1554        def consume_line(i, tokens):
1555            """Return tokens that follow tokens[i] in the same line."""
1556            result = []
1557            line = tokens[i].location.line
1558            while i < len(tokens) and tokens[i].location.line == line:
1559                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
1560                    break
1561                result.append(tokens[i])
1562                i += 1
1563            return (i, result)
1564
1565        if tokzer is None:
1566            tokzer = self._tokzer
1567        tokens = tokzer.tokens
1568
1569        blocks = []
1570        buf = []
1571        i = 0
1572
1573        while i < len(tokens):
1574            t = tokens[i]
1575            cursor = t.cursor
1576
1577            if debugBlockParser:
1578                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
1579                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
1580                                        cursor.kind,
1581                                        self._short_extent(cursor.extent)))
1582
1583            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
1584                if buf:
1585                    blocks.append(Block(buf))
1586                    buf = []
1587
1588                j = i
1589                if j + 1 >= len(tokens):
1590                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
1591                directive = tokens[j+1].id
1592
1593                if directive == 'define':
1594                    if i+2 >= len(tokens):
1595                        raise BadExpectedToken("### BAD TOKEN at %s" %
1596                                               (tokens[i].location))
1597
1598                    # Skip '#' and 'define'.
1599                    extent = tokens[i].cursor.extent
1600                    i += 2
1601                    id = ''
                    # We need to separate the id from the remainder of
                    # the line, especially for a function-like macro.
1604                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
1605                        (tokens[i].location.column + len(tokens[i].spelling) ==
1606                         tokens[i+1].location.column)):
1607                        while i < len(tokens):
1608                            id += tokens[i].id
1609                            if tokens[i].spelling == ')':
1610                                i += 1
1611                                break
1612                            i += 1
1613                    else:
1614                        id += tokens[i].id
1615                        # Advance to the next token that follows the macro id
1616                        i += 1
1617
1618                    (i, ret) = consume_extent(i, tokens, extent=extent)
1619                    blocks.append(Block(ret, directive=directive,
1620                                        lineno=t.location.line, identifier=id))
1621
1622                else:
1623                    (i, ret) = consume_extent(i, tokens)
1624                    blocks.append(Block(ret[2:], directive=directive,
1625                                        lineno=t.location.line))
1626
1627            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
1628                if buf:
1629                    blocks.append(Block(buf))
1630                    buf = []
1631                directive = tokens[i+1].id
1632                (i, ret) = consume_extent(i, tokens)
1633
1634                blocks.append(Block(ret[2:], directive=directive,
1635                                    lineno=t.location.line))
1636
1637            elif cursor.kind == CursorKind.VAR_DECL:
1638                if buf:
1639                    blocks.append(Block(buf))
1640                    buf = []
1641
1642                (i, ret) = consume_extent(i, tokens, detect_change=True)
1643                buf += ret
1644
1645            elif cursor.kind == CursorKind.FUNCTION_DECL:
1646                if buf:
1647                    blocks.append(Block(buf))
1648                    buf = []
1649
1650                (i, ret) = consume_extent(i, tokens, detect_change=True)
1651                buf += ret
1652
1653            else:
1654                (i, ret) = consume_line(i, tokens)
1655                buf += ret
1656
1657        if buf:
1658            blocks.append(Block(buf))
1659
        # _parsed=True indicates that parsing succeeded, although it may
        # result in an empty BlockList.
1662        self._parsed = True
1663
1664        return BlockList(blocks)
1665
1666    def parse(self, tokzer):
1667        return self.getBlocks(tokzer)
1668
1669    def parseFile(self, path):
1670        return self.getBlocks(CppFileTokenizer(path))
1671
1672
class BlockParserTests(unittest.TestCase):
    """Unit tests for BlockParser."""

    def get_blocks(self, lines):
        """Parse `lines` joined with newlines; return each block as a str."""
        source = '\n'.join(lines)
        parsed = BlockParser().parse(CppStringTokenizer(source))
        return [str(block) for block in parsed]

    def test_hash(self):
        """A lone directive line round-trips unchanged."""
        blocks = self.get_blocks(["#error hello"])
        self.assertEqual(blocks, ["#error hello"])

    def test_empty_line(self):
        """Text lines separated by an empty line end up in a single block."""
        blocks = self.get_blocks(["foo", "", "bar"])
        self.assertEqual(blocks, ["foo bar"])

    def test_hash_with_space(self):
        """Placeholder for a case that is not handled yet (see below)."""
        # We currently cannot handle the following case with libclang properly.
        # Fortunately it doesn't appear in current headers.
        #self.assertEqual(self.get_blocks(["foo", "  #  ", "bar"]), ["foo", "bar"])
        pass

    def test_with_comment(self):
        """Comments and extra spaces inside a directive line are dropped."""
        source_lines = [
            "foo",
            "  #  /* ahah */ if defined(__KERNEL__) /* more */",
            "bar",
            "#endif",
        ]
        self.assertEqual(self.get_blocks(source_lines),
                         ["foo", "#ifdef __KERNEL__", "bar", "#endif"])
1697
1698
1699################################################################################
1700################################################################################
1701#####                                                                      #####
1702#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
1703#####                                                                      #####
1704################################################################################
1705################################################################################
1706
1707
def find_matching_endif(blocks, i):
    """Find the directive that ends the conditional branch starting at i.

    The scan starts at blocks[i] and assumes that exactly one conditional is
    already open (depth 1). Nested #if/#ifdef/#ifndef ... #endif groups are
    skipped over.

    Returns the index of the first #else or #elif found at depth 1, or of the
    #endif that closes the open conditional, whichever comes first. If no
    such directive exists the returned index is >= len(blocks).
    """
    n = len(blocks)
    depth = 1
    while i < n:
        if blocks[i].isDirective():
            dir_ = blocks[i].directive
            if dir_ in ("if", "ifndef", "ifdef"):
                depth += 1
            elif depth == 1 and dir_ in ("else", "elif"):
                return i
            elif dir_ == "endif":
                depth -= 1
                if depth == 0:
                    return i
        i += 1
    return i


def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks.

    Conditionals whose expression evaluates to a constant (expr.toInt() is
    not None) are folded:
      - a branch evaluating to 0 is dropped and the following #else/#elif
        takes its place;
      - a branch evaluating to non-zero keeps its (recursively optimized)
        body and every later branch of the same conditional is dropped.
    When the constant branch is an #elif of a conditional whose earlier
    branches are kept, the '#elif 1' header and the closing #endif are
    preserved so that the surrounding conditional stays well formed.

    Blocks whose expression is not constant are copied through unchanged.

    Note that some directive blocks of the input list are modified in place
    (their directive, expr or tokens attributes are rewritten).

    Returns a new list containing the blocks that remain.
    """
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            # Not a constant condition: keep the directive as is.
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            start_dir = blocks[j].directive
            # Remember where the conditional starts; j is reused below.
            start_lineno = blocks[j].lineno
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              start_lineno, blocks[j].lineno)
                if start_dir == "elif":
                    # Put an endif since we started with an elif.
                    result += blocks[j:j+1]
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", start_lineno, blocks[j-1].lineno)
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        start_dir = blocks[j].directive
        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                # Add the endif in to the results.
                result += blocks[k:k+1]
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                blocks[k].directive = "elif"
            else:
                blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # The taken branch is followed by an 'elif': that branch and every
            # later 'elif'/'else' of this conditional can never be selected.
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            # Skip the dead branches up to the endif closing the conditional.
            # Rewriting the 'elif' into an 'if 0' instead would let a later
            # 'else' be revived as 'if 1' by the 'if 0' handling above.
            while k < n and blocks[k].directive != "endif":
                k = find_matching_endif(blocks, k + 1)
            if k >= n:
                # unterminated conditional, finish here
                break
            if start_dir == "elif":
                # Add the endif in to the results.
                result += blocks[k:k+1]
            i = k + 1
    return result
1829
class OptimizerTests(unittest.TestCase):
    """Tests for BlockList.optimizeAll() on constant and macro conditions."""

    def parse(self, text, macros=None):
        """Parse `text`, optimize it with `macros`, return the written output.

        `macros` is passed straight to optimizeAll(); the tests below use a
        dict mapping macro names to a value string or to kCppUndefinedMacro.
        """
        out = utils.StringOutput()
        blocks = BlockParser().parse(CppStringTokenizer(text))
        blocks.optimizeAll(macros)
        blocks.write(out)
        return out.get()

    def test_if1(self):
        """'#if 1' keeps its body and drops the directives."""
        text = """\
#if 1
#define  GOOD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if0(self):
        """'#if 0' removes its whole body."""
        text = """\
#if 0
#define  SHOULD_SKIP1
#define  SHOULD_SKIP2
#endif
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

    def test_if1_else(self):
        """'#if 1 ... #else' keeps only the first branch."""
        text = """\
#if 1
#define  GOOD
#else
#define  BAD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if0_else(self):
        """'#if 0 ... #else' keeps only the else branch."""
        text = """\
#if 0
#define  BAD
#else
#define  GOOD
#endif
"""
        expected = """\
#define GOOD
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1(self):
        """'#elif 1' after a non-constant '#if' is kept as is."""
        text = """\
#if defined(something)
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_if_elif1_macro(self):
        """An '#elif' on a macro known to be 1 is rewritten to '#elif 1'."""
        text = """\
#if defined(something)
#define EXISTS
#elif defined(WILL_BE_ONE)
#define GOOD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)

    def test_if_elif1_else(self):
        """The '#else' following an '#elif 1' is removed."""
        text = """\
#if defined(something)
#define EXISTS
#elif 1
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text), expected)

    # NOTE: this test used to be defined twice with identical bodies; the
    # second definition silently replaced the first, so only one is kept.
    def test_if_elif1_else_macro(self):
        """The '#else' following an '#elif' on a macro known to be 1 is removed."""
        text = """\
#if defined(something)
#define EXISTS
#elif defined(WILL_BE_ONE)
#define GOOD
#else
#define BAD
#endif
"""
        expected = """\
#ifdef something
#define EXISTS
#elif 1
#define GOOD
#endif
"""
        self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)

    def test_macro_set_to_undefined_single(self):
        """A block guarded only by an undefined macro disappears."""
        text = """\
#if defined(__KERNEL__)
#define BAD_KERNEL
#endif
"""
        expected = ""
        macros = {"__KERNEL__": kCppUndefinedMacro}
        self.assertEqual(self.parse(text, macros), expected)

    def test_macro_set_to_undefined_if(self):
        """An undefined macro is dropped from a larger '||' expression."""
        text = """\
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define CHECK
#endif
"""
        expected = """\
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define CHECK
#endif
"""
        macros = {"__KERNEL__": kCppUndefinedMacro}
        self.assertEqual(self.parse(text, macros), expected)

    def test_endif_comment_removed(self):
        """A trailing comment on '#endif' is not written back."""
        text = """\
#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */
"""
        expected = """\
#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif
"""
        self.assertEqual(self.parse(text), expected)

    def test_multilevel_if0(self):
        """An '#if 1' nested inside an '#if 0' is removed with it."""
        text = """\
#if 0
#if 1
#define  BAD_6
#endif
#endif
"""
        expected = ""
        self.assertEqual(self.parse(text), expected)

2025
class RemoveStructsTests(unittest.TestCase):
    """Tests for BlockList.removeStructs()."""

    def parse(self, text, structs):
        """Parse `text`, apply removeStructs(structs), return the output.

        `structs` maps struct names to a boolean; in the tests below a struct
        mapped to True is simply dropped, while one mapped to False is
        expected to be replaced by an '#include <bits/NAME.h>' line.
        """
        block_list = BlockParser().parse(CppStringTokenizer(text))
        block_list.removeStructs(structs)
        sink = utils.StringOutput()
        block_list.write(sink)
        return sink.get()

    def test_remove_struct_from_start(self):
        """The removed struct is the first definition."""
        src = """\
struct remove {
  int val1;
  int val2;
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        want = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(src, {"remove": True}), want)

    def test_remove_struct_from_end(self):
        """The removed struct is the last definition."""
        src = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
struct remove {
  int val1;
  int val2;
};
"""
        want = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(src, {"remove": True}), want)

    def test_remove_minimal_struct(self):
        """An empty struct can be removed."""
        src = """\
struct remove {
};
"""
        want = ""
        self.assertEqual(self.parse(src, {"remove": True}), want)

    def test_remove_struct_with_struct_fields(self):
        """Fields of the removed struct type in other structs are kept."""
        src = """\
struct something {
  struct remove val1;
  struct remove val2;
};
struct remove {
  int val1;
  struct something val3;
  int val2;
};
"""
        want = """\
struct something {
  struct remove val1;
  struct remove val2;
};
"""
        self.assertEqual(self.parse(src, {"remove": True}), want)

    def test_remove_consecutive_structs(self):
        """Two adjacent structs are both removed."""
        src = """\
struct keep1 {
  struct timeval val1;
  struct timeval val2;
};
struct remove1 {
  int val1;
  int val2;
};
struct remove2 {
  int val1;
  int val2;
  int val3;
};
struct keep2 {
  struct timeval val1;
  struct timeval val2;
};
"""
        want = """\
struct keep1 {
  struct timeval val1;
  struct timeval val2;
};
struct keep2 {
  struct timeval val1;
  struct timeval val2;
};
"""
        to_remove = {"remove1": True, "remove2": True}
        self.assertEqual(self.parse(src, to_remove), want)

    def test_remove_multiple_structs(self):
        """Removed structs interleaved with kept ones."""
        src = """\
struct keep1 {
  int val;
};
struct remove1 {
  int val1;
  int val2;
};
struct keep2 {
  int val;
};
struct remove2 {
  struct timeval val1;
  struct timeval val2;
};
struct keep3 {
  int val;
};
"""
        want = """\
struct keep1 {
  int val;
};
struct keep2 {
  int val;
};
struct keep3 {
  int val;
};
"""
        to_remove = {"remove1": True, "remove2": True}
        self.assertEqual(self.parse(src, to_remove), want)

    def test_remove_struct_with_inline_structs(self):
        """Nested anonymous structs are removed with their parent."""
        src = """\
struct remove {
  int val1;
  int val2;
  struct {
    int val1;
    struct {
      int val1;
    } level2;
  } level1;
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        want = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(src, {"remove": True}), want)

    def test_remove_struct_across_blocks(self):
        """A struct containing preprocessor conditionals is removed whole."""
        src = """\
struct remove {
  int val1;
  int val2;
#ifdef PARAMETER1
  PARAMETER1
#endif
#ifdef PARAMETER2
  PARAMETER2
#endif
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        want = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(src, {"remove": True}), want)

    def test_remove_struct_across_blocks_multiple_structs(self):
        """A struct with conditionals followed by another removed struct."""
        src = """\
struct remove1 {
  int val1;
  int val2;
#ifdef PARAMETER1
  PARAMETER1
#endif
#ifdef PARAMETER2
  PARAMETER2
#endif
};
struct remove2 {
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        want = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        to_remove = {"remove1": True, "remove2": True}
        self.assertEqual(self.parse(src, to_remove), want)

    def test_remove_multiple_struct_and_add_includes(self):
        """Structs mapped to False are replaced by bits/ includes."""
        src = """\
struct remove1 {
  int val1;
  int val2;
};
struct remove2 {
  struct timeval val1;
  struct timeval val2;
};
"""
        want = """\
#include <bits/remove1.h>
#include <bits/remove2.h>
"""
        to_remove = {"remove1": False, "remove2": False}
        self.assertEqual(self.parse(src, to_remove), want)

2258
2259
class FullPathTest(unittest.TestCase):
    """Test of the full path parsing."""

    def parse(self, text, keep=None):
        """Run every clean-up pass over `text` and return the written output.

        The passes are, in order: removeStructs(kernel_structs_to_remove),
        removeVarsAndFuncs(keep), replaceTokens(kernel_token_replacements)
        and optimizeAll(None). `keep` holds the names of the variables and
        functions to preserve; a falsy value means an empty set.
        """
        names_to_keep = keep or set()
        block_list = BlockParser().parse(CppStringTokenizer(text))

        block_list.removeStructs(kernel_structs_to_remove)
        block_list.removeVarsAndFuncs(names_to_keep)
        block_list.replaceTokens(kernel_token_replacements)
        block_list.optimizeAll(None)

        sink = utils.StringOutput()
        block_list.write(sink)
        return sink.get()

    def test_function_removed(self):
        """A function not listed in `keep` is dropped."""
        src = """\
static inline __u64 function()
{
}
"""
        want = ""
        self.assertEqual(self.parse(src), want)

    def test_function_removed_with_struct(self):
        """A function returning a struct pointer is dropped too."""
        src = """\
static inline struct something* function()
{
}
"""
        want = ""
        self.assertEqual(self.parse(src), want)

    def test_function_kept(self):
        """A function listed in `keep` is written back."""
        src = """\
static inline __u64 function()
{
}
"""
        want = """\
static inline __u64 function() {
}
"""
        self.assertEqual(self.parse(src, {"function"}), want)

    def test_var_removed(self):
        """A variable not listed in `keep` is dropped."""
        src = "__u64 variable;"
        want = ""
        self.assertEqual(self.parse(src), want)

    def test_var_kept(self):
        """A variable listed in `keep` is written back."""
        src = "__u64 variable;"
        want = "__u64 variable;\n"
        self.assertEqual(self.parse(src, {"variable"}), want)

    def test_keep_function_typedef(self):
        """A function typedef is not treated as a function to remove."""
        src = "typedef void somefunction_t(void);"
        want = "typedef void somefunction_t(void);\n"
        self.assertEqual(self.parse(src), want)

    def test_struct_keep_attribute(self):
        """A struct with a trailing __attribute__ is kept intact."""
        src = """\
struct something_s {
  __u32 s1;
  __u32 s2;
} __attribute__((packed));
"""
        want = """\
struct something_s {
  __u32 s1;
  __u32 s2;
} __attribute__((packed));
"""
        self.assertEqual(self.parse(src), want)

    def test_function_keep_attribute_structs(self):
        """A kept function using struct types is written back unchanged."""
        src = """\
static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
}
"""
        want = """\
static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
}
"""
        self.assertEqual(self.parse(src, {"function"}), want)

    def test_struct_after_struct(self):
        """A #define inside the second of two structs is preserved."""
        src = """\
struct first {
};

struct second {
  unsigned short s1;
#define SOMETHING 8
  unsigned short s2;
};
"""
        want = """\
struct first {
};
struct second {
  unsigned short s1;
#define SOMETHING 8
  unsigned short s2;
};
"""
        self.assertEqual(self.parse(src), want)

    def test_other_not_removed(self):
        """Unions, anonymous structs, enums and typedefs are all kept."""
        src = """\
typedef union {
  __u64 tu1;
  __u64 tu2;
} typedef_name;

union {
  __u64 u1;
  __u64 u2;
};

struct {
  __u64 s1;
  __u64 s2;
};

enum {
  ENUM1 = 0,
  ENUM2,
};

__extension__ typedef __signed__ long long __s64;
"""
        want = """\
typedef union {
  __u64 tu1;
  __u64 tu2;
} typedef_name;
union {
  __u64 u1;
  __u64 u2;
};
struct {
  __u64 s1;
  __u64 s2;
};
enum {
  ENUM1 = 0,
  ENUM2,
};
__extension__ typedef __signed__ long long __s64;
"""
        self.assertEqual(self.parse(src), want)

    def test_semicolon_after_function(self):
        """A stray ';' after a removed function does not hide what follows."""
        src = """\
static inline __u64 function()
{
};

struct should_see {
        __u32                           field;
};
"""
        want = """\
struct should_see {
  __u32 field;
};
"""
        self.assertEqual(self.parse(src), want)

    def test_define_in_middle_keep(self):
        """A #define in the middle of an enum is preserved in place."""
        src = """\
enum {
  ENUM0 = 0x10,
  ENUM1 = 0x20,
#define SOMETHING SOMETHING_ELSE
  ENUM2 = 0x40,
};
"""
        want = """\
enum {
  ENUM0 = 0x10,
  ENUM1 = 0x20,
#define SOMETHING SOMETHING_ELSE
  ENUM2 = 0x40,
};
"""
        self.assertEqual(self.parse(src), want)

    def test_define_in_middle_remove(self):
        """#defines inside a removed function survive the removal."""
        src = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        want = """\
#define SOMETHING1 SOMETHING_ELSE1
#define SOMETHING2 SOMETHING_ELSE2
"""
        self.assertEqual(self.parse(src), want)

    def test_define_in_middle_force_keep(self):
        """A kept function retains its body and embedded #defines."""
        src = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        want = """\
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
 {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        self.assertEqual(self.parse(src, {"function"}), want)

    def test_define_before_remove(self):
        """#defines before and inside a removed function are all kept."""
        src = """\
#define SHOULD_BE_KEPT NOTHING1
#define ANOTHER_TO_KEEP NOTHING2
static inline function() {
#define SOMETHING1 SOMETHING_ELSE1
  i = 0;
  {
    i = 1;
  }
#define SOMETHING2 SOMETHING_ELSE2
}
"""
        want = """\
#define SHOULD_BE_KEPT NOTHING1
#define ANOTHER_TO_KEEP NOTHING2
#define SOMETHING1 SOMETHING_ELSE1
#define SOMETHING2 SOMETHING_ELSE2
"""
        self.assertEqual(self.parse(src), want)

    def test_extern_C(self):
        """An extern "C" wrapper guarded by __cplusplus is preserved."""
        src = """\
#if defined(__cplusplus)
extern "C" {
#endif

struct something {
};

#if defined(__cplusplus)
}
#endif
"""
        want = """\
#ifdef __cplusplus
extern "C" {
#endif
struct something {
};
#ifdef __cplusplus
}
#endif
"""
        self.assertEqual(self.parse(src), want)

    def test_macro_definition_removed(self):
        """Macro invocations that expand to functions are removed."""
        src = """\
#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
MACRO_FUNCTION_NO_PARAMS()

#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
MACRO_FUNCTION_PARAMS(a = 1)

something that should still be kept
MACRO_FUNCTION_PARAMS(b)
"""
        want = """\
#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
something that should still be kept
"""
        self.assertEqual(self.parse(src), want)

    def test_verify_timeval_itemerval(self):
        """The __kernel_old_ time structs are removed and their uses renamed."""
        src = """\
struct __kernel_old_timeval {
  struct something val;
};
struct __kernel_old_itimerval {
  struct __kernel_old_timeval val;
};
struct fields {
  struct __kernel_old_timeval timeval;
  struct __kernel_old_itimerval itimerval;
};
"""
        want = """\
struct fields {
  struct timeval timeval;
  struct itimerval itimerval;
};
"""
        self.assertEqual(self.parse(src), want)

    def test_token_replacement(self):
        """Tokens listed in kernel_token_replacements are renamed."""
        src = """\
#define SIGRTMIN 32
#define SIGRTMAX _NSIG
"""
        want = """\
#define __SIGRTMIN 32
#define __SIGRTMAX _KERNEL__NSIG
"""
        self.assertEqual(self.parse(src), want)

2588
# Run the unit tests defined in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
2591