• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# a glorified C pre-processor parser
2
3import sys, re, string
4from utils import *
5from defaults import *
6
7debugTokens             = False
8debugDirectiveTokenizer = False
9debugLineParsing        = False
10debugCppExpr            = False
11debugOptimIf01          = False
12
13#####################################################################################
14#####################################################################################
15#####                                                                           #####
16#####           C P P   T O K E N S                                             #####
17#####                                                                           #####
18#####################################################################################
19#####################################################################################
20
21# the list of supported C-preprocessor tokens
22# plus a couple of C tokens as well
23tokEOF       = "\0"
24tokLN        = "\n"
25tokSTRINGIFY = "#"
26tokCONCAT    = "##"
27tokLOGICAND  = "&&"
28tokLOGICOR   = "||"
29tokSHL       = "<<"
30tokSHR       = ">>"
31tokEQUAL     = "=="
32tokNEQUAL    = "!="
33tokLT        = "<"
34tokLTE       = "<="
35tokGT        = ">"
36tokGTE       = ">="
37tokELLIPSIS  = "..."
38tokSPACE     = " "
39tokDEFINED   = "defined"
40tokLPAREN    = "("
41tokRPAREN    = ")"
42tokNOT       = "!"
43tokPLUS      = "+"
44tokMINUS     = "-"
45tokMULTIPLY  = "*"
46tokDIVIDE    = "/"
47tokMODULUS   = "%"
48tokBINAND    = "&"
49tokBINOR     = "|"
50tokBINXOR    = "^"
51tokCOMMA     = ","
52tokLBRACE    = "{"
53tokRBRACE    = "}"
54tokARROW     = "->"
55tokINCREMENT = "++"
56tokDECREMENT = "--"
57tokNUMBER    = "<number>"
58tokIDENT     = "<ident>"
59tokSTRING    = "<string>"
60
61class Token:
62    """a simple class to hold information about a given token.
63       each token has a position in the source code, as well as
64       an 'id' and a 'value'. the id is a string that identifies
65       the token's class, while the value is the string of the
66       original token itself.
67
68       for example, the tokenizer concatenates a series of spaces
69       and tabs as a single tokSPACE id, whose value if the original
70       spaces+tabs sequence."""
71
72    def __init__(self):
73        self.id     = None
74        self.value  = None
75        self.lineno = 0
76        self.colno  = 0
77
78    def set(self,id,val=None):
79        self.id = id
80        if val:
81            self.value = val
82        else:
83            self.value = id
84        return None
85
86    def copyFrom(self,src):
87        self.id     = src.id
88        self.value  = src.value
89        self.lineno = src.lineno
90        self.colno  = src.colno
91
92    def __repr__(self):
93        if self.id == tokIDENT:
94            return "(ident %s)" % self.value
95        if self.id == tokNUMBER:
96            return "(number %s)" % self.value
97        if self.id == tokSTRING:
98            return "(string '%s')" % self.value
99        if self.id == tokLN:
100            return "<LN>"
101        if self.id == tokEOF:
102            return "<EOF>"
103        if self.id == tokSPACE and self.value == "\\":
104            # this corresponds to a trailing \ that was transformed into a tokSPACE
105            return "<\\>"
106
107        return self.id
108
109    def __str__(self):
110        if self.id == tokIDENT:
111            return self.value
112        if self.id == tokNUMBER:
113            return self.value
114        if self.id == tokSTRING:
115            return self.value
116        if self.id == tokEOF:
117            return "<EOF>"
118        if self.id == tokSPACE:
119            if self.value == "\\":  # trailing \
120                return "\\\n"
121            else:
122                return self.value
123
124        return self.id
125
126class BadExpectedToken(Exception):
127    def __init__(self,msg):
128        print msg
129
130#####################################################################################
131#####################################################################################
132#####                                                                           #####
133#####          C P P   T O K E N   C U R S O R                                  #####
134#####                                                                           #####
135#####################################################################################
136#####################################################################################
137
138class TokenCursor:
139    """a small class to iterate over a list of Token objects"""
140    def __init__(self,tokens):
141        self.tokens = tokens
142        self.n      = 0
143        self.count  = len(tokens)
144
145    def set(self,n):
146        """set the current position"""
147        if n < 0:
148            n = 0
149        if n > self.count:
150            n = self.count
151        self.n = n
152
153    def peekId(self):
154        """retrieve the id of the current token"""
155        if (self.n >= self.count):
156            return None
157        return self.tokens[self.n].id
158
159    def peek(self):
160        """retrieve the current token. does not change position"""
161        if (self.n >= self.count):
162            return None
163        return self.tokens[self.n]
164
165    def skip(self):
166        """increase current token position"""
167        if (self.n < self.count):
168            self.n += 1
169
170    def skipSpaces(self):
171        """skip over all space tokens, this includes tokSPACE and tokLN"""
172        while 1:
173            tok = self.peekId()
174            if tok != tokSPACE and tok != tokLN:
175                break
176            self.skip()
177
178    def skipIfId(self,id):
179        """skip an optional token"""
180        if self.peekId() == id:
181            self.skip()
182
183    def expectId(self,id):
184        """raise an exception if the current token hasn't a given id.
185           otherwise skip over it"""
186        tok = self.peek()
187        if tok.id != id:
188            raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id)
189        self.skip()
190
191    def remain(self):
192        """return the list of remaining tokens"""
193        return self.tokens[self.n:]
194
195
196#####################################################################################
197#####################################################################################
198#####                                                                           #####
199#####           C P P   T O K E N I Z E R                                       #####
200#####                                                                           #####
201#####################################################################################
202#####################################################################################
203
204# list of long symbols, i.e. those that take more than one characters
205cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
206                   tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
207
208class CppTokenizer:
209    """an abstract class used to convert some input text into a list
210       of tokens. real implementations follow and differ in the format
211       of the input text only"""
212
213    def __init__(self):
214        """initialize a new CppTokenizer object"""
215        self.eof  = False  # end of file reached ?
216        self.text = None   # content of current line, with final \n stripped
217        self.line = 0      # number of current line
218        self.pos  = 0      # current character position in current line
219        self.len  = 0      # length of current line text
220        self.held = Token()
221
222    def setLineText(self,line):
223        """set the content of the (next) current line. should be called
224           by fillLineText() in derived classes"""
225        self.text = line
226        self.len  = len(line)
227        self.pos  = 0
228
229    def fillLineText(self):
230        """refresh the content of 'line' with a new line of input"""
231        # to be overriden
232        self.eof = True
233
234    def markPos(self,tok):
235        """mark the position of the current token in the source file"""
236        if self.eof or self.pos > self.len:
237            tok.lineno = self.line + 1
238            tok.colno  = 0
239        else:
240            tok.lineno = self.line
241            tok.colno  = self.pos
242
243    def peekChar(self):
244        """return the current token under the cursor without moving it"""
245        if self.eof:
246            return tokEOF
247
248        if self.pos > self.len:
249            self.pos   = 0
250            self.line += 1
251            self.fillLineText()
252            if self.eof:
253                return tokEOF
254
255        if self.pos == self.len:
256            return tokLN
257        else:
258            return self.text[self.pos]
259
260    def peekNChar(self,n):
261        """try to peek the next n chars on the same line"""
262        if self.pos + n > self.len:
263            return None
264        return self.text[self.pos:self.pos+n]
265
266    def skipChar(self):
267        """increment the token cursor position"""
268        if not self.eof:
269            self.pos += 1
270
271    def skipNChars(self,n):
272        if self.pos + n <= self.len:
273            self.pos += n
274        else:
275            while n > 0:
276                self.skipChar()
277                n -= 1
278
279    def nextChar(self):
280        """retrieve the token at the current cursor position, then skip it"""
281        result = self.peekChar()
282        self.skipChar()
283        return  result
284
285    def getEscape(self):
286        # try to get all characters after a backslash (\)
287        result = self.nextChar()
288        if result == "0":
289            # octal number ?
290            num = self.peekNChar(3)
291            if num != None:
292                isOctal = True
293                for d in num:
294                    if not d in "01234567":
295                        isOctal = False
296                        break
297                if isOctal:
298                    result += num
299                    self.skipNChars(3)
300        elif result == "x" or result == "X":
301            # hex number ?
302            num = self.peekNChar(2)
303            if num != None:
304                isHex = True
305                for d in num:
306                    if not d in "012345678abcdefABCDEF":
307                        isHex = False
308                        break
309                if isHex:
310                    result += num
311                    self.skipNChars(2)
312        elif result == "u" or result == "U":
313            # unicode char ?
314            num = self.peekNChar(4)
315            if num != None:
316                isHex = True
317                for d in num:
318                    if not d in "012345678abcdefABCDEF":
319                        isHex = False
320                        break
321                if isHex:
322                    result += num
323                    self.skipNChars(4)
324
325        return result
326
327    def nextRealToken(self,tok):
328        """return next CPP token, used internally by nextToken()"""
329        c = self.nextChar()
330        if c == tokEOF or c == tokLN:
331            return tok.set(c)
332
333        if c == '/':
334            c = self.peekChar()
335            if c == '/':   # C++ comment line
336                self.skipChar()
337                while 1:
338                    c = self.nextChar()
339                    if c == tokEOF or c == tokLN:
340                        break
341                return tok.set(tokLN)
342            if c == '*':   # C comment start
343                self.skipChar()
344                value = "/*"
345                prev_c = None
346                while 1:
347                    c = self.nextChar()
348                    if c == tokEOF:
349                        #print "## EOF after '%s'" % value
350                        return tok.set(tokEOF,value)
351                    if c == '/' and prev_c == '*':
352                        break
353                    prev_c = c
354                    value += c
355
356                value += "/"
357                #print "## COMMENT: '%s'" % value
358                return tok.set(tokSPACE,value)
359            c = '/'
360
361        if c.isspace():
362            while 1:
363                c2 = self.peekChar()
364                if c2 == tokLN or not c2.isspace():
365                    break
366                c += c2
367                self.skipChar()
368            return tok.set(tokSPACE,c)
369
370        if c == '\\':
371            if debugTokens:
372                print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
373            if self.peekChar() == tokLN:   # trailing \
374                # eat the tokLN
375                self.skipChar()
376                # we replace a trailing \ by a tokSPACE whose value is
377                # simply "\\". this allows us to detect them later when
378                # needed.
379                return tok.set(tokSPACE,"\\")
380            else:
381                # treat as a single token here ?
382                c +=self.getEscape()
383                return tok.set(c)
384
385        if c == "'":  # chars
386            c2 = self.nextChar()
387            c += c2
388            if c2 == '\\':
389                c += self.getEscape()
390
391            while 1:
392                c2 = self.nextChar()
393                if c2 == tokEOF:
394                    break
395                c += c2
396                if c2 == "'":
397                    break
398
399            return tok.set(tokSTRING, c)
400
401        if c == '"':  # strings
402            quote = 0
403            while 1:
404                c2  = self.nextChar()
405                if c2 == tokEOF:
406                    return tok.set(tokSTRING,c)
407
408                c += c2
409                if not quote:
410                    if c2 == '"':
411                        return tok.set(tokSTRING,c)
412                    if c2 == "\\":
413                        quote = 1
414                else:
415                    quote = 0
416
417        if c >= "0" and c <= "9":  # integers ?
418            while 1:
419                c2 = self.peekChar()
420                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
421                    break
422                c += c2
423                self.skipChar()
424            return tok.set(tokNUMBER,c)
425
426        if c.isalnum() or c == "_":  # identifiers ?
427            while 1:
428                c2 = self.peekChar()
429                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
430                    break
431                c += c2
432                self.skipChar()
433            if c == tokDEFINED:
434                return tok.set(tokDEFINED)
435            else:
436                return tok.set(tokIDENT,c)
437
438        # check special symbols
439        for sk in cppLongSymbols:
440            if c == sk[0]:
441                sklen = len(sk[1:])
442                if self.pos + sklen <= self.len and \
443                   self.text[self.pos:self.pos+sklen] == sk[1:]:
444                    self.pos += sklen
445                    return tok.set(sk)
446
447        return tok.set(c)
448
449    def nextToken(self,tok):
450        """return the next token from the input text. this function
451           really updates 'tok', and does not return a new one"""
452        self.markPos(tok)
453        self.nextRealToken(tok)
454
455    def getToken(self):
456        tok = Token()
457        self.nextToken(tok)
458        if debugTokens:
459            print "getTokens: %s" % repr(tok)
460        return tok
461
462    def toTokenList(self):
463        """convert the input text of a CppTokenizer into a direct
464           list of token objects. tokEOF is stripped from the result"""
465        result = []
466        while 1:
467            tok = Token()
468            self.nextToken(tok)
469            if tok.id == tokEOF:
470                break
471            result.append(tok)
472        return result
473
474class CppLineTokenizer(CppTokenizer):
475    """a CppTokenizer derived class that accepts a single line of text as input"""
476    def __init__(self,line,lineno=1):
477        CppTokenizer.__init__(self)
478        self.line = lineno
479        self.setLineText(line)
480
481
482class CppLinesTokenizer(CppTokenizer):
483    """a CppTokenizer derived class that accepts a list of texdt lines as input.
484       the lines must not have a trailing \n"""
485    def __init__(self,lines=[],lineno=1):
486        """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
487        CppTokenizer.__init__(self)
488        self.line  = lineno
489        self.lines = lines
490        self.index = 0
491        self.count = len(lines)
492
493        if self.count > 0:
494            self.fillLineText()
495        else:
496            self.eof = True
497
498    def addLine(self,line):
499        """add a line to a CppLinesTokenizer. this can be done after tokenization
500           happens"""
501        if self.count == 0:
502            self.setLineText(line)
503            self.index = 1
504        self.lines.append(line)
505        self.count += 1
506        self.eof    = False
507
508    def fillLineText(self):
509        if self.index < self.count:
510            self.setLineText(self.lines[self.index])
511            self.index += 1
512        else:
513            self.eof = True
514
515
516class CppFileTokenizer(CppTokenizer):
517    def __init__(self,file,lineno=1):
518        CppTokenizer.__init__(self)
519        self.file = file
520        self.line = lineno
521
522    def fillLineText(self):
523        line = self.file.readline()
524        if len(line) > 0:
525            if line[-1] == '\n':
526                line = line[:-1]
527            if len(line) > 0 and line[-1] == "\r":
528                line = line[:-1]
529            self.setLineText(line)
530        else:
531            self.eof = True
532
533# Unit testing
534#
535class CppTokenizerTester:
536    """a class used to test CppTokenizer classes"""
537    def __init__(self,tokenizer=None):
538        self.tokenizer = tokenizer
539        self.token     = Token()
540
541    def setTokenizer(self,tokenizer):
542        self.tokenizer = tokenizer
543
544    def expect(self,id):
545        self.tokenizer.nextToken(self.token)
546        tokid = self.token.id
547        if tokid == id:
548            return
549        if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
550            return
551        raise BadExpectedToken, "###  BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
552
553    def expectToken(self,id,line,col):
554        self.expect(id)
555        if self.token.lineno != line:
556            raise BadExpectedToken, "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
557        if self.token.colno != col:
558            raise BadExpectedToken, "###  BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
559
560    def expectTokenVal(self,id,value,line,col):
561        self.expectToken(id,line,col)
562        if self.token.value != value:
563            raise BadExpectedToken, "###  BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
564
565    def expectList(self,list):
566        for item in list:
567            self.expect(item)
568
569def test_CppTokenizer():
570    print "running CppTokenizer tests"
571    tester = CppTokenizerTester()
572
573    tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
574    tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
575                       tokRPAREN, tokLN, tokEOF] )
576
577    tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
578    tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
579                        tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
580
581    tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
582    tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
583
584    tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
585    tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
586
587    tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
588    tester.expectToken( "first", 1, 0 )
589    tester.expectToken( tokSPACE, 1, 5 )
590    tester.expectToken( "second", 1, 6 )
591    tester.expectToken( tokLN, 1, 12 )
592    tester.expectToken( tokSPACE, 2, 0 )
593    tester.expectToken( "third", 2, 2 )
594
595    tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
596    tester.expectList( [ "boo", tokSPACE ] )
597    tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
598    tester.expectList( [ tokLN, tokEOF ] )
599
600    tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
601    tester.expectToken( "an", 1, 0 )
602    tester.expectToken( tokSPACE, 1, 2 )
603    tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
604    tester.expectToken( tokSPACE, 2, 0 )
605    tester.expectToken( "example", 2, 1 )
606    tester.expectToken( tokLN, 2, 8 )
607
608    return True
609
610
611#####################################################################################
612#####################################################################################
613#####                                                                           #####
614#####           C P P   E X P R E S S I O N S                                   #####
615#####                                                                           #####
616#####################################################################################
617#####################################################################################
618
619# Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
620# op is an "operator" string
621
622class Expr:
623    """a class used to model a CPP expression"""
624    opInteger   = "int"
625    opIdent     = "ident"
626    opCall      = "call"
627    opDefined   = "defined"
628    opTest      = "?"
629    opLogicNot  = "!"
630    opNot       = "~"
631    opNeg       = "[-]"
632    opUnaryPlus = "[+]"
633    opAdd = "+"
634    opSub = "-"
635    opMul = "*"
636    opDiv = "/"
637    opMod = "%"
638    opAnd = "&"
639    opOr  = "|"
640    opXor = "^"
641    opLogicAnd = "&&"
642    opLogicOr  = "||"
643    opEqual = "=="
644    opNotEqual = "!="
645    opLess = "<"
646    opLessEq = "<="
647    opGreater = ">"
648    opGreaterEq = ">="
649    opShl = "<<"
650    opShr = ">>"
651
652    unaries  = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
653    binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
654                 opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq ]
655
656    precedences = {
657                    opTest: 0,
658                    opLogicOr:  1,
659                    opLogicNot: 2,
660                    opOr : 3,
661                    opXor: 4,
662                    opAnd: 5,
663                    opEqual: 6, opNotEqual: 6,
664                    opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
665                    opShl:8, opShr:8,
666                    opAdd:9, opSub:9,
667                    opMul:10, opDiv:10, opMod:10,
668                    opLogicNot:11,
669                    opNot: 12,
670                    }
671
672    def __init__(self,op):
673        self.op = op
674
675    def __repr__(self):
676        return "(%s)" % self.op
677
678    def __str__(self):
679        return "operator(%s)" % self.op
680
681    def precedence(self):
682        """return the precedence of a given operator"""
683        return Expr.precedences.get(self.op, 1000)
684
685    def isUnary(self):
686        return self.op in Expr.unaries
687
688    def isBinary(self):
689        return self.op in Expr.binaries
690
691    def isDefined(self):
692        return self.op is opDefined
693
694    def toInt(self):
695        """return the integer value of a given expression. only valid for integer expressions
696           will return None otherwise"""
697        return None
698
699class IntExpr(Expr):
700    def __init__(self,value):
701        Expr.__init__(self,opInteger)
702        self.arg   = value
703
704    def __repr__(self):
705        return "(int %s)" % self.arg
706
707    def __str__(self):
708        return self.arg
709
710    def toInt(self):
711        s = self.arg  # string value
712        # get rid of U or L suffixes
713        while len(s) > 0 and s[-1] in "LUlu":
714            s = s[:-1]
715        return string.atoi(s)
716
717class IdentExpr(Expr):
718    def __init__(self,name):
719        Expr.__init__(self,opIdent)
720        self.name = name
721
722    def __repr__(self):
723        return "(ident %s)" % self.name
724
725    def __str__(self):
726        return self.name
727
728class CallExpr(Expr):
729    def __init__(self,funcname,params):
730        Expr.__init__(self,opCall)
731        self.funcname = funcname
732        self.params   = params
733
734    def __repr__(self):
735        result = "(call %s [" % self.funcname
736        comma  = ""
737        for param in self.params:
738            result += "%s%s" % (comma, repr(param))
739            comma   = ","
740        result += "])"
741        return result
742
743    def __str__(self):
744        result = "%s(" % self.funcname
745        comma = ""
746        for param in self.params:
747            result += "%s%s" % (comma, str(param))
748            comma = ","
749
750        result += ")"
751        return result
752
753class TestExpr(Expr):
754    def __init__(self,cond,iftrue,iffalse):
755        Expr.__init__(self,opTest)
756        self.cond    = cond
757        self.iftrue  = iftrue
758        self.iffalse = iffalse
759
760    def __repr__(self):
761        return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))
762
763    def __str__(self):
764        return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
765
766class SingleArgExpr(Expr):
767    def __init__(self,op,arg):
768        Expr.__init__(self,op)
769        self.arg = arg
770
771    def __repr__(self):
772        return "(%s %s)" % (self.op, repr(self.arg))
773
774class DefinedExpr(SingleArgExpr):
775    def __init__(self,op,macroname):
776        SingleArgExpr.__init__(self.opDefined,macroname)
777
778    def __str__(self):
779        return "defined(%s)" % self.arg
780
781
782class UnaryExpr(SingleArgExpr):
783    def __init__(self,op,arg,opstr=None):
784        SingleArgExpr.__init__(self,op,arg)
785        if not opstr:
786            opstr = op
787        self.opstr = opstr
788
789    def __str__(self):
790        arg_s     = str(self.arg)
791        arg_prec  = self.arg.precedence()
792        self_prec = self.precedence()
793        if arg_prec < self_prec:
794            return "%s(%s)" % (self.opstr,arg_s)
795        else:
796            return "%s%s" % (self.opstr, arg_s)
797
798class TwoArgExpr(Expr):
799    def __init__(self,op,arg1,arg2):
800        Expr.__init__(self,op)
801        self.arg1 = arg1
802        self.arg2 = arg2
803
804    def __repr__(self):
805        return "(%s %s %s)" % (self.op, repr(self.arg1), repr(self.arg2))
806
807class BinaryExpr(TwoArgExpr):
808    def __init__(self,op,arg1,arg2,opstr=None):
809        TwoArgExpr.__init__(self,op,arg1,arg2)
810        if not opstr:
811            opstr = op
812        self.opstr = opstr
813
814    def __str__(self):
815        arg1_s    = str(self.arg1)
816        arg2_s    = str(self.arg2)
817        arg1_prec = self.arg1.precedence()
818        arg2_prec = self.arg2.precedence()
819        self_prec = self.precedence()
820
821        result = ""
822        if arg1_prec < self_prec:
823            result += "(%s)" % arg1_s
824        else:
825            result += arg1_s
826
827        result += " %s " % self.opstr
828
829        if arg2_prec < self_prec:
830            result += "(%s)" % arg2_s
831        else:
832            result += arg2_s
833
834        return result
835
836#####################################################################################
837#####################################################################################
838#####                                                                           #####
839#####           C P P   E X P R E S S I O N   P A R S E R                       #####
840#####                                                                           #####
841#####################################################################################
842#####################################################################################
843
844
845class ExprParser:
846    """a class used to convert a list of tokens into a cpp Expr object"""
847
848    re_octal   = re.compile(r"\s*\(0[0-7]+\).*")
849    re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*")
850    re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*")
851
852    def __init__(self,tokens):
853        self.tok = tokens
854        self.n   = len(self.tok)
855        self.i   = 0
856
857    def mark(self):
858        return self.i
859
860    def release(self,pos):
861        self.i = pos
862
863    def peekId(self):
864        if self.i < self.n:
865            return self.tok[self.i].id
866        return None
867
868    def peek(self):
869        if self.i < self.n:
870            return self.tok[self.i]
871        return None
872
873    def skip(self):
874        if self.i < self.n:
875            self.i += 1
876
877    def skipOptional(self,id):
878        if self.i < self.n and self.tok[self.i].id == id:
879            self.i += 1
880
881    def skipSpaces(self):
882        i   = self.i
883        n   = self.n
884        tok = self.tok
885        while i < n and (tok[i] == tokSPACE or tok[i] == tokLN):
886            i += 1
887        self.i = i
888
889    # all the isXXX functions returns a (expr,nextpos) pair if a match is found
890    # or None if not
891
892    def is_integer(self):
893        id = self.tok[self.i].id
894        c  = id[0]
895        if c < '0' or c > '9':
896            return None
897
898        m = ExprParser.re_octal.match(id)
899        if m:
900            return (IntExpr(id), m.end(1))
901
902        m = ExprParser.re_decimal.match(id)
903        if m:
904            return (IntExpr(id), m.end(1))
905
906        m = ExprParser.re_hexadecimal(id)
907        if m:
908            return (IntExpr(id), m.end(1))
909
910        return None
911
912    def is_defined(self):
913        id = self.tok[self.i].id
914        if id != "defined":
915            return None
916
917        pos = self.mark()
918
919        use_paren = 0
920        if self.peekId() == tokLPAREN:
921            self.skip()
922            use_paren = 1
923
924        if self.peekId() != tokIDENT:
925            self.throw( BadExpectedToken, "identifier expected")
926
927        macroname = self.peek().value
928        self.skip()
929        if use_paren:
930            self.skipSpaces()
931            if self.peekId() != tokRPAREN:
932                self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
933            self.skip()
934
935        i = self.i
936        return (DefinedExpr(macroname),i+1)
937
938    def is_call_or_ident(self):
939        pass
940
941    def parse(self, i):
942        return None
943
944#####################################################################################
945#####################################################################################
946#####                                                                           #####
947#####           C P P   E X P R E S S I O N S                                   #####
948#####                                                                           #####
949#####################################################################################
950#####################################################################################
951
952class CppInvalidExpression(Exception):
953    """an exception raised when an invalid/unsupported cpp expression is detected"""
954    pass
955
956class CppExpr:
957    """a class that models the condition of #if directives into
958        an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
959        where "op" is a string describing the operation"""
960
961    unaries  = [ "!", "~" ]
962    binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
963    precedences = { "||": 1,
964                    "&&": 2,
965                     "|": 3,
966                     "^": 4,
967                     "&": 5,
968                     "==":6, "!=":6,
969                     "<":7, "<=":7, ">":7, ">=":7,
970                     "<<":8, ">>":8,
971                     "+":9, "-":9,
972                     "*":10, "/":10, "%":10,
973                     "!":11, "~":12
974                     }
975
976    def __init__(self, tokens):
977        """initialize a CppExpr. 'tokens' must be a CppToken list"""
978        self.tok  = tokens
979        self.n    = len(tokens)
980        if debugCppExpr:
981            print "CppExpr: trying to parse %s" % repr(tokens)
982        expr      = self.is_expr(0)
983        if debugCppExpr:
984            print "CppExpr: got " + repr(expr)
985        self.expr = expr[0]
986
987    re_cpp_constant = re.compile(r"((\d|\w|_)+)")
988
989    def throw(self,exception,i,msg):
990        if i < self.n:
991            tok = self.tok[i]
992            print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
993        else:
994            print "EOF: %s" % msg
995        raise exception
996
997    def skip_spaces(self,i):
998        """skip spaces in input token list"""
999        while i < self.n:
1000            t = self.tok[i]
1001            if t.id != tokSPACE and t.id != tokLN:
1002                break
1003            i += 1
1004        return i
1005
1006    def expectId(self,i,id):
1007        """check that a given token id is at the current position, then skip over it"""
1008        i = self.skip_spaces(i)
1009        if i >= self.n or self.tok[i].id != id:
1010            self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
1011        return i+1
1012
1013    def expectIdent(self,i):
1014        i = self.skip_spaces(i)
1015        if i >= self.n or self.tok[i].id != tokIDENT:
1016            self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
1017        return i+1
1018
1019    # the is_xxxxx function returns either None or a pair (e,nextpos)
1020    # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
1021    # the corresponding next position in the input token list
1022    #
1023
1024    def is_decimal(self,i):
1025        v = self.tok[i].value[:]
1026        while len(v) > 0 and v[-1] in "ULul":
1027            v = v[:-1]
1028        for digit in v:
1029            if not digit.isdigit():
1030                return None
1031
1032        # for an integer expression tuple, the argument
1033        # is simply the value as an integer
1034        val = string.atoi(v)
1035        return ("int", val), i+1
1036
1037    def is_hexadecimal(self,i):
1038        v = self.tok[i].value[:]
1039        while len(v) > 0 and v[-1] in "ULul":
1040            v = v[:-1]
1041        if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
1042            for digit in v[2:]:
1043                if not digit in "0123456789abcdefABCDEF":
1044                    return None
1045
1046            # for an hex expression tuple, the argument
1047            # is the value as an integer
1048            val = int(v[2:], 16)
1049            return ("hex", val), i+1
1050
1051        return None
1052
1053    def is_integer(self,i):
1054        if self.tok[i].id != tokNUMBER:
1055            return None
1056
1057        c = self.is_decimal(i)
1058        if c: return c
1059
1060        c = self.is_hexadecimal(i)
1061        if c: return c
1062
1063        return None
1064
1065    def is_number(self,i):
1066        t = self.tok[i]
1067        if t.id == tokMINUS and i+1 < self.n:
1068            c = self.is_integer(i+1)
1069            if c:
1070                e, i2 = c
1071                op, val  = e
1072                return (op, -val), i2
1073        if t.id == tokPLUS and i+1 < self.n:
1074            c = self.is_integer(i+1)
1075            if c: return c
1076
1077        return self.is_integer(i)
1078
1079
1080    def is_alnum(self,i):
1081        """test wether a given token is alpha-numeric"""
1082        i = self.skip_spaces(i)
1083        if i >= self.n:
1084            return None
1085        t = self.tok[i]
1086        m = CppExpr.re_cpp_constant.match(t.id)
1087        if m:
1088            #print "... alnum '%s'" % m.group(1)
1089            r = m.group(1)
1090            return ("ident", r), i+1
1091        return None
1092
1093    def is_defined(self,i):
1094        t = self.tok[i]
1095        if t.id != tokDEFINED:
1096            return None
1097
1098        # we have the defined keyword, check the rest
1099        i = self.skip_spaces(i+1)
1100        use_parens = 0
1101        if i < self.n and self.tok[i].id == tokLPAREN:
1102            use_parens = 1
1103            i = self.skip_spaces(i+1)
1104
1105        if i >= self.n:
1106            self.throw(CppConstantExpected,i,"### 'defined' must be followed  by macro name or left paren")
1107
1108        t = self.tok[i]
1109        if t.id != tokIDENT:
1110            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
1111
1112        i += 1
1113        if use_parens:
1114            i = self.expectId(i,tokRPAREN)
1115
1116        return ("defined",t.value), i
1117
1118
1119    def is_call_or_ident(self,i):
1120        i = self.skip_spaces(i)
1121        if i >= self.n:
1122            return None
1123
1124        t = self.tok[i]
1125        if t.id != tokIDENT:
1126            return None
1127
1128        name = t.value
1129
1130        i = self.skip_spaces(i+1)
1131        if i >= self.n or self.tok[i].id != tokLPAREN:
1132            return ("ident", name), i
1133
1134        params    = []
1135        depth     = 1
1136        i += 1
1137        j  = i
1138        while i < self.n:
1139            id = self.tok[i].id
1140            if id == tokLPAREN:
1141                depth += 1
1142            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
1143                while j < i and self.tok[j].id == tokSPACE:
1144                    j += 1
1145                k = i
1146                while k > j and self.tok[k-1].id == tokSPACE:
1147                    k -= 1
1148                param = self.tok[j:k]
1149                params.append( param )
1150                if id == tokRPAREN:
1151                    break
1152                j = i+1
1153            elif id == tokRPAREN:
1154                depth -= 1
1155            i += 1
1156
1157        if i >= self.n:
1158            return None
1159
1160        return ("call", (name, params)), i+1
1161
1162    def is_token(self,i,token):
1163        i = self.skip_spaces(i)
1164        if i >= self.n or self.tok[i].id != token:
1165            return None
1166        return token, i+1
1167
1168
1169    def is_value(self,i):
1170        t = self.tok[i]
1171        if t.id == tokSTRING:
1172            return ("string", t.value), i+1
1173
1174        c = self.is_number(i)
1175        if c: return c
1176
1177        c = self.is_defined(i)
1178        if c: return c
1179
1180        c = self.is_call_or_ident(i)
1181        if c: return c
1182
1183        i = self.skip_spaces(i)
1184        if i >= self.n or self.tok[i].id != tokLPAREN:
1185            return None
1186
1187        popcount = 1
1188        i2       = i+1
1189        while i2 < self.n:
1190            t = self.tok[i2]
1191            if t.id == tokLPAREN:
1192                popcount += 1
1193            elif t.id == tokRPAREN:
1194                popcount -= 1
1195                if popcount == 0:
1196                    break
1197            i2 += 1
1198
1199        if popcount != 0:
1200            self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")
1201
1202        if debugCppExpr:
1203            print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
1204        oldcount   = self.n
1205        self.n     = i2
1206        c          = self.is_expr(i+1)
1207        self.n     = oldcount
1208        if not c:
1209            self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")
1210
1211        e, i = c
1212        return e, i2+1
1213
1214    def is_unary(self,i):
1215        i = self.skip_spaces(i)
1216        if i >= self.n:
1217            return None
1218
1219        t = self.tok[i]
1220        if t.id in CppExpr.unaries:
1221            c = self.is_unary(i+1)
1222            if not c:
1223                self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
1224            e, i = c
1225            return (t.id, e), i
1226
1227        return self.is_value(i)
1228
1229    def is_binary(self,i):
1230        i = self.skip_spaces(i)
1231        if i >= self.n:
1232            return None
1233
1234        c = self.is_unary(i)
1235        if not c:
1236            return None
1237
1238        e1, i2 = c
1239        i2 = self.skip_spaces(i2)
1240        if i2 >= self.n:
1241            return c
1242
1243        t = self.tok[i2]
1244        if t.id in CppExpr.binaries:
1245            c = self.is_binary(i2+1)
1246            if not c:
1247                self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
1248            e2, i3 = c
1249            return (t.id, e1, e2), i3
1250
1251        return None
1252
1253    def is_expr(self,i):
1254        return self.is_binary(i)
1255
1256    def dump_node(self,e):
1257        op = e[0]
1258        line = "(" + op
1259        if op == "int":
1260            line += " %d)" % e[1]
1261        elif op == "hex":
1262            line += " 0x%x)" % e[1]
1263        elif op == "ident":
1264            line += " %s)" % e[1]
1265        elif op == "defined":
1266            line += " %s)" % e[1]
1267        elif op == "call":
1268            arg = e[1]
1269            line += " %s [" % arg[0]
1270            prefix = ""
1271            for param in arg[1]:
1272                par = ""
1273                for tok in param:
1274                    par += str(tok)
1275                line += "%s%s" % (prefix, par)
1276                prefix = ","
1277            line += "])"
1278        elif op in CppExpr.unaries:
1279            line += " %s)" % self.dump_node(e[1])
1280        elif op in CppExpr.binaries:
1281            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
1282        else:
1283            line += " ?%s)" % repr(e[1])
1284
1285        return line
1286
1287    def __repr__(self):
1288        return self.dump_node(self.expr)
1289
1290    def source_node(self,e):
1291        op = e[0]
1292        if op == "int":
1293            return "%d" % e[1]
1294        if op == "hex":
1295            return "0x%x" % e[1]
1296        if op == "ident":
1297            # XXX: should try to expand
1298            return e[1]
1299        if op == "defined":
1300            return "defined(%s)" % e[1]
1301
1302        prec = CppExpr.precedences.get(op,1000)
1303        arg  = e[1]
1304        if op in CppExpr.unaries:
1305            arg_src = self.source_node(arg)
1306            arg_op  = arg[0]
1307            arg_prec = CppExpr.precedences.get(arg[0],1000)
1308            if arg_prec < prec:
1309                return "!(" + arg_src + ")"
1310            else:
1311                return "!" + arg_src
1312        if op in CppExpr.binaries:
1313            arg2     = e[2]
1314            arg1_op  = arg[0]
1315            arg2_op  = arg2[0]
1316            arg1_src = self.source_node(arg)
1317            arg2_src = self.source_node(arg2)
1318            if CppExpr.precedences.get(arg1_op,1000) < prec:
1319                arg1_src = "(%s)" % arg1_src
1320            if CppExpr.precedences.get(arg2_op,1000) < prec:
1321                arg2_src = "(%s)" % arg2_src
1322
1323            return "%s %s %s" % (arg1_src, op, arg2_src)
1324        return "???"
1325
1326    def __str__(self):
1327        return self.source_node(self.expr)
1328
1329    def int_node(self,e):
1330        if e[0] == "int":
1331            return e[1]
1332        elif e[1] == "hex":
1333            return int(e[1],16)
1334        else:
1335            return None
1336
1337    def toInt(self):
1338        return self.int_node(self.expr)
1339
1340    def optimize_node(self,e,macros={}):
1341        op = e[0]
1342        if op == "defined":
1343            name = e[1]
1344            if macros.has_key(name):
1345                if macros[name] == kCppUndefinedMacro:
1346                    return ("int", 0)
1347                else:
1348                    return ("int", 1)
1349
1350            if kernel_remove_config_macros and name.startswith("CONFIG_"):
1351                return ("int", 0)
1352
1353        elif op == "!":
1354            op, v = e
1355            v = self.optimize_node(v, macros)
1356            if v[0] == "int":
1357                if v[1] == 0:
1358                    return ("int", 1)
1359                else:
1360                    return ("int", 0)
1361
1362        elif op == "&&":
1363            op, l, r = e
1364            l  = self.optimize_node(l, macros)
1365            r  = self.optimize_node(r, macros)
1366            li = self.int_node(l)
1367            ri = self.int_node(r)
1368            if li != None:
1369                if li == 0:
1370                    return ("int", 0)
1371                else:
1372                    return r
1373
1374        elif op == "||":
1375            op, l, r = e
1376            l  = self.optimize_node(l, macros)
1377            r  = self.optimize_node(r, macros)
1378            li = self.int_node(l)
1379            ri = self.int_node(r)
1380            if li != None:
1381                if li == 0:
1382                    return r
1383                else:
1384                    return ("int", 1)
1385            elif ri != None:
1386                if ri == 0:
1387                    return l
1388                else:
1389                    return ("int", 1)
1390        return e
1391
1392    def optimize(self,macros={}):
1393        self.expr = self.optimize_node(self.expr,macros)
1394
1395    def removePrefixedNode(self,e,prefix,names):
1396        op = e[0]
1397        if op == "defined":
1398            name = e[1]
1399            if name.startswith(prefix):
1400                if names.has_key[name] and names[name] == "y":
1401                    return ("int", 1)
1402                else:
1403                    return ("int", 0)
1404
1405        elif op in CppExpr.unaries:
1406            op, v = e
1407            v = self.removePrefixedNode(v,prefix,names)
1408            return (op, v)
1409        elif op in CppExpr.binaries:
1410            op, v1, v2 = e
1411            v1 = self.removePrefixedNode(v1,prefix,names)
1412            v2 = self.removePrefixedNode(v2,prefix,names)
1413            return (op, v1, v2)
1414        elif op == "call":
1415            func, params = e[1]
1416            params2 = []
1417            for param in params:
1418                params2.append( self.removePrefixedNode(param,prefix,names) )
1419            return (op, (func, params2))
1420
1421        return e
1422
1423    def removePrefixed(self,prefix,names={}):
1424        self.expr = self.removePrefixedNode(self.expr,prefix,names)
1425
1426    def is_equal_node(self,e1,e2):
1427        if e1[0] != e2[0] or len(e1) != len(e2):
1428            return False
1429
1430        op = e1[0]
1431        if op == "int" or op == "hex" or op == "!" or op == "defined":
1432            return e1[0] == e2[0]
1433
1434        return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
1435
1436    def is_equal(self,other):
1437        return self.is_equal_node(self.expr,other.expr)
1438
1439def test_cpp_expr(expr, expected):
1440    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
1441    #print repr(e.expr)
1442    s1 = repr(e)
1443    if s1 != expected:
1444        print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
1445    else:
1446        #print "OK: expression '%s'" % expr
1447        pass
1448
1449def test_cpp_expr_optim(expr, expected, macros={}):
1450    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
1451    e.optimize(macros)
1452
1453    s1 = repr(e)
1454    if s1 != expected:
1455        print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
1456    else:
1457        #print "OK: optmized expression '%s'" % expr
1458        pass
1459
1460def test_cpp_expr_source(expr, expected):
1461    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
1462    s1 = str(e)
1463    if s1 != expected:
1464        print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
1465    else:
1466        #print "OK: source expression '%s'" % expr
1467        pass
1468
1469def test_CppExpr():
1470    print "testing CppExpr"
1471    test_cpp_expr( "0", "(int 0)" )
1472    test_cpp_expr( "1", "(int 1)" )
1473    test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
1474    test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
1475    test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
1476    test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
1477    test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
1478    test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
1479    test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
1480    test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )
1481
1482    test_cpp_expr_optim( "0", "(int 0)" )
1483    test_cpp_expr_optim( "1", "(int 1)" )
1484    test_cpp_expr_optim( "1 && 1", "(int 1)" )
1485    test_cpp_expr_optim( "1 && 0", "(int 0)" )
1486    test_cpp_expr_optim( "0 && 1", "(int 0)" )
1487    test_cpp_expr_optim( "0 && 0", "(int 0)" )
1488    test_cpp_expr_optim( "1 || 1", "(int 1)" )
1489    test_cpp_expr_optim( "1 || 0", "(int 1)" )
1490    test_cpp_expr_optim( "0 || 1", "(int 1)" )
1491    test_cpp_expr_optim( "0 || 0", "(int 0)" )
1492    test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
1493    test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
1494    test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
1495    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
1496    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
1497    test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
1498    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
1499    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
1500    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
1501    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } )
1502    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
1503    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
1504    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )
1505
1506    test_cpp_expr_source( "0", "0" )
1507    test_cpp_expr_source( "1", "1" )
1508    test_cpp_expr_source( "1 && 1", "1 && 1" )
1509    test_cpp_expr_source( "1 && 0", "1 && 0" )
1510    test_cpp_expr_source( "0 && 1", "0 && 1" )
1511    test_cpp_expr_source( "0 && 0", "0 && 0" )
1512    test_cpp_expr_source( "1 || 1", "1 || 1" )
1513    test_cpp_expr_source( "1 || 0", "1 || 0" )
1514    test_cpp_expr_source( "0 || 1", "0 || 1" )
1515    test_cpp_expr_source( "0 || 0", "0 || 0" )
1516    test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
1517    test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
1518    test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
1519    test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
1520
1521
1522#####################################################################################
1523#####################################################################################
1524#####                                                                           #####
1525#####          C P P   B L O C K                                                #####
1526#####                                                                           #####
1527#####################################################################################
1528#####################################################################################
1529
1530class Block:
1531    """a class used to model a block of input source text. there are two block types:
1532        - direcive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
1533        - text blocks, contain the tokens of non-directive blocks
1534
1535       the cpp parser class below will transform an input source file into a list of Block
1536       objects (grouped in a BlockList object for convenience)"""
1537
1538    def __init__(self,tokens,directive=None,lineno=0):
1539        """initialize a new block, if 'directive' is None, this is a text block
1540           NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
1541                 and '#ifndef MACRO' into '#if !defined(MACRO)'"""
1542        if directive == "ifdef":
1543            tok = Token()
1544            tok.set(tokDEFINED)
1545            tokens = [ tok ] + tokens
1546            directive = "if"
1547
1548        elif directive == "ifndef":
1549            tok1 = Token()
1550            tok2 = Token()
1551            tok1.set(tokNOT)
1552            tok2.set(tokDEFINED)
1553            tokens = [ tok1, tok2 ] + tokens
1554            directive = "if"
1555
1556        self.tokens    = tokens
1557        self.directive = directive
1558        if lineno > 0:
1559            self.lineno = lineno
1560        else:
1561            self.lineno = self.tokens[0].lineno
1562
1563        if self.isIf():
1564            self.expr = CppExpr( self.tokens )
1565
1566    def isDirective(self):
1567        """returns True iff this is a directive block"""
1568        return self.directive != None
1569
1570    def isConditional(self):
1571        """returns True iff this is a conditional directive block"""
1572        return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
1573
1574    def isDefine(self):
1575        """returns the macro name in a #define directive, or None otherwise"""
1576        if self.directive != "define":
1577            return None
1578
1579        return self.tokens[0].value
1580
1581    def isIf(self):
1582        """returns True iff this is an #if-like directive block"""
1583        return self.directive in ["if","ifdef","ifndef","elif"]
1584
1585    def isInclude(self):
1586        """checks wether this is a #include directive. if true, then returns the
1587           corresponding file name (with brackets or double-qoutes). None otherwise"""
1588        if self.directive != "include":
1589            return None
1590
1591        #print "iii " + repr(self.tokens)
1592        if self.tokens[0].id == tokSTRING:
1593            # a double-quote include, that's easy
1594            return self.tokens[0].value
1595
1596        # we only want the bracket part, not any comments or junk after it
1597        if self.tokens[0].id == "<":
1598            i   = 0
1599            tok = self.tokens
1600            n   = len(tok)
1601            while i < n and tok[i].id != ">":
1602                i += 1
1603
1604            if i >= n:
1605                return None
1606
1607            return string.join([ str(x) for x in tok[:i+1] ],"")
1608
1609        else:
1610            return None
1611
1612    def __repr__(self):
1613        """generate the representation of a given block"""
1614        if self.directive:
1615            result = "#%s " % self.directive
1616            if self.isIf():
1617                result += repr(self.expr)
1618            else:
1619                for tok in self.tokens:
1620                    result += repr(tok)
1621        else:
1622            result = ""
1623            for tok in self.tokens:
1624                result += repr(tok)
1625
1626        return result
1627
1628    def __str__(self):
1629        """generate the string representation of a given block"""
1630        if self.directive:
1631            if self.directive == "if":
1632                # small optimization to re-generate #ifdef and #ifndef
1633                e = self.expr.expr
1634                op = e[0]
1635                if op == "defined":
1636                    result = "#ifdef %s" % e[1]
1637                elif op == "!" and e[1][0] == "defined":
1638                    result = "#ifndef %s" % e[1][1]
1639                else:
1640                    result = "#if " + str(self.expr)
1641            else:
1642                result = "#%s" % self.directive
1643                if len(self.tokens):
1644                    result += " "
1645                for tok in self.tokens:
1646                    result += str(tok)
1647        else:
1648            result = ""
1649            for tok in self.tokens:
1650                result += str(tok)
1651
1652        return result
1653
1654
1655class BlockList:
1656    """a convenience class used to hold and process a list of blocks returned by
1657       the cpp parser"""
1658    def __init__(self,blocks):
1659        self.blocks = blocks
1660
1661    def __len__(self):
1662        return len(self.blocks)
1663
1664    def __getitem__(self,n):
1665        return self.blocks[n]
1666
1667    def __repr__(self):
1668        return repr(self.blocks)
1669
1670    def __str__(self):
1671        result = ""
1672        for b in self.blocks:
1673            result += str(b)
1674            if b.isDirective():
1675                result += '\n'
1676        return result
1677
1678    def  optimizeIf01(self):
1679        """remove the code between #if 0 .. #endif in a BlockList"""
1680        self.blocks = optimize_if01(self.blocks)
1681
1682    def optimizeMacros(self, macros):
1683        """remove known defined and undefined macros from a BlockList"""
1684        for b in self.blocks:
1685            if b.isIf():
1686                b.expr.optimize(macros)
1687
1688    def removeMacroDefines(self,macros):
1689        """remove known macro definitions from a BlockList"""
1690        self.blocks = remove_macro_defines(self.blocks,macros)
1691
1692    def removePrefixed(self,prefix,names):
1693        for b in self.blocks:
1694            if b.isIf():
1695                b.expr.removePrefixed(prefix,names)
1696
1697    def optimizeAll(self,macros):
1698        self.optimizeMacros(macros)
1699        self.optimizeIf01()
1700        return
1701
1702    def findIncludes(self):
1703        """return the list of included files in a BlockList"""
1704        result = []
1705        for b in self.blocks:
1706            i = b.isInclude()
1707            if i:
1708                result.append(i)
1709
1710        return result
1711
1712
1713    def write(self,out):
1714        out.write(str(self))
1715
1716    def removeComments(self):
1717        for b in self.blocks:
1718            for tok in b.tokens:
1719                if tok.id == tokSPACE:
1720                    tok.value = " "
1721
1722    def removeEmptyLines(self):
1723        # state = 1 => previous line was tokLN
1724        # state = 0 => previous line was directive
1725        state  = 1
1726        for b in self.blocks:
1727            if b.isDirective():
1728                #print "$$$ directive %s" % str(b)
1729                state = 0
1730            else:
1731                # a tokLN followed by spaces is replaced by a single tokLN
1732                # several successive tokLN are replaced by a single one
1733                #
1734                dst   = []
1735                src   = b.tokens
1736                n     = len(src)
1737                i     = 0
1738                #print "$$$ parsing %s" % repr(src)
1739                while i < n:
1740                    # find final tokLN
1741                    j = i
1742                    while j < n and src[j].id != tokLN:
1743                        j += 1
1744
1745                    if j >= n:
1746                        # uhhh
1747                        dst += src[i:]
1748                        break
1749
1750                    if src[i].id == tokSPACE:
1751                        k = i+1
1752                        while src[k].id == tokSPACE:
1753                            k += 1
1754
1755                        if k == j: # empty lines with spaces in it
1756                            i = j  # remove the spaces
1757
1758                    if i == j:
1759                        # an empty line
1760                        if state == 1:
1761                            i += 1   # remove it
1762                        else:
1763                            state = 1
1764                            dst.append(src[i])
1765                            i   += 1
1766                    else:
1767                        # this line is not empty, remove trailing spaces
1768                        k = j
1769                        while k > i and src[k-1].id == tokSPACE:
1770                            k -= 1
1771
1772                        nn = i
1773                        while nn < k:
1774                            dst.append(src[nn])
1775                            nn += 1
1776                        dst.append(src[j])
1777                        state = 0
1778                        i = j+1
1779
1780                b.tokens = dst
1781
1782    def removeVarsAndFuncs(self,knownStatics=set()):
1783        """remove all extern and static declarations corresponding
1784           to variable and function declarations. we only accept typedefs
1785           and enum/structs/union declarations.
1786
1787           however, we keep the definitions corresponding to the set
1788           of known static inline functions in the set 'knownStatics',
1789           which is useful for optimized byteorder swap functions and
1790           stuff like that.
1791           """
1792        # state = 1 => typedef/struct encountered
1793        # state = 2 => vars or func declaration encountered, skipping until ";"
1794        # state = 0 => normal (i.e. LN + spaces)
1795        state      = 0
1796        depth      = 0
1797        blocks2    = []
1798        for b in self.blocks:
1799            if b.isDirective():
1800                blocks2.append(b)
1801            else:
1802                n     = len(b.tokens)
1803                i     = 0
1804                first = 0
1805                if state == 2:
1806                    first = n
1807                while i < n:
1808                    tok = b.tokens[i]
1809                    if state == 0:
1810                        bad = 0
1811                        if tok.id in [tokLN, tokSPACE]:
1812                            pass
1813                        elif tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
1814                            state = 1
1815                        else:
1816                            if tok.value in [ 'static', 'extern', '__KINLINE' ]:
1817                                j = i+1
1818                                ident = ""
1819                                while j < n and not (b.tokens[j].id in [ '(', ';' ]):
1820                                    if b.tokens[j].id == tokIDENT:
1821                                        ident = b.tokens[j].value
1822                                    j += 1
1823                                if j < n and ident in knownStatics:
1824                                    # this is a known static, we're going to keep its
1825                                    # definition in the final output
1826                                    state = 1
1827                                else:
1828                                    #print "### skip static '%s'" % ident
1829                                    pass
1830
1831                            if state == 0:
1832                                if i > first:
1833                                    #print "### intermediate from '%s': '%s'" % (tok.value, repr(b.tokens[first:i]))
1834                                    blocks2.append( Block(b.tokens[first:i]) )
1835                                state = 2
1836                                first = n
1837
1838                    else:  # state > 0
1839                        if tok.id == '{':
1840                            depth += 1
1841
1842                        elif tok.id == '}':
1843                            if depth > 0:
1844                                depth -= 1
1845
1846                        elif depth == 0 and tok.id == ';':
1847                            if state == 2:
1848                                first = i+1
1849                            state = 0
1850
1851                    i += 1
1852
1853                if i > first:
1854                    #print "### final '%s'" % repr(b.tokens[first:i])
1855                    blocks2.append( Block(b.tokens[first:i]) )
1856
1857        self.blocks = blocks2
1858
1859    def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
1860        """insert your standard issue disclaimer that this is an
1861           auto-generated file, etc.."""
1862        tokens = CppLineTokenizer( disclaimer ).toTokenList()
1863        tokens = tokens[:-1]  # remove trailing tokLN
1864        self.blocks = [ Block(tokens) ] + self.blocks
1865
1866class BlockParser:
1867    """a class used to convert an input source file into a BlockList object"""
1868
1869    def __init__(self,tokzer=None):
1870        """initialize a block parser. the input source is provided through a Tokenizer
1871           object"""
1872        self.reset(tokzer)
1873
1874    def reset(self,tokzer):
1875        self.state  = 1
1876        self.tokzer = tokzer
1877
1878    def getBlocks(self,tokzer=None):
1879        """tokenize and parse the input source, return a BlockList object
1880           NOTE: empty and line-numbering directives are ignored and removed
1881                 from the result. as a consequence, it is possible to have
1882                 two successive text blocks in the result"""
1883        # state 0 => in source code
1884        # state 1 => in source code, after a LN
1885        # state 2 => in source code, after LN then some space
1886        state   = 1
1887        lastLN  = 0
1888        current = []
1889        blocks  = []
1890
1891        if tokzer == None:
1892            tokzer = self.tokzer
1893
1894        while 1:
1895            tok = tokzer.getToken()
1896            if tok.id == tokEOF:
1897                break
1898
1899            if tok.id == tokLN:
1900                state    = 1
1901                current.append(tok)
1902                lastLN   = len(current)
1903
1904            elif tok.id == tokSPACE:
1905                if state == 1:
1906                    state = 2
1907                current.append(tok)
1908
1909            elif tok.id == "#":
1910                if state > 0:
1911                    # this is the start of a directive
1912
1913                    if lastLN > 0:
1914                        # record previous tokens as text block
1915                        block   = Block(current[:lastLN])
1916                        blocks.append(block)
1917                        lastLN  = 0
1918
1919                    current = []
1920
1921                    # skip spaces after the #
1922                    while 1:
1923                        tok = tokzer.getToken()
1924                        if tok.id != tokSPACE:
1925                            break
1926
1927                    if tok.id != tokIDENT:
1928                        # empty or line-numbering, ignore it
1929                        if tok.id != tokLN and tok.id != tokEOF:
1930                            while 1:
1931                                tok = tokzer.getToken()
1932                                if tok.id == tokLN or tok.id == tokEOF:
1933                                    break
1934                        continue
1935
1936                    directive = tok.value
1937                    lineno    = tok.lineno
1938
1939                    # skip spaces
1940                    tok = tokzer.getToken()
1941                    while tok.id == tokSPACE:
1942                        tok = tokzer.getToken()
1943
1944                    # then record tokens until LN
1945                    dirtokens = []
1946                    while tok.id != tokLN and tok.id != tokEOF:
1947                        dirtokens.append(tok)
1948                        tok = tokzer.getToken()
1949
1950                    block = Block(dirtokens,directive,lineno)
1951                    blocks.append(block)
1952                    state   = 1
1953
1954            else:
1955                state = 0
1956                current.append(tok)
1957
1958        if len(current) > 0:
1959            block = Block(current)
1960            blocks.append(block)
1961
1962        return BlockList(blocks)
1963
1964    def parse(self,tokzer):
1965        return self.getBlocks( tokzer )
1966
1967    def parseLines(self,lines):
1968        """parse a list of text lines into a BlockList object"""
1969        return self.getBlocks( CppLinesTokenizer(lines) )
1970
1971    def parseFile(self,path):
1972        """parse a file into a BlockList object"""
1973        file = open(path, "rt")
1974        result = self.getBlocks( CppFileTokenizer(file) )
1975        file.close()
1976        return result
1977
1978
1979def test_block_parsing(lines,expected):
1980    blocks = BlockParser().parse( CppLinesTokenizer(lines) )
1981    if len(blocks) != len(expected):
1982        raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
1983              % (str(blocks), repr(expected))
1984    for n in range(len(blocks)):
1985        if str(blocks[n]) != expected[n]:
1986            raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
1987                  % (n, str(blocks[n]), expected[n])
1988    #for block in blocks:
1989    #    print block
1990
1991def test_BlockParser():
1992    test_block_parsing(["#error hello"],["#error hello"])
1993    test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
1994    test_block_parsing([ "foo", "  #  ", "bar" ], [ "foo\n","bar\n" ])
1995    test_block_parsing(\
1996        [ "foo", "   #  ", "  #  /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
1997        [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
1998
1999
2000#####################################################################################
2001#####################################################################################
2002#####                                                                           #####
2003#####        B L O C K   L I S T   O P T I M I Z A T I O N                      #####
2004#####                                                                           #####
2005#####################################################################################
2006#####################################################################################
2007
2008def  remove_macro_defines( blocks, excludedMacros=set() ):
2009    """remove macro definitions like #define <macroName>  ...."""
2010    result = []
2011    for b in blocks:
2012        macroName = b.isDefine()
2013        if macroName == None or not macroName in excludedMacros:
2014            result.append(b)
2015
2016    return result
2017
2018def  find_matching_endif( blocks, i ):
2019    n     = len(blocks)
2020    depth = 1
2021    while i < n:
2022        if blocks[i].isDirective():
2023            dir = blocks[i].directive
2024            if dir in [ "if", "ifndef", "ifdef" ]:
2025                depth += 1
2026            elif depth == 1 and dir in [ "else", "elif" ]:
2027                return i
2028            elif dir == "endif":
2029                depth -= 1
2030                if depth == 0:
2031                    return i
2032        i += 1
2033    return i
2034
2035def  optimize_if01( blocks ):
2036    """remove the code between #if 0 .. #endif in a list of CppBlocks"""
2037    i = 0
2038    n = len(blocks)
2039    result = []
2040    while i < n:
2041        j = i
2042        while j < n and not blocks[j].isIf():
2043            j += 1
2044        if j > i:
2045            D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
2046            result += blocks[i:j]
2047        if j >= n:
2048            break
2049        expr = blocks[j].expr
2050        r    = expr.toInt()
2051        if r == None:
2052            result.append(blocks[j])
2053            i = j + 1
2054            continue
2055
2056        if r == 0:
2057            # if 0 => skip everything until the corresponding #endif
2058            j = find_matching_endif( blocks, j+1 )
2059            if j >= n:
2060                # unterminated #if 0, finish here
2061                break
2062            dir = blocks[j].directive
2063            if dir == "endif":
2064                D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
2065                i = j + 1
2066            elif dir == "else":
2067                # convert 'else' into 'if 1'
2068                D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
2069                blocks[j].directive = "if"
2070                blocks[j].expr      = CppExpr( CppLineTokenizer("1").toTokenList() )
2071                i = j
2072            elif dir == "elif":
2073                # convert 'elif' into 'if'
2074                D2("convert 'if 0' .. 'elif' into 'if'")
2075                blocks[j].directive = "if"
2076                i = j
2077            continue
2078
2079        # if 1 => find corresponding endif and remove/transform them
2080        k = find_matching_endif( blocks, j+1 )
2081        if k >= n:
2082            # unterminated #if 1, finish here
2083            D2("unterminated 'if 1'")
2084            result += blocks[j+1:k]
2085            break
2086
2087        dir = blocks[k].directive
2088        if dir == "endif":
2089            D2("convert 'if 1' .. 'endif' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
2090            result += optimize_if01(blocks[j+1:k])
2091            i       = k+1
2092        elif dir == "else":
2093            # convert 'else' into 'if 0'
2094            D2("convert 'if 1' .. 'else' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
2095            result += optimize_if01(blocks[j+1:k])
2096            blocks[k].directive = "if"
2097            blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
2098            i = k
2099        elif dir == "elif":
2100            # convert 'elif' into 'if 0'
2101            D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
2102            result += optimize_if01(blocks[j+1:k])
2103            blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
2104            i = k
2105    return result
2106
2107def  test_optimizeAll():
2108    text = """\
2109#if 1
2110#define  GOOD_1
2111#endif
2112#if 0
2113#define  BAD_2
2114#define  BAD_3
2115#endif
2116
2117#if 1
2118#define  GOOD_2
2119#else
2120#define  BAD_4
2121#endif
2122
2123#if 0
2124#define  BAD_5
2125#else
2126#define  GOOD_3
2127#endif
2128
2129#if 0
2130#if 1
2131#define  BAD_6
2132#endif
2133#endif\
2134"""
2135
2136    expected = """\
2137#define GOOD_1
2138
2139#define GOOD_2
2140
2141#define GOOD_3
2142
2143"""
2144
2145    print "running test_BlockList.optimizeAll"
2146    out = StringOutput()
2147    lines = string.split(text, '\n')
2148    list = BlockParser().parse( CppLinesTokenizer(lines) )
2149    #D_setlevel(2)
2150    list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
2151    #print repr(list)
2152    list.write(out)
2153    if out.get() != expected:
2154        print "KO: macro optimization failed\n"
2155        print "<<<< expecting '",
2156        print expected,
2157        print "'\n>>>> result '"
2158        print out.get(),
2159        print "'\n----"
2160
2161
2162#####################################################################################
2163#####################################################################################
2164#####                                                                           #####
2165#####                                                                           #####
2166#####                                                                           #####
2167#####################################################################################
2168#####################################################################################
2169
2170def runUnitTests():
2171    """run all unit tests for this program"""
2172    print "running unit tests"
2173    test_CppTokenizer()
2174    test_CppExpr()
2175    test_optimizeAll()
2176    test_BlockParser()
2177    print "OK"
2178
2179if __name__ == "__main__":
2180    runUnitTests()
2181