# ----------------------------------------------------------------------
# clex.py
#
# Tokenizer for ANSI C, written as a PLY (ply.lex) rule module.
# ----------------------------------------------------------------------

import sys
sys.path.insert(0, "../..")

import ply.lex as lex

# C keywords.  Each name doubles as the token type emitted for that keyword.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO',
    'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT',
    'LONG', 'REGISTER', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC',
    'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE',
    'WHILE',
)

# Full list of token types: the keywords above followed by every
# literal, operator and punctuation token.
tokens = reserved + (
    # Literals: identifier, type name, integer, float, string, character
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Arithmetic:  +  -  *  /  %
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',

    # Bitwise:  |  &  ~  ^  <<  >>
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',

    # Logical:  ||  &&  !
    'LOR', 'LAND', 'LNOT',

    # Comparison:  <  <=  >  >=  ==  !=
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment:  =  *=  /=  %=  +=  -=  <<=  >>=  &=  ^=  |=
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL',
    'MINUSEQUAL', 'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
    'OREQUAL',

    # Increment / decrement:  ++  --
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference:  ->
    'ARROW',

    # Conditional operator:  ?
    'CONDOP',

    # Delimiters:  ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis:  ...
    'ELLIPSIS',
)

# Characters skipped without producing a token: space, tab, form feed.
t_ignore = ' \t\x0c'


# Newlines produce no token; they only advance the line counter.
def t_NEWLINE(t):
    r'\n+'
    # The pattern matches newline characters only, so the length of the
    # match is the number of lines consumed.
    t.lexer.lineno += len(t.value)


# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'

# Bitwise operators
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'

# Logical operators
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
# Relational operators
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='

# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'

# ->
t_ARROW = r'->'

# ?
t_CONDOP = r'\?'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'


# Character constant 'c' or L'c'
#
# This has to be a function rule placed ahead of t_ID.  PLY tries function
# rules (in definition order) before any string rule, so as a string rule
# the leading "L" of a wide character constant was always consumed by t_ID
# first and L'c' came out as ID followed by CCONST.
def t_CCONST(t):
    r'(L)?\'([^\\\n]|(\\.))*?\''
    return t


# Identifiers and reserved words

# Maps the lower-case spelling of each keyword to its token type.
reserved_map = {word.lower(): word for word in reserved}


def t_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords share the identifier pattern; re-tag them by exact spelling.
    t.type = reserved_map.get(t.value, "ID")
    return t


# Integer literal: decimal/octal digits or a 0x/0X hexadecimal number,
# followed by an optional u/l suffix combination.
t_ICONST = r'(0[xX][0-9a-fA-F]+|\d+)([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: digits with a fraction and optional exponent, or digits
# with a mandatory exponent; the exponent marker may be 'e' or 'E'.
# NOTE: PLY orders string rules by decreasing pattern length, and this
# pattern must stay longer than t_ICONST so that "1.5" is tried as a float
# before its leading "1" can be taken as an integer.
t_FCONST = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))? | (\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'


# Comments are discarded; only the line counter is updated.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')


# Preprocessor directive (ignored)
#
# The trailing newline is optional so that a directive on the final line
# of input, with no newline after it, is still consumed as a directive.
def t_preprocessor(t):
    r'\#[^\n]*\n?'
    t.lexer.lineno += t.value.count('\n')


# Report a character no rule matches, then skip it and keep lexing.
def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)


lexer = lex.lex()
if __name__ == "__main__":
    lex.runmain(lexer)