grammar t033backtracking;

// Grammar-wide options: emit a Python parser, use k=2 fixed lookahead and
// fall back to (memoized) backtracking where that is not sufficient.
options {
    language=Python;
    backtrack=true;
    memoize=true;
    k=2;
}

// Dynamic scope shared by the rules that declare "scope Symbols;".
// Each pushed scope carries the collection of type names declared in it.
scope Symbols {
    types;
}

@header {
# compatibility stuff
# Keep the builtin set types when they exist, otherwise use the sets module.
try:
    set = set
    frozenset = frozenset
except NameError:
    from sets import Set as set, ImmutableSet as frozenset


# Same approach for reversed: supply a list based stand-in when it is missing.
try:
    reversed = reversed
except NameError:
    def reversed(l):
        backwards = l[:]
        backwards.reverse()
        return backwards

}

@members {
    def isTypeName(self, name):
        # Visit the Symbols scopes from the last entry of the stack to the
        # first and report whether any of them lists name as a type.
        found = False
        for frame in reversed(self.Symbols_stack):
            if name in frame.types:
                found = True
                break

        return found

}

// Entry rule: one or more external declarations.
translation_unit
scope Symbols; // entire file is a scope
@init {
    $Symbols::types = set()
}
    : external_declaration+
    ;

/** Either a function definition or any other kind of C decl/def.
 *
 *  The LL(*) analysis algorithm fails to deal with this decision because of
 *  the recursion in the declarator rules, so a manual syntactic predicate is
 *  used here; that way we do not backtrack over the entire function.
 *  It also gives better errors: an error inside the function body no longer
 *  makes the parser fail to predict that it is looking at a function
 *  (this used to produce weird errors).
 *  Remember: the goal is to avoid backtracking like the plague, because it
 *  makes debugging, actions, and error reporting harder.
 *
 *  Note that k=1 results in a much smaller predictor for the fixed
 *  lookahead; k=2 generated a few thousand extra lines. ;)
 *  That still has to be optimized in the future.
 */
external_declaration
options {k=1;}
    : ( declaration_specifiers? declarator declaration* '{' )=> function_definition
    | declaration
    ;

// Function header only: the body alternatives below are commented out.
// Opens a fresh Symbols scope with an empty type set.
function_definition
scope Symbols; // put parameters and locals into same scope for now
@init {
    $Symbols::types = set()
}
    : declaration_specifiers? declarator
//      (   declaration+ compound_statement     // K&R style
//      |   compound_statement                  // ANSI style
//      )
    ;

// A declaration, with or without a leading 'typedef'.
// The rule-local isTypedef flag starts out False and is switched to True once
// 'typedef' (and the optional specifiers) have been matched; direct_declarator
// reads the flag to decide whether an IDENTIFIER introduces a new type name.
declaration
scope {
    isTypedef;
}
@init {
    $declaration::isTypedef = False
}
    : 'typedef' declaration_specifiers? {$declaration::isTypedef = True}
      init_declarator_list ';' // special case, looking for typedef
    | declaration_specifiers init_declarator_list? ';'
    ;

// One or more storage classes, type specifiers and qualifiers, in any order.
declaration_specifiers
    : ( storage_class_specifier
      | type_specifier
      | type_qualifier
      )+
    ;

// Comma-separated list of declarators.
init_declarator_list
    : init_declarator (',' init_declarator)*
    ;

// A declarator; the initializer part is currently disabled.
init_declarator
    : declarator //('=' initializer)?
    ;

storage_class_specifier
    : 'extern' | 'static' | 'auto' | 'register'
    ;

// Builtin type keywords, or an identifier accepted by type_id.
type_specifier
    : 'void' | 'char' | 'short' | 'int' | 'long'
    | 'float' | 'double' | 'signed' | 'unsigned'
//  | struct_or_union_specifier
//  | enum_specifier
    | type_id
    ;

// An IDENTIFIER is accepted here only when the semantic predicate holds,
// i.e. when isTypeName() finds the text of the next token in a Symbols scope.
type_id
    : {self.isTypeName(self.input.LT(1).getText())}? IDENTIFIER
//      {System.out.println($IDENTIFIER.text+" is a type");}
    ;

// struct_or_union_specifier
// options {k=3;}
// scope Symbols; // structs are scopes
// @init {
//   $Symbols::types = set()
// }
//     : struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
//     | struct_or_union IDENTIFIER
//     ;

// struct_or_union
//     : 'struct'
//     | 'union'
//     ;

// struct_declaration_list
//     : struct_declaration+
//     ;

// struct_declaration
//     : specifier_qualifier_list struct_declarator_list ';'
//     ;

// specifier_qualifier_list
//     : ( type_qualifier | type_specifier )+
//     ;

// struct_declarator_list
//     : struct_declarator (',' struct_declarator)*
//     ;

// struct_declarator
//     : declarator (':' constant_expression)?
//     | ':' constant_expression
//     ;

// enum_specifier
// options {k=3;}
//     : 'enum' '{' enumerator_list '}'
//     | 'enum' IDENTIFIER '{' enumerator_list '}'
//     | 'enum' IDENTIFIER
//     ;

// enumerator_list
//     : enumerator (',' enumerator)*
//     ;

// enumerator
//     : IDENTIFIER ('=' constant_expression)?
//     ;

type_qualifier
    : 'const' | 'volatile'
    ;

// Either an (optionally pointer-prefixed) direct declarator, or a bare pointer.
declarator
    : pointer? direct_declarator
    | pointer
    ;

// An identifier or a parenthesised declarator, followed by any number of
// suffixes. When the identifier is matched inside a declaration whose
// isTypedef flag is set, its text is added to the current Symbols scope as a
// type name and a "define type ..." line is printed.
direct_declarator
    : ( IDENTIFIER
        {
        if len($declaration)>0 and $declaration::isTypedef:
            typeName = $IDENTIFIER.text
            $Symbols::types.add(typeName)
            print "define type "+typeName
        }
      | '(' declarator ')'
      )
      declarator_suffix*
    ;

// Only the empty array and empty parameter-list suffixes are enabled.
declarator_suffix
    : /*'[' constant_expression ']'
      |*/ '[' ']'
//  | '(' parameter_type_list ')'
//  | '(' identifier_list ')'
    | '(' ')'
    ;

// One or more '*', each optionally followed by type qualifiers.
// The alternatives share the '*' prefix; their order is kept as is.
pointer
    : '*' type_qualifier+ pointer?
    | '*' pointer
    | '*'
    ;

// parameter_type_list
//     : parameter_list (',' '...')?
//     ;

// parameter_list
//     : parameter_declaration (',' parameter_declaration)*
//     ;

// parameter_declaration
//     : declaration_specifiers (declarator|abstract_declarator)*
//     ;

// identifier_list
//     : IDENTIFIER (',' IDENTIFIER)*
//     ;

// type_name
//     : specifier_qualifier_list abstract_declarator?
//     ;

// abstract_declarator
//     : pointer direct_abstract_declarator?
//     | direct_abstract_declarator
//     ;

// direct_abstract_declarator
//     : ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
//     ;

// abstract_declarator_suffix
//     : '[' ']'
//     | '[' constant_expression ']'
//     | '(' ')'
//     | '(' parameter_type_list ')'
//     ;

// initializer
//     : assignment_expression
//     | '{' initializer_list ','? '}'
//     ;

// initializer_list
//     : initializer (',' initializer)*
//     ;

// // E x p r e s s i o n s

// argument_expression_list
//     : assignment_expression (',' assignment_expression)*
//     ;

// additive_expression
//     : (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
//     ;

// multiplicative_expression
//     : (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
//     ;

// cast_expression
//     : '(' type_name ')' cast_expression
//     | unary_expression
//     ;

// unary_expression
//     : postfix_expression
//     | '++' unary_expression
//     | '--' unary_expression
//     | unary_operator cast_expression
//     | 'sizeof' unary_expression
//     | 'sizeof' '(' type_name ')'
//     ;

// postfix_expression
//     : primary_expression
//       ( '[' expression ']'
//       | '(' ')'
//       | '(' argument_expression_list ')'
//       | '.' IDENTIFIER
//       | '*' IDENTIFIER
//       | '->' IDENTIFIER
//       | '++'
//       | '--'
//       )*
//     ;

// unary_operator
//     : '&'
//     | '*'
//     | '+'
//     | '-'
//     | '~'
//     | '!'
//     ;

// primary_expression
//     : IDENTIFIER
//     | constant
//     | '(' expression ')'
//     ;

// constant
//     : HEX_LITERAL
//     | OCTAL_LITERAL
//     | DECIMAL_LITERAL
//     | CHARACTER_LITERAL
//     | STRING_LITERAL
//     | FLOATING_POINT_LITERAL
//     ;

// /////

// expression
//     : assignment_expression (',' assignment_expression)*
//     ;

// constant_expression
//     : conditional_expression
//     ;

// assignment_expression
//     : lvalue assignment_operator assignment_expression
//     | conditional_expression
//     ;

// lvalue
//     : unary_expression
//     ;

// assignment_operator
//     : '='
//     | '*='
//     | '/='
//     | '%='
//     | '+='
//     | '-='
//     | '<<='
//     | '>>='
//     | '&='
//     | '^='
//     | '|='
//     ;

// conditional_expression
//     : logical_or_expression ('?' expression ':' conditional_expression)?
//     ;

// logical_or_expression
//     : logical_and_expression ('||' logical_and_expression)*
//     ;

// logical_and_expression
//     : inclusive_or_expression ('&&' inclusive_or_expression)*
//     ;

// inclusive_or_expression
//     : exclusive_or_expression ('|' exclusive_or_expression)*
//     ;

// exclusive_or_expression
//     : and_expression ('^' and_expression)*
//     ;

// and_expression
//     : equality_expression ('&' equality_expression)*
//     ;
// equality_expression
//     : relational_expression (('=='|'!=') relational_expression)*
//     ;

// relational_expression
//     : shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
//     ;

// shift_expression
//     : additive_expression (('<<'|'>>') additive_expression)*
//     ;

// // S t a t e m e n t s

// statement
//     : labeled_statement
//     | compound_statement
//     | expression_statement
//     | selection_statement
//     | iteration_statement
//     | jump_statement
//     ;

// labeled_statement
//     : IDENTIFIER ':' statement
//     | 'case' constant_expression ':' statement
//     | 'default' ':' statement
//     ;

// NOTE(review): if this rule is ever re-enabled, the @init below should use
// set() like the live rules do; direct_declarator calls types.add(), which a
// dict does not provide.
// compound_statement
// scope Symbols; // blocks have a scope of symbols
// @init {
//   $Symbols::types = {}
// }
//     : '{' declaration* statement_list? '}'
//     ;

// statement_list
//     : statement+
//     ;

// expression_statement
//     : ';'
//     | expression ';'
//     ;

// selection_statement
//     : 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
//     | 'switch' '(' expression ')' statement
//     ;

// iteration_statement
//     : 'while' '(' expression ')' statement
//     | 'do' statement 'while' '(' expression ')' ';'
//     | 'for' '(' expression_statement expression_statement expression? ')' statement
//     ;

// jump_statement
//     : 'goto' IDENTIFIER ';'
//     | 'continue' ';'
//     | 'break' ';'
//     | 'return' ';'
//     | 'return' expression ';'
//     ;

// ---------------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------------

// A letter followed by any mix of letters and decimal digits.
IDENTIFIER
    : LETTER (LETTER|'0'..'9')*
    ;

// Characters allowed to start an identifier ('$' and '_' count as letters).
fragment
LETTER
    : '$'
    | 'A'..'Z'
    | 'a'..'z'
    | '_'
    ;

// Exactly one escape sequence or one plain character between single quotes.
CHARACTER_LITERAL
    : '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    ;

// Any number of escape sequences / plain characters between double quotes.
STRING_LITERAL
    : '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;

HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;

DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;

OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;

fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;

// u/U and l/L in either combination, at least one of them present.
fragment
IntegerTypeSuffix
    : ('u'|'U')? ('l'|'L')
    | ('u'|'U') ('l'|'L')?
    ;

// Four shapes: digits with a dot, leading dot, digits with a mandatory
// exponent, and digits with a mandatory type suffix.
FLOATING_POINT_LITERAL
    : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    | '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    | ('0'..'9')+ Exponent FloatTypeSuffix?
    | ('0'..'9')+ Exponent? FloatTypeSuffix
    ;

fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;

// Single-character escapes plus octal escapes.
fragment
EscapeSequence
    : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    | OctalEscape
    ;

// One to three octal digits; the three-digit form starts with 0..3.
fragment
OctalEscape
    : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    | '\\' ('0'..'7') ('0'..'7')
    | '\\' ('0'..'7')
    ;

// NOTE(review): no rule visible in this grammar references UnicodeEscape
// (EscapeSequence does not include it) -- confirm whether that is intended.
fragment
UnicodeEscape
    : '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;

// Each whitespace character becomes its own token on the hidden channel.
WS  : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
    ;

// Block comment; the non-greedy loop stops at the first '*/'.
COMMENT
    : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;

// Line comment, hidden from the parser.
// The line terminator is deliberately NOT part of the token: it is picked up
// by WS (also hidden). Requiring '\r'? '\n' here made a comment on the last
// line of an input without a trailing newline fail to lex.
LINE_COMMENT
    : '//' ~('\n'|'\r')* {$channel=HIDDEN;}
    ;

// ignore #line info for now
// Same end-of-input consideration as LINE_COMMENT: the token stops before the
// line terminator, which is left to WS.
LINE_COMMAND
    : '#' ~('\n'|'\r')* {$channel=HIDDEN;}
    ;