1#!/usr/bin/ruby 2# encoding: utf-8 3 4require 'antlr3/test/functional' 5 6class TestBacktracking < ANTLR3::Test::Functional 7 8 inline_grammar( <<-'END' ) 9 grammar Backtrack; 10 options { 11 language = Ruby; 12 backtrack=true; 13 memoize=true; 14 k=2; 15 } 16 17 scope Symbols { 18 types; 19 } 20 21 @members { 22 def is_type_name?(name) 23 @Symbols_stack.reverse_each do |scope| 24 scope.types.include?(name) and return true 25 end 26 return false 27 end 28 29 def report_error(e) 30 # do nothing 31 end 32 33 } 34 35 translation_unit 36 scope Symbols; // entire file is a scope 37 @init { 38 $Symbols::types = Set.new 39 } 40 : external_declaration+ 41 ; 42 43 /** Either a function definition or any other kind of C decl/def. 44 * The LL(*) analysis algorithm fails to deal with this due to 45 * recursion in the declarator rules. I'm putting in a 46 * manual predicate here so that we don't backtrack over 47 * the entire function. Further, you get a better error 48 * as errors within the function itself don't make it fail 49 * to predict that it's a function. Weird errors previously. 50 * Remember: the goal is to avoid backtrack like the plague 51 * because it makes debugging, actions, and errors harder. 52 * 53 * Note that k=1 results in a much smaller predictor for the 54 * fixed look; k=2 made a few extra thousand lines. ;) 55 * I'll have to optimize that in the future. 56 */ 57 external_declaration 58 options {k=1;} 59 : ( declaration_specifiers? declarator declaration* '{' )=> function_definition 60 | declaration 61 ; 62 63 function_definition 64 scope Symbols; // put parameters and locals into same scope for now 65 @init { 66 $Symbols::types = set() 67 } 68 : declaration_specifiers? declarator 69 ; 70 71 declaration 72 scope { 73 is_type_def; 74 } 75 @init { 76 $declaration::is_type_def = false 77 } 78 : 'typedef' declaration_specifiers? {$declaration::is_type_def = true} 79 init_declarator_list ';' // special case, looking for typedef 80 | declaration_specifiers init_declarator_list? ';' 81 ; 82 83 declaration_specifiers 84 : ( storage_class_specifier 85 | type_specifier 86 | type_qualifier 87 )+ 88 ; 89 90 init_declarator_list 91 : init_declarator (',' init_declarator)* 92 ; 93 94 init_declarator 95 : declarator //('=' initializer)? 96 ; 97 98 storage_class_specifier 99 : 'extern' 100 | 'static' 101 | 'auto' 102 | 'register' 103 ; 104 105 type_specifier 106 : 'void' 107 | 'char' 108 | 'short' 109 | 'int' 110 | 'long' 111 | 'float' 112 | 'double' 113 | 'signed' 114 | 'unsigned' 115 | type_id 116 ; 117 118 type_id 119 : { is_type_name?(@input.look(1).text)}? IDENTIFIER 120 ; 121 122 type_qualifier 123 : 'const' 124 | 'volatile' 125 ; 126 127 declarator 128 : pointer? direct_declarator 129 | pointer 130 ; 131 132 direct_declarator 133 : ( IDENTIFIER 134 { 135 if $declaration.length > 0 && $declaration::is_type_def 136 $Symbols::types.add($IDENTIFIER.text) 137 end 138 } 139 | '(' declarator ')' 140 ) 141 declarator_suffix* 142 ; 143 144 declarator_suffix 145 : /*'[' constant_expression ']' 146 |*/ '[' ']' 147 | '(' ')' 148 ; 149 150 pointer 151 : '*' type_qualifier+ pointer? 152 | '*' pointer 153 | '*' 154 ; 155 156 IDENTIFIER 157 : LETTER (LETTER|'0'..'9')* 158 ; 159 160 fragment 161 LETTER 162 : '$' 163 | 'A'..'Z' 164 | 'a'..'z' 165 | '_' 166 ; 167 168 CHARACTER_LITERAL 169 : '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' 170 ; 171 172 STRING_LITERAL 173 : '"' ( EscapeSequence | ~('\\'|'"') )* '"' 174 ; 175 176 HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; 177 178 DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; 179 180 OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ; 181 182 fragment 183 HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; 184 185 fragment 186 IntegerTypeSuffix 187 : ('u'|'U')? ('l'|'L') 188 | ('u'|'U') ('l'|'L')? 189 ; 190 191 FLOATING_POINT_LITERAL 192 : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? 193 | '.' ('0'..'9')+ Exponent? FloatTypeSuffix? 194 | ('0'..'9')+ Exponent FloatTypeSuffix? 195 | ('0'..'9')+ Exponent? FloatTypeSuffix 196 ; 197 198 fragment 199 Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; 200 201 fragment 202 FloatTypeSuffix : ('f'|'F'|'d'|'D') ; 203 204 fragment 205 EscapeSequence 206 : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') 207 | OctalEscape 208 ; 209 210 fragment 211 OctalEscape 212 : '\\' ('0'..'3') ('0'..'7') ('0'..'7') 213 | '\\' ('0'..'7') ('0'..'7') 214 | '\\' ('0'..'7') 215 ; 216 217 fragment 218 UnicodeEscape 219 : '\\' 'u' HexDigit HexDigit HexDigit HexDigit 220 ; 221 222 WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} 223 ; 224 225 COMMENT 226 : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} 227 ; 228 229 LINE_COMMENT 230 : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} 231 ; 232 LINE_COMMAND 233 : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} 234 ; 235 END 236 237 example "grammar with backtracking and memoization" do 238 lexer = Backtrack::Lexer.new( 'int a;' ) 239 parser = Backtrack::Parser.new lexer 240 events = parser.translation_unit 241 end 242 243end 244