1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.grammar.v3.ANTLRParser; 31 import org.antlr.misc.IntSet; 32 import org.antlr.misc.IntervalSet; 33 import org.antlr.tool.Grammar; 34 import org.antlr.tool.Rule; 35 36 import java.util.HashMap; 37 import java.util.HashSet; 38 import java.util.Map; 39 import java.util.Set; 40 41 /** 42 * Created by IntelliJ IDEA. 43 * User: parrt 44 * Date: Dec 31, 2007 45 * Time: 1:31:16 PM 46 * To change this template use File | Settings | File Templates. 47 */ 48 public class LL1Analyzer { 49 /** 0 if we hit end of rule and invoker should keep going (epsilon) */ 50 public static final int DETECT_PRED_EOR = 0; 51 /** 1 if we found a nonautobacktracking pred */ 52 public static final int DETECT_PRED_FOUND = 1; 53 /** 2 if we didn't find such a pred */ 54 public static final int DETECT_PRED_NOT_FOUND = 2; 55 56 public Grammar grammar; 57 58 /** Used during LOOK to detect computation cycles */ 59 protected Set<NFAState> lookBusy = new HashSet<NFAState>(); 60 61 public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>(); 62 public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>(); 63 LL1Analyzer(Grammar grammar)64 public LL1Analyzer(Grammar grammar) { 65 this.grammar = grammar; 66 } 67 68 /* 69 public void computeRuleFIRSTSets() { 70 if ( getNumberOfDecisions()==0 ) { 71 createNFAs(); 72 } 73 for (Iterator it = getRules().iterator(); it.hasNext();) { 74 Rule r = (Rule)it.next(); 75 if ( r.isSynPred ) { 76 continue; 77 } 78 LookaheadSet s = FIRST(r); 79 System.out.println("FIRST("+r.name+")="+s); 80 } 81 } 82 */ 83 84 /* 85 public Set<String> getOverriddenRulesWithDifferentFIRST() { 86 // walk every rule in this grammar and compare FIRST set with 87 // those in imported grammars. 88 Set<String> rules = new HashSet(); 89 for (Iterator it = getRules().iterator(); it.hasNext();) { 90 Rule r = (Rule)it.next(); 91 //System.out.println(r.name+" FIRST="+r.FIRST); 92 for (int i = 0; i < delegates.size(); i++) { 93 Grammar g = delegates.get(i); 94 Rule importedRule = g.getRule(r.name); 95 if ( importedRule != null ) { // exists in imported grammar 96 // System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST); 97 if ( !r.FIRST.equals(importedRule.FIRST) ) { 98 rules.add(r.name); 99 } 100 } 101 } 102 } 103 return rules; 104 } 105 106 public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() { 107 Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST(); 108 Set<Rule> rules = new HashSet(); 109 for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) { 110 String r = (String) it.next(); 111 for (int i = 0; i < delegates.size(); i++) { 112 Grammar g = delegates.get(i); 113 Set<Rule> callers = g.ruleSensitivity.get(r); 114 // somebody invokes rule whose FIRST changed in subgrammar? 115 if ( callers!=null ) { 116 rules.addAll(callers); 117 //System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em"); 118 } 119 } 120 } 121 return rules; 122 } 123 */ 124 125 /* 126 public LookaheadSet LOOK(Rule r) { 127 if ( r.FIRST==null ) { 128 r.FIRST = FIRST(r.startState); 129 } 130 return r.FIRST; 131 } 132 */ 133 134 /** From an NFA state, s, find the set of all labels reachable from s. 135 * Used to compute follow sets for error recovery. Never computes 136 * a FOLLOW operation. FIRST stops at end of rules, returning EOR, unless 137 * invoked from another rule. I.e., routine properly handles 138 * 139 * a : b A ; 140 * 141 * where b is nullable. 142 * 143 * We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can 144 * know at runtime (when these sets are used) to start walking up the 145 * follow chain to compute the real, correct follow set (as opposed to 146 * the FOLLOW, which is a superset). 147 * 148 * This routine will only be used on parser and tree parser grammars. 149 */ FIRST(NFAState s)150 public LookaheadSet FIRST(NFAState s) { 151 //System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule); 152 lookBusy.clear(); 153 LookaheadSet look = _FIRST(s, false); 154 //System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar)); 155 return look; 156 } 157 FOLLOW(Rule r)158 public LookaheadSet FOLLOW(Rule r) { 159 //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule); 160 LookaheadSet f = FOLLOWCache.get(r); 161 if ( f!=null ) { 162 return f; 163 } 164 f = _FIRST(r.stopState, true); 165 FOLLOWCache.put(r, f); 166 //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar)); 167 return f; 168 } 169 LOOK(NFAState s)170 public LookaheadSet LOOK(NFAState s) { 171 if ( NFAToDFAConverter.debug ) { 172 System.out.println("> LOOK("+s+")"); 173 } 174 lookBusy.clear(); 175 LookaheadSet look = _FIRST(s, true); 176 // FOLLOW makes no sense (at the moment!) for lexical rules. 177 if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) { 178 // avoid altering FIRST reset as it is cached 179 LookaheadSet f = FOLLOW(s.enclosingRule); 180 f.orInPlace(look); 181 f.remove(Label.EOR_TOKEN_TYPE); 182 look = f; 183 //look.orInPlace(FOLLOW(s.enclosingRule)); 184 } 185 else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) { 186 // if this has EOT, lookahead is all char (all char can follow rule) 187 //look = new LookaheadSet(Label.EOT); 188 look = new LookaheadSet(IntervalSet.COMPLETE_SET); 189 } 190 if ( NFAToDFAConverter.debug ) { 191 System.out.println("< LOOK("+s+")="+look.toString(grammar)); 192 } 193 return look; 194 } 195 _FIRST(NFAState s, boolean chaseFollowTransitions)196 protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) { 197 /* 198 System.out.println("_LOOK("+s+") in rule "+s.enclosingRule); 199 if ( s.transition[0] instanceof RuleClosureTransition ) { 200 System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule); 201 } 202 */ 203 if ( !chaseFollowTransitions && s.isAcceptState() ) { 204 if ( grammar.type==Grammar.LEXER ) { 205 // FOLLOW makes no sense (at the moment!) for lexical rules. 206 // assume all char can follow 207 return new LookaheadSet(IntervalSet.COMPLETE_SET); 208 } 209 return new LookaheadSet(Label.EOR_TOKEN_TYPE); 210 } 211 212 if ( lookBusy.contains(s) ) { 213 // return a copy of an empty set; we may modify set inline 214 return new LookaheadSet(); 215 } 216 lookBusy.add(s); 217 218 Transition transition0 = s.transition[0]; 219 if ( transition0==null ) { 220 return null; 221 } 222 223 if ( transition0.label.isAtom() ) { 224 int atom = transition0.label.getAtom(); 225 return new LookaheadSet(atom); 226 } 227 if ( transition0.label.isSet() ) { 228 IntSet sl = transition0.label.getSet(); 229 return new LookaheadSet(sl); 230 } 231 232 // compute FIRST of transition 0 233 LookaheadSet tset = null; 234 // if transition 0 is a rule call and we don't want FOLLOW, check cache 235 if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) { 236 tset = FIRSTCache.get((NFAState)transition0.target); 237 } 238 239 // if not in cache, must compute 240 if ( tset==null ) { 241 tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions); 242 // save FIRST cache for transition 0 if rule call 243 if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) { 244 FIRSTCache.put((NFAState)transition0.target, tset); 245 } 246 } 247 248 LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance 249 250 // did we fall off the end? 251 if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) { 252 if ( transition0 instanceof RuleClosureTransition ) { 253 // we called a rule that found the end of the rule. 254 // That means the rule is nullable and we need to 255 // keep looking at what follows the rule ref. E.g., 256 // a : b A ; where b is nullable means that LOOK(a) 257 // should include A. 258 RuleClosureTransition ruleInvocationTrans = 259 (RuleClosureTransition)transition0; 260 // remove the EOR and get what follows 261 //tset.remove(Label.EOR_TOKEN_TYPE); 262 NFAState following = ruleInvocationTrans.followState; 263 LookaheadSet fset = _FIRST(following, chaseFollowTransitions); 264 fset.orInPlace(tset); // tset cached; or into new set 265 fset.remove(Label.EOR_TOKEN_TYPE); 266 tset = fset; 267 } 268 } 269 270 Transition transition1 = s.transition[1]; 271 if ( transition1!=null ) { 272 LookaheadSet tset1 = 273 _FIRST((NFAState)transition1.target, chaseFollowTransitions); 274 tset1.orInPlace(tset); 275 tset = tset1; 276 } 277 278 // never return a cached set; clone 279 return tset==tsetCached ? new LookaheadSet(tset) : tset; 280 } 281 282 /** Is there a non-syn-pred predicate visible from s that is not in 283 * the rule enclosing s? This accounts for most predicate situations 284 * and lets ANTLR do a simple LL(1)+pred computation. 285 * 286 * TODO: what about gated vs regular preds? 287 */ detectConfoundingPredicates(NFAState s)288 public boolean detectConfoundingPredicates(NFAState s) { 289 lookBusy.clear(); 290 Rule r = s.enclosingRule; 291 return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND; 292 } 293 _detectConfoundingPredicates(NFAState s, Rule enclosingRule, boolean chaseFollowTransitions)294 protected int _detectConfoundingPredicates(NFAState s, 295 Rule enclosingRule, 296 boolean chaseFollowTransitions) 297 { 298 //System.out.println("_detectNonAutobacktrackPredicates("+s+")"); 299 if ( !chaseFollowTransitions && s.isAcceptState() ) { 300 if ( grammar.type==Grammar.LEXER ) { 301 // FOLLOW makes no sense (at the moment!) for lexical rules. 302 // assume all char can follow 303 return DETECT_PRED_NOT_FOUND; 304 } 305 return DETECT_PRED_EOR; 306 } 307 308 if ( lookBusy.contains(s) ) { 309 // return a copy of an empty set; we may modify set inline 310 return DETECT_PRED_NOT_FOUND; 311 } 312 lookBusy.add(s); 313 314 Transition transition0 = s.transition[0]; 315 if ( transition0==null ) { 316 return DETECT_PRED_NOT_FOUND; 317 } 318 319 if ( !(transition0.label.isSemanticPredicate()|| 320 transition0.label.isEpsilon()) ) { 321 return DETECT_PRED_NOT_FOUND; 322 } 323 324 if ( transition0.label.isSemanticPredicate() ) { 325 //System.out.println("pred "+transition0.label); 326 SemanticContext ctx = transition0.label.getSemanticContext(); 327 SemanticContext.Predicate p = (SemanticContext.Predicate)ctx; 328 if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) { 329 return DETECT_PRED_FOUND; 330 } 331 } 332 333 /* 334 if ( transition0.label.isSemanticPredicate() ) { 335 System.out.println("pred "+transition0.label); 336 SemanticContext ctx = transition0.label.getSemanticContext(); 337 SemanticContext.Predicate p = (SemanticContext.Predicate)ctx; 338 // if a non-syn-pred found not in enclosingRule, say we found one 339 if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED && 340 !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) ) 341 { 342 System.out.println("found pred "+p+" not in "+enclosingRule.name); 343 return DETECT_PRED_FOUND; 344 } 345 } 346 */ 347 348 int result = _detectConfoundingPredicates((NFAState)transition0.target, 349 enclosingRule, 350 chaseFollowTransitions); 351 if ( result == DETECT_PRED_FOUND ) { 352 return DETECT_PRED_FOUND; 353 } 354 355 if ( result == DETECT_PRED_EOR ) { 356 if ( transition0 instanceof RuleClosureTransition ) { 357 // we called a rule that found the end of the rule. 358 // That means the rule is nullable and we need to 359 // keep looking at what follows the rule ref. E.g., 360 // a : b A ; where b is nullable means that LOOK(a) 361 // should include A. 362 RuleClosureTransition ruleInvocationTrans = 363 (RuleClosureTransition)transition0; 364 NFAState following = ruleInvocationTrans.followState; 365 int afterRuleResult = 366 _detectConfoundingPredicates(following, 367 enclosingRule, 368 chaseFollowTransitions); 369 if ( afterRuleResult == DETECT_PRED_FOUND ) { 370 return DETECT_PRED_FOUND; 371 } 372 } 373 } 374 375 Transition transition1 = s.transition[1]; 376 if ( transition1!=null ) { 377 int t1Result = 378 _detectConfoundingPredicates((NFAState)transition1.target, 379 enclosingRule, 380 chaseFollowTransitions); 381 if ( t1Result == DETECT_PRED_FOUND ) { 382 return DETECT_PRED_FOUND; 383 } 384 } 385 386 return DETECT_PRED_NOT_FOUND; 387 } 388 389 /** Return predicate expression found via epsilon edges from s. Do 390 * not look into other rules for now. Do something simple. Include 391 * backtracking synpreds. 392 */ getPredicates(NFAState altStartState)393 public SemanticContext getPredicates(NFAState altStartState) { 394 lookBusy.clear(); 395 return _getPredicates(altStartState, altStartState); 396 } 397 _getPredicates(NFAState s, NFAState altStartState)398 protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) { 399 //System.out.println("_getPredicates("+s+")"); 400 if ( s.isAcceptState() ) { 401 return null; 402 } 403 404 // avoid infinite loops from (..)* etc... 405 if ( lookBusy.contains(s) ) { 406 return null; 407 } 408 lookBusy.add(s); 409 410 Transition transition0 = s.transition[0]; 411 // no transitions 412 if ( transition0==null ) { 413 return null; 414 } 415 416 // not a predicate and not even an epsilon 417 if ( !(transition0.label.isSemanticPredicate()|| 418 transition0.label.isEpsilon()) ) { 419 return null; 420 } 421 422 SemanticContext p = null; 423 SemanticContext p0; 424 SemanticContext p1 = null; 425 if ( transition0.label.isSemanticPredicate() ) { 426 //System.out.println("pred "+transition0.label); 427 p = transition0.label.getSemanticContext(); 428 // ignore backtracking preds not on left edge for this decision 429 if ( ((SemanticContext.Predicate)p).predicateAST.getType() == 430 ANTLRParser.BACKTRACK_SEMPRED && 431 s == altStartState.transition[0].target ) 432 { 433 p = null; // don't count 434 } 435 } 436 437 // get preds from beyond this state 438 p0 = _getPredicates((NFAState)transition0.target, altStartState); 439 440 // get preds from other transition 441 Transition transition1 = s.transition[1]; 442 if ( transition1!=null ) { 443 p1 = _getPredicates((NFAState)transition1.target, altStartState); 444 } 445 446 // join this&following-right|following-down 447 return SemanticContext.and(p,SemanticContext.or(p0,p1)); 448 } 449 } 450