1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.misc.IntSet; 31 import org.antlr.misc.MultiMap; 32 import org.antlr.misc.OrderedHashSet; 33 import org.antlr.misc.Utils; 34 import org.antlr.tool.Grammar; 35 36 import java.util.*; 37 38 /** A DFA state represents a set of possible NFA configurations. 39 * As Aho, Sethi, Ullman p. 117 says "The DFA uses its state 40 * to keep track of all possible states the NFA can be in after 41 * reading each input symbol. That is to say, after reading 42 * input a1a2..an, the DFA is in a state that represents the 43 * subset T of the states of the NFA that are reachable from the 44 * NFA's start state along some path labeled a1a2..an." 45 * In conventional NFA->DFA conversion, therefore, the subset T 46 * would be a bitset representing the set of states the 47 * NFA could be in. We need to track the alt predicted by each 48 * state as well, however. More importantly, we need to maintain 49 * a stack of states, tracking the closure operations as they 50 * jump from rule to rule, emulating rule invocations (method calls). 51 * Recall that NFAs do not normally have a stack like a pushdown-machine 52 * so I have to add one to simulate the proper lookahead sequences for 53 * the underlying LL grammar from which the NFA was derived. 54 * 55 * I use a list of NFAConfiguration objects. An NFAConfiguration 56 * is both a state (ala normal conversion) and an NFAContext describing 57 * the chain of rules (if any) followed to arrive at that state. There 58 * is also the semantic context, which is the "set" of predicates found 59 * on the path to this configuration. 60 * 61 * A DFA state may have multiple references to a particular state, 62 * but with different NFAContexts (with same or different alts) 63 * meaning that state was reached via a different set of rule invocations. 64 */ 65 public class DFAState extends State { 66 public static final int INITIAL_NUM_TRANSITIONS = 4; 67 public static final int PREDICTED_ALT_UNSET = NFA.INVALID_ALT_NUMBER-1; 68 69 /** We are part of what DFA? Use this ref to get access to the 70 * context trees for an alt. 71 */ 72 public DFA dfa; 73 74 /** Track the transitions emanating from this DFA state. The List 75 * elements are Transition objects. 76 */ 77 protected List<Transition> transitions = 78 new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS); 79 80 /** When doing an acyclic DFA, this is the number of lookahead symbols 81 * consumed to reach this state. This value may be nonzero for most 82 * dfa states, but it is only a valid value if the user has specified 83 * a max fixed lookahead. 84 */ 85 protected int k; 86 87 /** The NFA->DFA algorithm may terminate leaving some states 88 * without a path to an accept state, implying that upon certain 89 * input, the decision is not deterministic--no decision about 90 * predicting a unique alternative can be made. Recall that an 91 * accept state is one in which a unique alternative is predicted. 92 */ 93 protected int acceptStateReachable = DFA.REACHABLE_UNKNOWN; 94 95 /** Rather than recheck every NFA configuration in a DFA state (after 96 * resolving) in findNewDFAStatesAndAddDFATransitions just check 97 * this boolean. Saves a linear walk perhaps DFA state creation. 98 * Every little bit helps. 99 */ 100 protected boolean resolvedWithPredicates = false; 101 102 /** If a closure operation finds that we tried to invoke the same 103 * rule too many times (stack would grow beyond a threshold), it 104 * marks the state has aborted and notifies the DecisionProbe. 105 */ 106 public boolean abortedDueToRecursionOverflow = false; 107 108 /** If we detect recursion on more than one alt, decision is non-LL(*), 109 * but try to isolate it to only those states whose closure operations 110 * detect recursion. There may be other alts that are cool: 111 * 112 * a : recur '.' 113 * | recur ';' 114 * | X Y // LL(2) decision; don't abort and use k=1 plus backtracking 115 * | X Z 116 * ; 117 * 118 * 12/13/2007: Actually this has caused problems. If k=*, must terminate 119 * and throw out entire DFA; retry with k=1. Since recursive, do not 120 * attempt more closure ops as it may take forever. Exception thrown 121 * now and we simply report the problem. If synpreds exist, I'll retry 122 * with k=1. 123 */ 124 protected boolean abortedDueToMultipleRecursiveAlts = false; 125 126 /** Build up the hash code for this state as NFA configurations 127 * are added as it's monotonically increasing list of configurations. 128 */ 129 protected int cachedHashCode; 130 131 protected int cachedUniquelyPredicatedAlt = PREDICTED_ALT_UNSET; 132 133 public int minAltInConfigurations=Integer.MAX_VALUE; 134 135 public boolean atLeastOneConfigurationHasAPredicate = false; 136 137 /** The set of NFA configurations (state,alt,context) for this DFA state */ 138 public OrderedHashSet<NFAConfiguration> nfaConfigurations = 139 new OrderedHashSet<NFAConfiguration>(); 140 141 public List<NFAConfiguration> configurationsWithLabeledEdges = 142 new ArrayList<NFAConfiguration>(); 143 144 /** Used to prevent the closure operation from looping to itself and 145 * hence looping forever. Sensitive to the NFA state, the alt, and 146 * the stack context. This just the nfa config set because we want to 147 * prevent closures only on states contributed by closure not reach 148 * operations. 149 * 150 * Two configurations identical including semantic context are 151 * considered the same closure computation. @see NFAToDFAConverter.closureBusy(). 152 */ 153 protected Set<NFAConfiguration> closureBusy = new HashSet<NFAConfiguration>(); 154 155 /** As this state is constructed (i.e., as NFA states are added), we 156 * can easily check for non-epsilon transitions because the only 157 * transition that could be a valid label is transition(0). When we 158 * process this node eventually, we'll have to walk all states looking 159 * for all possible transitions. That is of the order: size(label space) 160 * times size(nfa states), which can be pretty damn big. It's better 161 * to simply track possible labels. 162 */ 163 protected OrderedHashSet<Label> reachableLabels; 164 DFAState(DFA dfa)165 public DFAState(DFA dfa) { 166 this.dfa = dfa; 167 } 168 reset()169 public void reset() { 170 //nfaConfigurations = null; // getGatedPredicatesInNFAConfigurations needs 171 configurationsWithLabeledEdges = null; 172 closureBusy = null; 173 reachableLabels = null; 174 } 175 transition(int i)176 public Transition transition(int i) { 177 return (Transition)transitions.get(i); 178 } 179 getNumberOfTransitions()180 public int getNumberOfTransitions() { 181 return transitions.size(); 182 } 183 addTransition(Transition t)184 public void addTransition(Transition t) { 185 transitions.add(t); 186 } 187 188 /** Add a transition from this state to target with label. Return 189 * the transition number from 0..n-1. 190 */ addTransition(DFAState target, Label label)191 public int addTransition(DFAState target, Label label) { 192 transitions.add( new Transition(label, target) ); 193 return transitions.size()-1; 194 } 195 getTransition(int trans)196 public Transition getTransition(int trans) { 197 return transitions.get(trans); 198 } 199 removeTransition(int trans)200 public void removeTransition(int trans) { 201 transitions.remove(trans); 202 } 203 204 /** Add an NFA configuration to this DFA node. Add uniquely 205 * an NFA state/alt/syntactic&semantic context (chain of invoking state(s) 206 * and semantic predicate contexts). 207 * 208 * I don't see how there could be two configurations with same 209 * state|alt|synCtx and different semantic contexts because the 210 * semantic contexts are computed along the path to a particular state 211 * so those two configurations would have to have the same predicate. 212 * Nonetheless, the addition of configurations is unique on all 213 * configuration info. I guess I'm saying that syntactic context 214 * implies semantic context as the latter is computed according to the 215 * former. 216 * 217 * As we add configurations to this DFA state, track the set of all possible 218 * transition labels so we can simply walk it later rather than doing a 219 * loop over all possible labels in the NFA. 220 */ addNFAConfiguration(NFAState state, NFAConfiguration c)221 public void addNFAConfiguration(NFAState state, NFAConfiguration c) { 222 if ( nfaConfigurations.contains(c) ) { 223 return; 224 } 225 226 nfaConfigurations.add(c); 227 228 // track min alt rather than compute later 229 if ( c.alt < minAltInConfigurations ) { 230 minAltInConfigurations = c.alt; 231 } 232 233 if ( c.semanticContext!=SemanticContext.EMPTY_SEMANTIC_CONTEXT ) { 234 atLeastOneConfigurationHasAPredicate = true; 235 } 236 237 // update hashCode; for some reason using context.hashCode() also 238 // makes the GC take like 70% of the CPU and is slow! 239 cachedHashCode += c.state + c.alt; 240 241 // update reachableLabels 242 // We're adding an NFA state; check to see if it has a non-epsilon edge 243 if ( state.transition[0] != null ) { 244 Label label = state.transition[0].label; 245 if ( !(label.isEpsilon()||label.isSemanticPredicate()) ) { 246 // this NFA state has a non-epsilon edge, track for fast 247 // walking later when we do reach on this DFA state we're 248 // building. 249 configurationsWithLabeledEdges.add(c); 250 if ( state.transition[1] ==null ) { 251 // later we can check this to ignore o-A->o states in closure 252 c.singleAtomTransitionEmanating = true; 253 } 254 addReachableLabel(label); 255 } 256 } 257 } 258 addNFAConfiguration(NFAState state, int alt, NFAContext context, SemanticContext semanticContext)259 public NFAConfiguration addNFAConfiguration(NFAState state, 260 int alt, 261 NFAContext context, 262 SemanticContext semanticContext) 263 { 264 NFAConfiguration c = new NFAConfiguration(state.stateNumber, 265 alt, 266 context, 267 semanticContext); 268 addNFAConfiguration(state, c); 269 return c; 270 } 271 272 /** Add label uniquely and disjointly; intersection with 273 * another set or int/char forces breaking up the set(s). 274 * 275 * Example, if reachable list of labels is [a..z, {k,9}, 0..9], 276 * the disjoint list will be [{a..j,l..z}, k, 9, 0..8]. 277 * 278 * As we add NFA configurations to a DFA state, we might as well track 279 * the set of all possible transition labels to make the DFA conversion 280 * more efficient. W/o the reachable labels, we'd need to check the 281 * whole vocabulary space (could be 0..\uFFFF)! The problem is that 282 * labels can be sets, which may overlap with int labels or other sets. 283 * As we need a deterministic set of transitions from any 284 * state in the DFA, we must make the reachable labels set disjoint. 285 * This operation amounts to finding the character classes for this 286 * DFA state whereas with tools like flex, that need to generate a 287 * homogeneous DFA, must compute char classes across all states. 288 * We are going to generate DFAs with heterogeneous states so we 289 * only care that the set of transitions out of a single state are 290 * unique. :) 291 * 292 * The idea for adding a new set, t, is to look for overlap with the 293 * elements of existing list s. Upon overlap, replace 294 * existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t. 295 * (if s[i]-t is nil, don't add). The remainder is t-s[i], which is 296 * what you want to add to the set minus what was already there. The 297 * remainder must then be compared against the i+1..n elements in s 298 * looking for another collision. Each collision results in a smaller 299 * and smaller remainder. Stop when you run out of s elements or 300 * remainder goes to nil. If remainder is non nil when you run out of 301 * s elements, then add remainder to the end. 302 * 303 * Single element labels are treated as sets to make the code uniform. 304 */ addReachableLabel(Label label)305 protected void addReachableLabel(Label label) { 306 if ( reachableLabels==null ) { 307 reachableLabels = new OrderedHashSet<Label>(); 308 } 309 /* 310 System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar)); 311 System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " + 312 "reachableLabels="+reachableLabels.toString()); 313 */ 314 if ( reachableLabels.contains(label) ) { // exact label present 315 return; 316 } 317 IntSet t = label.getSet(); 318 IntSet remainder = t; // remainder starts out as whole set to add 319 int n = reachableLabels.size(); // only look at initial elements 320 // walk the existing list looking for the collision 321 for (int i=0; i<n; i++) { 322 Label rl = reachableLabels.get(i); 323 /* 324 System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+ 325 rl.toString(dfa.nfa.grammar)+"="+ 326 intersection.toString(dfa.nfa.grammar)); 327 */ 328 if ( !Label.intersect(label, rl) ) { 329 continue; 330 } 331 //System.out.println(label+" collides with "+rl); 332 333 // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t) 334 // (ignoring s_i-t if nil; don't put in list) 335 336 // Replace existing s_i with intersection since we 337 // know that will always be a non nil character class 338 IntSet s_i = rl.getSet(); 339 IntSet intersection = s_i.and(t); 340 reachableLabels.set(i, new Label(intersection)); 341 342 // Compute s_i-t to see what is in current set and not in incoming 343 IntSet existingMinusNewElements = s_i.subtract(t); 344 //System.out.println(s_i+"-"+t+"="+existingMinusNewElements); 345 if ( !existingMinusNewElements.isNil() ) { 346 // found a new character class, add to the end (doesn't affect 347 // outer loop duration due to n computation a priori. 348 Label newLabel = new Label(existingMinusNewElements); 349 reachableLabels.add(newLabel); 350 } 351 352 /* 353 System.out.println("after collision, " + 354 "reachableLabels="+reachableLabels.toString()); 355 */ 356 357 // anything left to add to the reachableLabels? 358 remainder = t.subtract(s_i); 359 if ( remainder.isNil() ) { 360 break; // nothing left to add to set. done! 361 } 362 363 t = remainder; 364 } 365 if ( !remainder.isNil() ) { 366 /* 367 System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " + 368 "reachableLabels="+reachableLabels.toString()); 369 System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar)); 370 */ 371 Label newLabel = new Label(remainder); 372 reachableLabels.add(newLabel); 373 } 374 /* 375 System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " + 376 "reachableLabels="+reachableLabels.toString()); 377 */ 378 } 379 getReachableLabels()380 public OrderedHashSet getReachableLabels() { 381 return reachableLabels; 382 } 383 setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs)384 public void setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs) { 385 this.nfaConfigurations = configs; 386 } 387 388 /** A decent hash for a DFA state is the sum of the NFA state/alt pairs. 389 * This is used when we add DFAState objects to the DFA.states Map and 390 * when we compare DFA states. Computed in addNFAConfiguration() 391 */ hashCode()392 public int hashCode() { 393 if ( cachedHashCode==0 ) { 394 // LL(1) algorithm doesn't use NFA configurations, which 395 // dynamically compute hashcode; must have something; use super 396 return super.hashCode(); 397 } 398 return cachedHashCode; 399 } 400 401 /** Two DFAStates are equal if their NFA configuration sets are the 402 * same. This method is used to see if a DFA state already exists. 403 * 404 * Because the number of alternatives and number of NFA configurations are 405 * finite, there is a finite number of DFA states that can be processed. 406 * This is necessary to show that the algorithm terminates. 407 * 408 * Cannot test the DFA state numbers here because in DFA.addState we need 409 * to know if any other state exists that has this exact set of NFA 410 * configurations. The DFAState state number is irrelevant. 411 */ equals(Object o)412 public boolean equals(Object o) { 413 // compare set of NFA configurations in this set with other 414 DFAState other = (DFAState)o; 415 return this.nfaConfigurations.equals(other.nfaConfigurations); 416 } 417 418 /** Walk each configuration and if they are all the same alt, return 419 * that alt else return NFA.INVALID_ALT_NUMBER. Ignore resolved 420 * configurations, but don't ignore resolveWithPredicate configs 421 * because this state should not be an accept state. We need to add 422 * this to the work list and then have semantic predicate edges 423 * emanating from it. 424 */ getUniquelyPredictedAlt()425 public int getUniquelyPredictedAlt() { 426 if ( cachedUniquelyPredicatedAlt!=PREDICTED_ALT_UNSET ) { 427 return cachedUniquelyPredicatedAlt; 428 } 429 int alt = NFA.INVALID_ALT_NUMBER; 430 int numConfigs = nfaConfigurations.size(); 431 for (int i = 0; i < numConfigs; i++) { 432 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 433 // ignore anything we resolved; predicates will still result 434 // in transitions out of this state, so must count those 435 // configurations; i.e., don't ignore resolveWithPredicate configs 436 if ( configuration.resolved ) { 437 continue; 438 } 439 if ( alt==NFA.INVALID_ALT_NUMBER ) { 440 alt = configuration.alt; // found first nonresolved alt 441 } 442 else if ( configuration.alt!=alt ) { 443 return NFA.INVALID_ALT_NUMBER; 444 } 445 } 446 this.cachedUniquelyPredicatedAlt = alt; 447 return alt; 448 } 449 450 /** Return the uniquely mentioned alt from the NFA configurations; 451 * Ignore the resolved bit etc... Return INVALID_ALT_NUMBER 452 * if there is more than one alt mentioned. 453 */ getUniqueAlt()454 public int getUniqueAlt() { 455 int alt = NFA.INVALID_ALT_NUMBER; 456 int numConfigs = nfaConfigurations.size(); 457 for (int i = 0; i < numConfigs; i++) { 458 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 459 if ( alt==NFA.INVALID_ALT_NUMBER ) { 460 alt = configuration.alt; // found first alt 461 } 462 else if ( configuration.alt!=alt ) { 463 return NFA.INVALID_ALT_NUMBER; 464 } 465 } 466 return alt; 467 } 468 469 /** When more than one alternative can match the same input, the first 470 * alternative is chosen to resolve the conflict. The other alts 471 * are "turned off" by setting the "resolved" flag in the NFA 472 * configurations. Return the set of disabled alternatives. For 473 * 474 * a : A | A | A ; 475 * 476 * this method returns {2,3} as disabled. This does not mean that 477 * the alternative is totally unreachable, it just means that for this 478 * DFA state, that alt is disabled. There may be other accept states 479 * for that alt. 480 */ getDisabledAlternatives()481 public Set getDisabledAlternatives() { 482 Set disabled = new LinkedHashSet(); 483 int numConfigs = nfaConfigurations.size(); 484 for (int i = 0; i < numConfigs; i++) { 485 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 486 if ( configuration.resolved ) { 487 disabled.add(Utils.integer(configuration.alt)); 488 } 489 } 490 return disabled; 491 } 492 getNonDeterministicAlts()493 protected Set getNonDeterministicAlts() { 494 int user_k = dfa.getUserMaxLookahead(); 495 if ( user_k>0 && user_k==k ) { 496 // if fixed lookahead, then more than 1 alt is a nondeterminism 497 // if we have hit the max lookahead 498 return getAltSet(); 499 } 500 else if ( abortedDueToMultipleRecursiveAlts || abortedDueToRecursionOverflow ) { 501 // if we had to abort for non-LL(*) state assume all alts are a problem 502 return getAltSet(); 503 } 504 else { 505 return getConflictingAlts(); 506 } 507 } 508 509 /** Walk each NFA configuration in this DFA state looking for a conflict 510 * where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with 511 * context conflicting ctx predicts alts i and j. Return an Integer set 512 * of the alternative numbers that conflict. Two contexts conflict if 513 * they are equal or one is a stack suffix of the other or one is 514 * the empty context. 515 * 516 * Use a hash table to record the lists of configs for each state 517 * as they are encountered. We need only consider states for which 518 * there is more than one configuration. The configurations' predicted 519 * alt must be different or must have different contexts to avoid a 520 * conflict. 521 * 522 * Don't report conflicts for DFA states that have conflicting Tokens 523 * rule NFA states; they will be resolved in favor of the first rule. 524 */ getConflictingAlts()525 protected Set<Integer> getConflictingAlts() { 526 // TODO this is called multiple times: cache result? 527 //System.out.println("getNondetAlts for DFA state "+stateNumber); 528 Set<Integer> nondeterministicAlts = new HashSet<Integer>(); 529 530 // If only 1 NFA conf then no way it can be nondeterministic; 531 // save the overhead. There are many o-a->o NFA transitions 532 // and so we save a hash map and iterator creation for each 533 // state. 534 int numConfigs = nfaConfigurations.size(); 535 if ( numConfigs <=1 ) { 536 return null; 537 } 538 539 // First get a list of configurations for each state. 540 // Most of the time, each state will have one associated configuration. 541 MultiMap<Integer, NFAConfiguration> stateToConfigListMap = 542 new MultiMap<Integer, NFAConfiguration>(); 543 for (int i = 0; i < numConfigs; i++) { 544 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 545 Integer stateI = Utils.integer(configuration.state); 546 stateToConfigListMap.map(stateI, configuration); 547 } 548 // potential conflicts are states with > 1 configuration and diff alts 549 Set states = stateToConfigListMap.keySet(); 550 int numPotentialConflicts = 0; 551 for (Iterator it = states.iterator(); it.hasNext();) { 552 Integer stateI = (Integer) it.next(); 553 boolean thisStateHasPotentialProblem = false; 554 List configsForState = (List)stateToConfigListMap.get(stateI); 555 int alt=0; 556 int numConfigsForState = configsForState.size(); 557 for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) { 558 NFAConfiguration c = (NFAConfiguration) configsForState.get(i); 559 if ( alt==0 ) { 560 alt = c.alt; 561 } 562 else if ( c.alt!=alt ) { 563 /* 564 System.out.println("potential conflict in state "+stateI+ 565 " configs: "+configsForState); 566 */ 567 // 11/28/2005: don't report closures that pinch back 568 // together in Tokens rule. We want to silently resolve 569 // to the first token definition ala lex/flex by ignoring 570 // these conflicts. 571 // Also this ensures that lexers look for more and more 572 // characters (longest match) before resorting to predicates. 573 // TestSemanticPredicates.testLexerMatchesLongestThenTestPred() 574 // for example would terminate at state s1 and test predicate 575 // meaning input "ab" would test preds to decide what to 576 // do but it should match rule C w/o testing preds. 577 if ( dfa.nfa.grammar.type!=Grammar.LEXER || 578 !dfa.decisionNFAStartState.enclosingRule.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) 579 { 580 numPotentialConflicts++; 581 thisStateHasPotentialProblem = true; 582 } 583 } 584 } 585 if ( !thisStateHasPotentialProblem ) { 586 // remove NFA state's configurations from 587 // further checking; no issues with it 588 // (can't remove as it's concurrent modification; set to null) 589 stateToConfigListMap.put(stateI, null); 590 } 591 } 592 593 // a fast check for potential issues; most states have none 594 if ( numPotentialConflicts==0 ) { 595 return null; 596 } 597 598 // we have a potential problem, so now go through config lists again 599 // looking for different alts (only states with potential issues 600 // are left in the states set). Now we will check context. 601 // For example, the list of configs for NFA state 3 in some DFA 602 // state might be: 603 // [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2] 604 // I want to create a map from context to alts looking for overlap: 605 // [28 18 $] -> 2 606 // [28 $] -> 1 607 // [$] -> 1,2 608 // Indeed a conflict exists as same state 3, same context [$], predicts 609 // alts 1 and 2. 610 // walk each state with potential conflicting configurations 611 for (Iterator it = states.iterator(); it.hasNext();) { 612 Integer stateI = (Integer) it.next(); 613 List configsForState = (List)stateToConfigListMap.get(stateI); 614 // compare each configuration pair s, t to ensure: 615 // s.ctx different than t.ctx if s.alt != t.alt 616 int numConfigsForState = 0; 617 if ( configsForState!=null ) { 618 numConfigsForState = configsForState.size(); 619 } 620 for (int i = 0; i < numConfigsForState; i++) { 621 NFAConfiguration s = (NFAConfiguration) configsForState.get(i); 622 for (int j = i+1; j < numConfigsForState; j++) { 623 NFAConfiguration t = (NFAConfiguration)configsForState.get(j); 624 // conflicts means s.ctx==t.ctx or s.ctx is a stack 625 // suffix of t.ctx or vice versa (if alts differ). 626 // Also a conflict if s.ctx or t.ctx is empty 627 if ( s.alt != t.alt && s.context.conflictsWith(t.context) ) { 628 nondeterministicAlts.add(Utils.integer(s.alt)); 629 nondeterministicAlts.add(Utils.integer(t.alt)); 630 } 631 } 632 } 633 } 634 635 if ( nondeterministicAlts.size()==0 ) { 636 return null; 637 } 638 return nondeterministicAlts; 639 } 640 641 /** Get the set of all alts mentioned by all NFA configurations in this 642 * DFA state. 643 */ getAltSet()644 public Set getAltSet() { 645 int numConfigs = nfaConfigurations.size(); 646 Set alts = new HashSet(); 647 for (int i = 0; i < numConfigs; i++) { 648 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 649 alts.add(Utils.integer(configuration.alt)); 650 } 651 if ( alts.size()==0 ) { 652 return null; 653 } 654 return alts; 655 } 656 getGatedSyntacticPredicatesInNFAConfigurations()657 public Set getGatedSyntacticPredicatesInNFAConfigurations() { 658 int numConfigs = nfaConfigurations.size(); 659 Set<SemanticContext> synpreds = new HashSet<SemanticContext>(); 660 for (int i = 0; i < numConfigs; i++) { 661 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 662 SemanticContext gatedPredExpr = 663 configuration.semanticContext.getGatedPredicateContext(); 664 // if this is a manual syn pred (gated and syn pred), add 665 if ( gatedPredExpr!=null && 666 configuration.semanticContext.isSyntacticPredicate() ) 667 { 668 synpreds.add(configuration.semanticContext); 669 } 670 } 671 if ( synpreds.size()==0 ) { 672 return null; 673 } 674 return synpreds; 675 } 676 677 /** For gated productions, we need an OR'd list of all predicates for the 678 * target of an edge so we can gate the edge based upon the predicates 679 * associated with taking that path (if any). 680 * 681 * For syntactic predicates, we only want to generate predicate 682 * evaluations as it transitions to an accept state; waste to 683 * do it earlier. So, only add gated preds derived from manually- 684 * specified syntactic predicates if this is an accept state. 685 * 686 * Also, since configurations w/o gated predicates are like true 687 * gated predicates, finding a configuration whose alt has no gated 688 * predicate implies we should evaluate the predicate to true. This 689 * means the whole edge has to be ungated. Consider: 690 * 691 * X : ('a' | {p}?=> 'a') 692 * | 'a' 'b' 693 * ; 694 * 695 * Here, you 'a' gets you from s0 to s1 but you can't test p because 696 * plain 'a' is ok. It's also ok for starting alt 2. Hence, you can't 697 * test p. Even on the edge going to accept state for alt 1 of X, you 698 * can't test p. You can get to the same place with and w/o the context. 699 * Therefore, it is never ok to test p in this situation. 700 * 701 * TODO: cache this as it's called a lot; or at least set bit if >1 present in state 702 */ getGatedPredicatesInNFAConfigurations()703 public SemanticContext getGatedPredicatesInNFAConfigurations() { 704 SemanticContext unionOfPredicatesFromAllAlts = null; 705 int numConfigs = nfaConfigurations.size(); 706 for (int i = 0; i < numConfigs; i++) { 707 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 708 SemanticContext gatedPredExpr = 709 configuration.semanticContext.getGatedPredicateContext(); 710 if ( gatedPredExpr==null ) { 711 // if we ever find a configuration w/o a gated predicate 712 // (even if it's a nongated predicate), we cannot gate 713 // the indident edges. 714 return null; 715 } 716 else if ( acceptState || !configuration.semanticContext.isSyntacticPredicate() ) { 717 // at this point we have a gated predicate and, due to elseif, 718 // we know it's an accept or not a syn pred. In this case, 719 // it's safe to add the gated predicate to the union. We 720 // only want to add syn preds if it's an accept state. Other 721 // gated preds can be used with edges leading to accept states. 722 if ( unionOfPredicatesFromAllAlts==null ) { 723 unionOfPredicatesFromAllAlts = gatedPredExpr; 724 } 725 else { 726 unionOfPredicatesFromAllAlts = 727 SemanticContext.or(unionOfPredicatesFromAllAlts,gatedPredExpr); 728 } 729 } 730 } 731 if ( unionOfPredicatesFromAllAlts instanceof SemanticContext.TruePredicate ) { 732 return null; 733 } 734 return unionOfPredicatesFromAllAlts; 735 } 736 737 /** Is an accept state reachable from this state? */ getAcceptStateReachable()738 public int getAcceptStateReachable() { 739 return acceptStateReachable; 740 } 741 setAcceptStateReachable(int acceptStateReachable)742 public void setAcceptStateReachable(int acceptStateReachable) { 743 this.acceptStateReachable = acceptStateReachable; 744 } 745 isResolvedWithPredicates()746 public boolean isResolvedWithPredicates() { 747 return resolvedWithPredicates; 748 } 749 750 /** Print all NFA states plus what alts they predict */ toString()751 public String toString() { 752 StringBuffer buf = new StringBuffer(); 753 buf.append(stateNumber+":{"); 754 for (int i = 0; i < nfaConfigurations.size(); i++) { 755 NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i); 756 if ( i>0 ) { 757 buf.append(", "); 758 } 759 buf.append(configuration); 760 } 761 buf.append("}"); 762 return buf.toString(); 763 } 764 getLookaheadDepth()765 public int getLookaheadDepth() { 766 return k; 767 } 768 setLookaheadDepth(int k)769 public void setLookaheadDepth(int k) { 770 this.k = k; 771 if ( k > dfa.max_k ) { // track max k for entire DFA 772 dfa.max_k = k; 773 } 774 } 775 776 } 777