1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.misc.IntSet; 31 import org.antlr.misc.MultiMap; 32 import org.antlr.misc.OrderedHashSet; 33 import org.antlr.misc.Utils; 34 import org.antlr.tool.Grammar; 35 36 import java.util.*; 37 38 /** A DFA state represents a set of possible NFA configurations. 39 * As Aho, Sethi, Ullman p. 117 says "The DFA uses its state 40 * to keep track of all possible states the NFA can be in after 41 * reading each input symbol. That is to say, after reading 42 * input a1a2..an, the DFA is in a state that represents the 43 * subset T of the states of the NFA that are reachable from the 44 * NFA's start state along some path labeled a1a2..an." 45 * In conventional NFA→DFA conversion, therefore, the subset T 46 * would be a bitset representing the set of states the 47 * NFA could be in. We need to track the alt predicted by each 48 * state as well, however. More importantly, we need to maintain 49 * a stack of states, tracking the closure operations as they 50 * jump from rule to rule, emulating rule invocations (method calls). 51 * Recall that NFAs do not normally have a stack like a pushdown-machine 52 * so I have to add one to simulate the proper lookahead sequences for 53 * the underlying LL grammar from which the NFA was derived. 54 * 55 * I use a list of NFAConfiguration objects. An NFAConfiguration 56 * is both a state (ala normal conversion) and an NFAContext describing 57 * the chain of rules (if any) followed to arrive at that state. There 58 * is also the semantic context, which is the "set" of predicates found 59 * on the path to this configuration. 60 * 61 * A DFA state may have multiple references to a particular state, 62 * but with different NFAContexts (with same or different alts) 63 * meaning that state was reached via a different set of rule invocations. 64 */ 65 public class DFAState extends State { 66 public static final int INITIAL_NUM_TRANSITIONS = 4; 67 public static final int PREDICTED_ALT_UNSET = NFA.INVALID_ALT_NUMBER-1; 68 69 /** We are part of what DFA? Use this ref to get access to the 70 * context trees for an alt. 71 */ 72 public DFA dfa; 73 74 /** Track the transitions emanating from this DFA state. The List 75 * elements are Transition objects. 76 */ 77 protected List<Transition> transitions = 78 new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS); 79 80 /** When doing an acyclic DFA, this is the number of lookahead symbols 81 * consumed to reach this state. This value may be nonzero for most 82 * dfa states, but it is only a valid value if the user has specified 83 * a max fixed lookahead. 84 */ 85 protected int k; 86 87 /** The NFA→DFA algorithm may terminate leaving some states 88 * without a path to an accept state, implying that upon certain 89 * input, the decision is not deterministic--no decision about 90 * predicting a unique alternative can be made. Recall that an 91 * accept state is one in which a unique alternative is predicted. 92 */ 93 protected int acceptStateReachable = DFA.REACHABLE_UNKNOWN; 94 95 /** Rather than recheck every NFA configuration in a DFA state (after 96 * resolving) in findNewDFAStatesAndAddDFATransitions just check 97 * this boolean. Saves a linear walk perhaps DFA state creation. 98 * Every little bit helps. 99 */ 100 protected boolean resolvedWithPredicates = false; 101 102 /** If a closure operation finds that we tried to invoke the same 103 * rule too many times (stack would grow beyond a threshold), it 104 * marks the state has aborted and notifies the DecisionProbe. 105 */ 106 public boolean abortedDueToRecursionOverflow = false; 107 108 /** If we detect recursion on more than one alt, decision is non-LL(*), 109 * but try to isolate it to only those states whose closure operations 110 * detect recursion. There may be other alts that are cool: 111 * 112 * a : recur '.' 113 * | recur ';' 114 * | X Y // LL(2) decision; don't abort and use k=1 plus backtracking 115 * | X Z 116 * ; 117 * 118 * 12/13/2007: Actually this has caused problems. If k=*, must terminate 119 * and throw out entire DFA; retry with k=1. Since recursive, do not 120 * attempt more closure ops as it may take forever. Exception thrown 121 * now and we simply report the problem. If synpreds exist, I'll retry 122 * with k=1. 123 */ 124 protected boolean abortedDueToMultipleRecursiveAlts = false; 125 126 /** Build up the hash code for this state as NFA configurations 127 * are added as it's monotonically increasing list of configurations. 128 */ 129 protected int cachedHashCode; 130 131 protected int cachedUniquelyPredicatedAlt = PREDICTED_ALT_UNSET; 132 133 public int minAltInConfigurations=Integer.MAX_VALUE; 134 135 public boolean atLeastOneConfigurationHasAPredicate = false; 136 137 /** The set of NFA configurations (state,alt,context) for this DFA state */ 138 public OrderedHashSet<NFAConfiguration> nfaConfigurations = 139 new OrderedHashSet<NFAConfiguration>(); 140 141 public List<NFAConfiguration> configurationsWithLabeledEdges = 142 new ArrayList<NFAConfiguration>(); 143 144 /** Used to prevent the closure operation from looping to itself and 145 * hence looping forever. Sensitive to the NFA state, the alt, and 146 * the stack context. This just the nfa config set because we want to 147 * prevent closures only on states contributed by closure not reach 148 * operations. 149 * 150 * Two configurations identical including semantic context are 151 * considered the same closure computation. @see NFAToDFAConverter.closureBusy(). 152 */ 153 protected Set<NFAConfiguration> closureBusy = new HashSet<NFAConfiguration>(); 154 155 /** As this state is constructed (i.e., as NFA states are added), we 156 * can easily check for non-epsilon transitions because the only 157 * transition that could be a valid label is transition(0). When we 158 * process this node eventually, we'll have to walk all states looking 159 * for all possible transitions. That is of the order: size(label space) 160 * times size(nfa states), which can be pretty damn big. It's better 161 * to simply track possible labels. 162 */ 163 protected OrderedHashSet<Label> reachableLabels; 164 DFAState(DFA dfa)165 public DFAState(DFA dfa) { 166 this.dfa = dfa; 167 } 168 reset()169 public void reset() { 170 //nfaConfigurations = null; // getGatedPredicatesInNFAConfigurations needs 171 configurationsWithLabeledEdges = null; 172 closureBusy = null; 173 reachableLabels = null; 174 } 175 176 @Override transition(int i)177 public Transition transition(int i) { 178 return transitions.get(i); 179 } 180 181 @Override getNumberOfTransitions()182 public int getNumberOfTransitions() { 183 return transitions.size(); 184 } 185 186 @Override addTransition(Transition t)187 public void addTransition(Transition t) { 188 transitions.add(t); 189 } 190 191 /** Add a transition from this state to target with label. Return 192 * the transition number from 0..n-1. 193 */ addTransition(DFAState target, Label label)194 public int addTransition(DFAState target, Label label) { 195 transitions.add( new Transition(label, target) ); 196 return transitions.size()-1; 197 } 198 getTransition(int trans)199 public Transition getTransition(int trans) { 200 return transitions.get(trans); 201 } 202 removeTransition(int trans)203 public void removeTransition(int trans) { 204 transitions.remove(trans); 205 } 206 207 /** Add an NFA configuration to this DFA node. Add uniquely 208 * an NFA state/alt/syntactic&semantic context (chain of invoking state(s) 209 * and semantic predicate contexts). 210 * 211 * I don't see how there could be two configurations with same 212 * state|alt|synCtx and different semantic contexts because the 213 * semantic contexts are computed along the path to a particular state 214 * so those two configurations would have to have the same predicate. 215 * Nonetheless, the addition of configurations is unique on all 216 * configuration info. I guess I'm saying that syntactic context 217 * implies semantic context as the latter is computed according to the 218 * former. 219 * 220 * As we add configurations to this DFA state, track the set of all possible 221 * transition labels so we can simply walk it later rather than doing a 222 * loop over all possible labels in the NFA. 223 */ addNFAConfiguration(NFAState state, NFAConfiguration c)224 public void addNFAConfiguration(NFAState state, NFAConfiguration c) { 225 if ( nfaConfigurations.contains(c) ) { 226 return; 227 } 228 229 nfaConfigurations.add(c); 230 231 // track min alt rather than compute later 232 if ( c.alt < minAltInConfigurations ) { 233 minAltInConfigurations = c.alt; 234 } 235 236 if ( c.semanticContext!=SemanticContext.EMPTY_SEMANTIC_CONTEXT ) { 237 atLeastOneConfigurationHasAPredicate = true; 238 } 239 240 // update hashCode; for some reason using context.hashCode() also 241 // makes the GC take like 70% of the CPU and is slow! 242 cachedHashCode += c.state + c.alt; 243 244 // update reachableLabels 245 // We're adding an NFA state; check to see if it has a non-epsilon edge 246 if ( state.transition[0] != null ) { 247 Label label = state.transition[0].label; 248 if ( !(label.isEpsilon()||label.isSemanticPredicate()) ) { 249 // this NFA state has a non-epsilon edge, track for fast 250 // walking later when we do reach on this DFA state we're 251 // building. 252 configurationsWithLabeledEdges.add(c); 253 if ( state.transition[1] ==null ) { 254 // later we can check this to ignore o-A->o states in closure 255 c.singleAtomTransitionEmanating = true; 256 } 257 addReachableLabel(label); 258 } 259 } 260 } 261 addNFAConfiguration(NFAState state, int alt, NFAContext context, SemanticContext semanticContext)262 public NFAConfiguration addNFAConfiguration(NFAState state, 263 int alt, 264 NFAContext context, 265 SemanticContext semanticContext) 266 { 267 NFAConfiguration c = new NFAConfiguration(state.stateNumber, 268 alt, 269 context, 270 semanticContext); 271 addNFAConfiguration(state, c); 272 return c; 273 } 274 275 /** Add label uniquely and disjointly; intersection with 276 * another set or int/char forces breaking up the set(s). 277 * 278 * Example, if reachable list of labels is [a..z, {k,9}, 0..9], 279 * the disjoint list will be [{a..j,l..z}, k, 9, 0..8]. 280 * 281 * As we add NFA configurations to a DFA state, we might as well track 282 * the set of all possible transition labels to make the DFA conversion 283 * more efficient. W/o the reachable labels, we'd need to check the 284 * whole vocabulary space (could be 0..\uFFFF)! The problem is that 285 * labels can be sets, which may overlap with int labels or other sets. 286 * As we need a deterministic set of transitions from any 287 * state in the DFA, we must make the reachable labels set disjoint. 288 * This operation amounts to finding the character classes for this 289 * DFA state whereas with tools like flex, that need to generate a 290 * homogeneous DFA, must compute char classes across all states. 291 * We are going to generate DFAs with heterogeneous states so we 292 * only care that the set of transitions out of a single state are 293 * unique. :) 294 * 295 * The idea for adding a new set, t, is to look for overlap with the 296 * elements of existing list s. Upon overlap, replace 297 * existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t. 298 * (if s[i]-t is nil, don't add). The remainder is t-s[i], which is 299 * what you want to add to the set minus what was already there. The 300 * remainder must then be compared against the i+1..n elements in s 301 * looking for another collision. Each collision results in a smaller 302 * and smaller remainder. Stop when you run out of s elements or 303 * remainder goes to nil. If remainder is non nil when you run out of 304 * s elements, then add remainder to the end. 305 * 306 * Single element labels are treated as sets to make the code uniform. 307 */ addReachableLabel(Label label)308 protected void addReachableLabel(Label label) { 309 if ( reachableLabels==null ) { 310 reachableLabels = new OrderedHashSet<Label>(); 311 } 312 /* 313 System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar)); 314 System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " + 315 "reachableLabels="+reachableLabels.toString()); 316 */ 317 if ( reachableLabels.contains(label) ) { // exact label present 318 return; 319 } 320 IntSet t = label.getSet(); 321 IntSet remainder = t; // remainder starts out as whole set to add 322 int n = reachableLabels.size(); // only look at initial elements 323 // walk the existing list looking for the collision 324 for (int i=0; i<n; i++) { 325 Label rl = reachableLabels.get(i); 326 /* 327 System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+ 328 rl.toString(dfa.nfa.grammar)+"="+ 329 intersection.toString(dfa.nfa.grammar)); 330 */ 331 if ( !Label.intersect(label, rl) ) { 332 continue; 333 } 334 //System.out.println(label+" collides with "+rl); 335 336 // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t) 337 // (ignoring s_i-t if nil; don't put in list) 338 339 // Replace existing s_i with intersection since we 340 // know that will always be a non nil character class 341 IntSet s_i = rl.getSet(); 342 IntSet intersection = s_i.and(t); 343 reachableLabels.set(i, new Label(intersection)); 344 345 // Compute s_i-t to see what is in current set and not in incoming 346 IntSet existingMinusNewElements = s_i.subtract(t); 347 //System.out.println(s_i+"-"+t+"="+existingMinusNewElements); 348 if ( !existingMinusNewElements.isNil() ) { 349 // found a new character class, add to the end (doesn't affect 350 // outer loop duration due to n computation a priori. 351 Label newLabel = new Label(existingMinusNewElements); 352 reachableLabels.add(newLabel); 353 } 354 355 /* 356 System.out.println("after collision, " + 357 "reachableLabels="+reachableLabels.toString()); 358 */ 359 360 // anything left to add to the reachableLabels? 361 remainder = t.subtract(s_i); 362 if ( remainder.isNil() ) { 363 break; // nothing left to add to set. done! 364 } 365 366 t = remainder; 367 } 368 if ( !remainder.isNil() ) { 369 /* 370 System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " + 371 "reachableLabels="+reachableLabels.toString()); 372 System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar)); 373 */ 374 Label newLabel = new Label(remainder); 375 reachableLabels.add(newLabel); 376 } 377 /* 378 System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " + 379 "reachableLabels="+reachableLabels.toString()); 380 */ 381 } 382 getReachableLabels()383 public OrderedHashSet<Label> getReachableLabels() { 384 return reachableLabels; 385 } 386 setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs)387 public void setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs) { 388 this.nfaConfigurations = configs; 389 } 390 391 /** A decent hash for a DFA state is the sum of the NFA state/alt pairs. 392 * This is used when we add DFAState objects to the DFA.states Map and 393 * when we compare DFA states. Computed in addNFAConfiguration() 394 */ 395 @Override hashCode()396 public int hashCode() { 397 if ( cachedHashCode==0 ) { 398 // LL(1) algorithm doesn't use NFA configurations, which 399 // dynamically compute hashcode; must have something; use super 400 return super.hashCode(); 401 } 402 return cachedHashCode; 403 } 404 405 /** Two DFAStates are equal if their NFA configuration sets are the 406 * same. This method is used to see if a DFA state already exists. 407 * 408 * Because the number of alternatives and number of NFA configurations are 409 * finite, there is a finite number of DFA states that can be processed. 410 * This is necessary to show that the algorithm terminates. 411 * 412 * Cannot test the DFA state numbers here because in DFA.addState we need 413 * to know if any other state exists that has this exact set of NFA 414 * configurations. The DFAState state number is irrelevant. 415 */ 416 @Override equals(Object o)417 public boolean equals(Object o) { 418 // compare set of NFA configurations in this set with other 419 DFAState other = (DFAState)o; 420 return this.nfaConfigurations.equals(other.nfaConfigurations); 421 } 422 423 /** Walk each configuration and if they are all the same alt, return 424 * that alt else return NFA.INVALID_ALT_NUMBER. Ignore resolved 425 * configurations, but don't ignore resolveWithPredicate configs 426 * because this state should not be an accept state. We need to add 427 * this to the work list and then have semantic predicate edges 428 * emanating from it. 429 */ getUniquelyPredictedAlt()430 public int getUniquelyPredictedAlt() { 431 if ( cachedUniquelyPredicatedAlt!=PREDICTED_ALT_UNSET ) { 432 return cachedUniquelyPredicatedAlt; 433 } 434 int alt = NFA.INVALID_ALT_NUMBER; 435 int numConfigs = nfaConfigurations.size(); 436 for (int i = 0; i < numConfigs; i++) { 437 NFAConfiguration configuration = nfaConfigurations.get(i); 438 // ignore anything we resolved; predicates will still result 439 // in transitions out of this state, so must count those 440 // configurations; i.e., don't ignore resolveWithPredicate configs 441 if ( configuration.resolved ) { 442 continue; 443 } 444 if ( alt==NFA.INVALID_ALT_NUMBER ) { 445 alt = configuration.alt; // found first nonresolved alt 446 } 447 else if ( configuration.alt!=alt ) { 448 return NFA.INVALID_ALT_NUMBER; 449 } 450 } 451 this.cachedUniquelyPredicatedAlt = alt; 452 return alt; 453 } 454 455 /** Return the uniquely mentioned alt from the NFA configurations; 456 * Ignore the resolved bit etc... Return INVALID_ALT_NUMBER 457 * if there is more than one alt mentioned. 458 */ getUniqueAlt()459 public int getUniqueAlt() { 460 int alt = NFA.INVALID_ALT_NUMBER; 461 int numConfigs = nfaConfigurations.size(); 462 for (int i = 0; i < numConfigs; i++) { 463 NFAConfiguration configuration = nfaConfigurations.get(i); 464 if ( alt==NFA.INVALID_ALT_NUMBER ) { 465 alt = configuration.alt; // found first alt 466 } 467 else if ( configuration.alt!=alt ) { 468 return NFA.INVALID_ALT_NUMBER; 469 } 470 } 471 return alt; 472 } 473 474 /** When more than one alternative can match the same input, the first 475 * alternative is chosen to resolve the conflict. The other alts 476 * are "turned off" by setting the "resolved" flag in the NFA 477 * configurations. Return the set of disabled alternatives. For 478 * 479 * a : A | A | A ; 480 * 481 * this method returns {2,3} as disabled. This does not mean that 482 * the alternative is totally unreachable, it just means that for this 483 * DFA state, that alt is disabled. There may be other accept states 484 * for that alt. 485 */ getDisabledAlternatives()486 public Set<Integer> getDisabledAlternatives() { 487 Set<Integer> disabled = new LinkedHashSet<Integer>(); 488 int numConfigs = nfaConfigurations.size(); 489 for (int i = 0; i < numConfigs; i++) { 490 NFAConfiguration configuration = nfaConfigurations.get(i); 491 if ( configuration.resolved ) { 492 disabled.add(Utils.integer(configuration.alt)); 493 } 494 } 495 return disabled; 496 } 497 getNonDeterministicAlts()498 protected Set<Integer> getNonDeterministicAlts() { 499 int user_k = dfa.getUserMaxLookahead(); 500 if ( user_k>0 && user_k==k ) { 501 // if fixed lookahead, then more than 1 alt is a nondeterminism 502 // if we have hit the max lookahead 503 return getAltSet(); 504 } 505 else if ( abortedDueToMultipleRecursiveAlts || abortedDueToRecursionOverflow ) { 506 // if we had to abort for non-LL(*) state assume all alts are a problem 507 return getAltSet(); 508 } 509 else { 510 return getConflictingAlts(); 511 } 512 } 513 514 /** Walk each NFA configuration in this DFA state looking for a conflict 515 * where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with 516 * context conflicting ctx predicts alts i and j. Return an Integer set 517 * of the alternative numbers that conflict. Two contexts conflict if 518 * they are equal or one is a stack suffix of the other or one is 519 * the empty context. 520 * 521 * Use a hash table to record the lists of configs for each state 522 * as they are encountered. We need only consider states for which 523 * there is more than one configuration. The configurations' predicted 524 * alt must be different or must have different contexts to avoid a 525 * conflict. 526 * 527 * Don't report conflicts for DFA states that have conflicting Tokens 528 * rule NFA states; they will be resolved in favor of the first rule. 529 */ getConflictingAlts()530 protected Set<Integer> getConflictingAlts() { 531 // TODO this is called multiple times: cache result? 532 //System.out.println("getNondetAlts for DFA state "+stateNumber); 533 Set<Integer> nondeterministicAlts = new HashSet<Integer>(); 534 535 // If only 1 NFA conf then no way it can be nondeterministic; 536 // save the overhead. There are many o-a->o NFA transitions 537 // and so we save a hash map and iterator creation for each 538 // state. 539 int numConfigs = nfaConfigurations.size(); 540 if ( numConfigs <=1 ) { 541 return null; 542 } 543 544 // First get a list of configurations for each state. 545 // Most of the time, each state will have one associated configuration. 546 MultiMap<Integer, NFAConfiguration> stateToConfigListMap = 547 new MultiMap<Integer, NFAConfiguration>(); 548 for (int i = 0; i < numConfigs; i++) { 549 NFAConfiguration configuration = nfaConfigurations.get(i); 550 Integer stateI = Utils.integer(configuration.state); 551 stateToConfigListMap.map(stateI, configuration); 552 } 553 // potential conflicts are states with > 1 configuration and diff alts 554 Set<Integer> states = stateToConfigListMap.keySet(); 555 int numPotentialConflicts = 0; 556 for (Integer stateI : states) { 557 boolean thisStateHasPotentialProblem = false; 558 List<NFAConfiguration> configsForState = stateToConfigListMap.get(stateI); 559 int alt=0; 560 int numConfigsForState = configsForState.size(); 561 for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) { 562 NFAConfiguration c = configsForState.get(i); 563 if ( alt==0 ) { 564 alt = c.alt; 565 } 566 else if ( c.alt!=alt ) { 567 /* 568 System.out.println("potential conflict in state "+stateI+ 569 " configs: "+configsForState); 570 */ 571 // 11/28/2005: don't report closures that pinch back 572 // together in Tokens rule. We want to silently resolve 573 // to the first token definition ala lex/flex by ignoring 574 // these conflicts. 575 // Also this ensures that lexers look for more and more 576 // characters (longest match) before resorting to predicates. 577 // TestSemanticPredicates.testLexerMatchesLongestThenTestPred() 578 // for example would terminate at state s1 and test predicate 579 // meaning input "ab" would test preds to decide what to 580 // do but it should match rule C w/o testing preds. 581 if ( dfa.nfa.grammar.type!=Grammar.LEXER || 582 !dfa.decisionNFAStartState.enclosingRule.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) 583 { 584 numPotentialConflicts++; 585 thisStateHasPotentialProblem = true; 586 } 587 } 588 } 589 if ( !thisStateHasPotentialProblem ) { 590 // remove NFA state's configurations from 591 // further checking; no issues with it 592 // (can't remove as it's concurrent modification; set to null) 593 stateToConfigListMap.put(stateI, null); 594 } 595 } 596 597 // a fast check for potential issues; most states have none 598 if ( numPotentialConflicts==0 ) { 599 return null; 600 } 601 602 // we have a potential problem, so now go through config lists again 603 // looking for different alts (only states with potential issues 604 // are left in the states set). Now we will check context. 605 // For example, the list of configs for NFA state 3 in some DFA 606 // state might be: 607 // [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2] 608 // I want to create a map from context to alts looking for overlap: 609 // [28 18 $] -> 2 610 // [28 $] -> 1 611 // [$] -> 1,2 612 // Indeed a conflict exists as same state 3, same context [$], predicts 613 // alts 1 and 2. 614 // walk each state with potential conflicting configurations 615 for (Integer stateI : states) { 616 List<NFAConfiguration> configsForState = stateToConfigListMap.get(stateI); 617 // compare each configuration pair s, t to ensure: 618 // s.ctx different than t.ctx if s.alt != t.alt 619 int numConfigsForState = 0; 620 if ( configsForState!=null ) { 621 numConfigsForState = configsForState.size(); 622 } 623 for (int i = 0; i < numConfigsForState; i++) { 624 NFAConfiguration s = configsForState.get(i); 625 for (int j = i+1; j < numConfigsForState; j++) { 626 NFAConfiguration t = configsForState.get(j); 627 // conflicts means s.ctx==t.ctx or s.ctx is a stack 628 // suffix of t.ctx or vice versa (if alts differ). 629 // Also a conflict if s.ctx or t.ctx is empty 630 if ( s.alt != t.alt && s.context.conflictsWith(t.context) ) { 631 nondeterministicAlts.add(Utils.integer(s.alt)); 632 nondeterministicAlts.add(Utils.integer(t.alt)); 633 } 634 } 635 } 636 } 637 638 if ( nondeterministicAlts.isEmpty() ) { 639 return null; 640 } 641 return nondeterministicAlts; 642 } 643 644 /** Get the set of all alts mentioned by all NFA configurations in this 645 * DFA state. 646 */ getAltSet()647 public Set<Integer> getAltSet() { 648 int numConfigs = nfaConfigurations.size(); 649 Set<Integer> alts = new HashSet<Integer>(); 650 for (int i = 0; i < numConfigs; i++) { 651 NFAConfiguration configuration = nfaConfigurations.get(i); 652 alts.add(Utils.integer(configuration.alt)); 653 } 654 if ( alts.isEmpty() ) { 655 return null; 656 } 657 return alts; 658 } 659 getGatedSyntacticPredicatesInNFAConfigurations()660 public Set<? extends SemanticContext> getGatedSyntacticPredicatesInNFAConfigurations() { 661 int numConfigs = nfaConfigurations.size(); 662 Set<SemanticContext> synpreds = new HashSet<SemanticContext>(); 663 for (int i = 0; i < numConfigs; i++) { 664 NFAConfiguration configuration = nfaConfigurations.get(i); 665 SemanticContext gatedPredExpr = 666 configuration.semanticContext.getGatedPredicateContext(); 667 // if this is a manual syn pred (gated and syn pred), add 668 if ( gatedPredExpr!=null && 669 configuration.semanticContext.isSyntacticPredicate() ) 670 { 671 synpreds.add(configuration.semanticContext); 672 } 673 } 674 if ( synpreds.isEmpty() ) { 675 return null; 676 } 677 return synpreds; 678 } 679 680 /** For gated productions, we need an OR'd list of all predicates for the 681 * target of an edge so we can gate the edge based upon the predicates 682 * associated with taking that path (if any). 683 * 684 * For syntactic predicates, we only want to generate predicate 685 * evaluations as it transitions to an accept state; waste to 686 * do it earlier. So, only add gated preds derived from manually- 687 * specified syntactic predicates if this is an accept state. 688 * 689 * Also, since configurations w/o gated predicates are like true 690 * gated predicates, finding a configuration whose alt has no gated 691 * predicate implies we should evaluate the predicate to true. This 692 * means the whole edge has to be ungated. Consider: 693 * 694 * X : ('a' | {p}?=> 'a') 695 * | 'a' 'b' 696 * ; 697 * 698 * Here, you 'a' gets you from s0 to s1 but you can't test p because 699 * plain 'a' is ok. It's also ok for starting alt 2. Hence, you can't 700 * test p. Even on the edge going to accept state for alt 1 of X, you 701 * can't test p. You can get to the same place with and w/o the context. 702 * Therefore, it is never ok to test p in this situation. 703 * 704 * TODO: cache this as it's called a lot; or at least set bit if >1 present in state 705 */ getGatedPredicatesInNFAConfigurations()706 public SemanticContext getGatedPredicatesInNFAConfigurations() { 707 SemanticContext unionOfPredicatesFromAllAlts = null; 708 int numConfigs = nfaConfigurations.size(); 709 for (int i = 0; i < numConfigs; i++) { 710 NFAConfiguration configuration = nfaConfigurations.get(i); 711 SemanticContext gatedPredExpr = 712 configuration.semanticContext.getGatedPredicateContext(); 713 if ( gatedPredExpr==null ) { 714 // if we ever find a configuration w/o a gated predicate 715 // (even if it's a nongated predicate), we cannot gate 716 // the indident edges. 717 return null; 718 } 719 else if ( acceptState || !configuration.semanticContext.isSyntacticPredicate() ) { 720 // at this point we have a gated predicate and, due to elseif, 721 // we know it's an accept or not a syn pred. In this case, 722 // it's safe to add the gated predicate to the union. We 723 // only want to add syn preds if it's an accept state. Other 724 // gated preds can be used with edges leading to accept states. 725 if ( unionOfPredicatesFromAllAlts==null ) { 726 unionOfPredicatesFromAllAlts = gatedPredExpr; 727 } 728 else { 729 unionOfPredicatesFromAllAlts = 730 SemanticContext.or(unionOfPredicatesFromAllAlts,gatedPredExpr); 731 } 732 } 733 } 734 if ( unionOfPredicatesFromAllAlts instanceof SemanticContext.TruePredicate ) { 735 return null; 736 } 737 return unionOfPredicatesFromAllAlts; 738 } 739 740 /** Is an accept state reachable from this state? */ getAcceptStateReachable()741 public int getAcceptStateReachable() { 742 return acceptStateReachable; 743 } 744 setAcceptStateReachable(int acceptStateReachable)745 public void setAcceptStateReachable(int acceptStateReachable) { 746 this.acceptStateReachable = acceptStateReachable; 747 } 748 isResolvedWithPredicates()749 public boolean isResolvedWithPredicates() { 750 return resolvedWithPredicates; 751 } 752 753 /** Print all NFA states plus what alts they predict */ 754 @Override toString()755 public String toString() { 756 StringBuilder buf = new StringBuilder(); 757 buf.append(stateNumber).append(":{"); 758 for (int i = 0; i < nfaConfigurations.size(); i++) { 759 NFAConfiguration configuration = nfaConfigurations.get(i); 760 if ( i>0 ) { 761 buf.append(", "); 762 } 763 buf.append(configuration); 764 } 765 buf.append("}"); 766 return buf.toString(); 767 } 768 getLookaheadDepth()769 public int getLookaheadDepth() { 770 return k; 771 } 772 setLookaheadDepth(int k)773 public void setLookaheadDepth(int k) { 774 this.k = k; 775 if ( k > dfa.max_k ) { // track max k for entire DFA 776 dfa.max_k = k; 777 } 778 } 779 780 } 781