• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.misc.IntSet;
31 import org.antlr.misc.MultiMap;
32 import org.antlr.misc.OrderedHashSet;
33 import org.antlr.misc.Utils;
34 import org.antlr.tool.Grammar;
35 
36 import java.util.*;
37 
38 /** A DFA state represents a set of possible NFA configurations.
39  *  As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
40  *  to keep track of all possible states the NFA can be in after
41  *  reading each input symbol.  That is to say, after reading
42  *  input a1a2..an, the DFA is in a state that represents the
43  *  subset T of the states of the NFA that are reachable from the
44  *  NFA's start state along some path labeled a1a2..an."
45  *  In conventional NFA→DFA conversion, therefore, the subset T
46  *  would be a bitset representing the set of states the
47  *  NFA could be in.  We need to track the alt predicted by each
48  *  state as well, however.  More importantly, we need to maintain
49  *  a stack of states, tracking the closure operations as they
50  *  jump from rule to rule, emulating rule invocations (method calls).
51  *  Recall that NFAs do not normally have a stack like a pushdown-machine
52  *  so I have to add one to simulate the proper lookahead sequences for
53  *  the underlying LL grammar from which the NFA was derived.
54  *
55  *  I use a list of NFAConfiguration objects.  An NFAConfiguration
56  *  is both a state (ala normal conversion) and an NFAContext describing
57  *  the chain of rules (if any) followed to arrive at that state.  There
58  *  is also the semantic context, which is the "set" of predicates found
59  *  on the path to this configuration.
60  *
61  *  A DFA state may have multiple references to a particular state,
62  *  but with different NFAContexts (with same or different alts)
63  *  meaning that state was reached via a different set of rule invocations.
64  */
65 public class DFAState extends State {
66     public static final int INITIAL_NUM_TRANSITIONS = 4;
67 	public static final int PREDICTED_ALT_UNSET = NFA.INVALID_ALT_NUMBER-1;
68 
69     /** We are part of what DFA?  Use this ref to get access to the
70      *  context trees for an alt.
71      */
72     public DFA dfa;
73 
74     /** Track the transitions emanating from this DFA state.  The List
75      *  elements are Transition objects.
76      */
77     protected List<Transition> transitions =
78 		new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);
79 
80 	/** When doing an acyclic DFA, this is the number of lookahead symbols
81 	 *  consumed to reach this state.  This value may be nonzero for most
82 	 *  dfa states, but it is only a valid value if the user has specified
83 	 *  a max fixed lookahead.
84 	 */
85     protected int k;
86 
87     /** The NFA&rarr;DFA algorithm may terminate leaving some states
88      *  without a path to an accept state, implying that upon certain
89      *  input, the decision is not deterministic--no decision about
90      *  predicting a unique alternative can be made.  Recall that an
91      *  accept state is one in which a unique alternative is predicted.
92      */
93     protected int acceptStateReachable = DFA.REACHABLE_UNKNOWN;
94 
95     /** Rather than recheck every NFA configuration in a DFA state (after
96      *  resolving) in findNewDFAStatesAndAddDFATransitions just check
97      *  this boolean.  Saves a linear walk perhaps DFA state creation.
98      *  Every little bit helps.
99      */
100     protected boolean resolvedWithPredicates = false;
101 
102 	/** If a closure operation finds that we tried to invoke the same
103 	 *  rule too many times (stack would grow beyond a threshold), it
104 	 *  marks the state has aborted and notifies the DecisionProbe.
105 	 */
106 	public boolean abortedDueToRecursionOverflow = false;
107 
108 	/** If we detect recursion on more than one alt, decision is non-LL(*),
109 	 *  but try to isolate it to only those states whose closure operations
110 	 *  detect recursion.  There may be other alts that are cool:
111 	 *
112 	 *  a : recur '.'
113 	 *    | recur ';'
114 	 *    | X Y  // LL(2) decision; don't abort and use k=1 plus backtracking
115 	 *    | X Z
116 	 *    ;
117 	 *
118 	 *  12/13/2007: Actually this has caused problems.  If k=*, must terminate
119 	 *  and throw out entire DFA; retry with k=1.  Since recursive, do not
120 	 *  attempt more closure ops as it may take forever.  Exception thrown
121 	 *  now and we simply report the problem.  If synpreds exist, I'll retry
122 	 *  with k=1.
123 	 */
124 	protected boolean abortedDueToMultipleRecursiveAlts = false;
125 
126 	/** Build up the hash code for this state as NFA configurations
127      *  are added as it's monotonically increasing list of configurations.
128      */
129     protected int cachedHashCode;
130 
131 	protected int cachedUniquelyPredicatedAlt = PREDICTED_ALT_UNSET;
132 
133 	public int minAltInConfigurations=Integer.MAX_VALUE;
134 
135 	public boolean atLeastOneConfigurationHasAPredicate = false;
136 
137 	/** The set of NFA configurations (state,alt,context) for this DFA state */
138     public OrderedHashSet<NFAConfiguration> nfaConfigurations =
139 		new OrderedHashSet<NFAConfiguration>();
140 
141 	public List<NFAConfiguration> configurationsWithLabeledEdges =
142 		new ArrayList<NFAConfiguration>();
143 
144 	/** Used to prevent the closure operation from looping to itself and
145      *  hence looping forever.  Sensitive to the NFA state, the alt, and
146      *  the stack context.  This just the nfa config set because we want to
147 	 *  prevent closures only on states contributed by closure not reach
148 	 *  operations.
149 	 *
150 	 *  Two configurations identical including semantic context are
151 	 *  considered the same closure computation.  @see NFAToDFAConverter.closureBusy().
152      */
153 	protected Set<NFAConfiguration> closureBusy = new HashSet<NFAConfiguration>();
154 
155 	/** As this state is constructed (i.e., as NFA states are added), we
156      *  can easily check for non-epsilon transitions because the only
157      *  transition that could be a valid label is transition(0).  When we
158      *  process this node eventually, we'll have to walk all states looking
159      *  for all possible transitions.  That is of the order: size(label space)
160      *  times size(nfa states), which can be pretty damn big.  It's better
161      *  to simply track possible labels.
162      */
163     protected OrderedHashSet<Label> reachableLabels;
164 
DFAState(DFA dfa)165     public DFAState(DFA dfa) {
166         this.dfa = dfa;
167     }
168 
reset()169 	public void reset() {
170 		//nfaConfigurations = null; // getGatedPredicatesInNFAConfigurations needs
171 		configurationsWithLabeledEdges = null;
172 		closureBusy = null;
173 		reachableLabels = null;
174 	}
175 
176 	@Override
transition(int i)177 	public Transition transition(int i) {
178         return transitions.get(i);
179     }
180 
181 	@Override
getNumberOfTransitions()182     public int getNumberOfTransitions() {
183         return transitions.size();
184     }
185 
186 	@Override
addTransition(Transition t)187     public void addTransition(Transition t) {
188         transitions.add(t);
189     }
190 
191 	/** Add a transition from this state to target with label.  Return
192 	 *  the transition number from 0..n-1.
193 	 */
addTransition(DFAState target, Label label)194     public int addTransition(DFAState target, Label label) {
195 		transitions.add( new Transition(label, target) );
196 		return transitions.size()-1;
197     }
198 
getTransition(int trans)199     public Transition getTransition(int trans) {
200         return transitions.get(trans);
201     }
202 
removeTransition(int trans)203 	public void removeTransition(int trans) {
204 		transitions.remove(trans);
205 	}
206 
207     /** Add an NFA configuration to this DFA node.  Add uniquely
208      *  an NFA state/alt/syntactic&amp;semantic context (chain of invoking state(s)
209      *  and semantic predicate contexts).
210      *
211      *  I don't see how there could be two configurations with same
212      *  state|alt|synCtx and different semantic contexts because the
213      *  semantic contexts are computed along the path to a particular state
214      *  so those two configurations would have to have the same predicate.
215      *  Nonetheless, the addition of configurations is unique on all
216      *  configuration info.  I guess I'm saying that syntactic context
217      *  implies semantic context as the latter is computed according to the
218      *  former.
219      *
220      *  As we add configurations to this DFA state, track the set of all possible
221      *  transition labels so we can simply walk it later rather than doing a
222      *  loop over all possible labels in the NFA.
223      */
addNFAConfiguration(NFAState state, NFAConfiguration c)224     public void addNFAConfiguration(NFAState state, NFAConfiguration c) {
225 		if ( nfaConfigurations.contains(c) ) {
226             return;
227         }
228 
229         nfaConfigurations.add(c);
230 
231 		// track min alt rather than compute later
232 		if ( c.alt < minAltInConfigurations ) {
233 			minAltInConfigurations = c.alt;
234 		}
235 
236 		if ( c.semanticContext!=SemanticContext.EMPTY_SEMANTIC_CONTEXT ) {
237 			atLeastOneConfigurationHasAPredicate = true;
238 		}
239 
240 		// update hashCode; for some reason using context.hashCode() also
241         // makes the GC take like 70% of the CPU and is slow!
242         cachedHashCode += c.state + c.alt;
243 
244 		// update reachableLabels
245 		// We're adding an NFA state; check to see if it has a non-epsilon edge
246 		if ( state.transition[0] != null ) {
247 			Label label = state.transition[0].label;
248 			if ( !(label.isEpsilon()||label.isSemanticPredicate()) ) {
249 				// this NFA state has a non-epsilon edge, track for fast
250 				// walking later when we do reach on this DFA state we're
251 				// building.
252 				configurationsWithLabeledEdges.add(c);
253 				if ( state.transition[1] ==null ) {
254 					// later we can check this to ignore o-A->o states in closure
255 					c.singleAtomTransitionEmanating = true;
256 				}
257 				addReachableLabel(label);
258 			}
259 		}
260     }
261 
addNFAConfiguration(NFAState state, int alt, NFAContext context, SemanticContext semanticContext)262 	public NFAConfiguration addNFAConfiguration(NFAState state,
263 												int alt,
264 												NFAContext context,
265 												SemanticContext semanticContext)
266 	{
267 		NFAConfiguration c = new NFAConfiguration(state.stateNumber,
268 												  alt,
269 												  context,
270 												  semanticContext);
271 		addNFAConfiguration(state, c);
272 		return c;
273 	}
274 
275 	/** Add label uniquely and disjointly; intersection with
276      *  another set or int/char forces breaking up the set(s).
277      *
278      *  Example, if reachable list of labels is [a..z, {k,9}, 0..9],
279      *  the disjoint list will be [{a..j,l..z}, k, 9, 0..8].
280      *
281      *  As we add NFA configurations to a DFA state, we might as well track
282      *  the set of all possible transition labels to make the DFA conversion
283      *  more efficient.  W/o the reachable labels, we'd need to check the
284      *  whole vocabulary space (could be 0..\uFFFF)!  The problem is that
285      *  labels can be sets, which may overlap with int labels or other sets.
286      *  As we need a deterministic set of transitions from any
287      *  state in the DFA, we must make the reachable labels set disjoint.
288      *  This operation amounts to finding the character classes for this
289      *  DFA state whereas with tools like flex, that need to generate a
290      *  homogeneous DFA, must compute char classes across all states.
291      *  We are going to generate DFAs with heterogeneous states so we
292      *  only care that the set of transitions out of a single state are
293      *  unique. :)
294      *
295      *  The idea for adding a new set, t, is to look for overlap with the
296      *  elements of existing list s.  Upon overlap, replace
297      *  existing set s[i] with two new disjoint sets, s[i]-t and s[i]&amp;t.
298      *  (if s[i]-t is nil, don't add).  The remainder is t-s[i], which is
299      *  what you want to add to the set minus what was already there.  The
300      *  remainder must then be compared against the i+1..n elements in s
301      *  looking for another collision.  Each collision results in a smaller
302      *  and smaller remainder.  Stop when you run out of s elements or
303      *  remainder goes to nil.  If remainder is non nil when you run out of
304      *  s elements, then add remainder to the end.
305      *
306      *  Single element labels are treated as sets to make the code uniform.
307      */
addReachableLabel(Label label)308     protected void addReachableLabel(Label label) {
309 		if ( reachableLabels==null ) {
310 			reachableLabels = new OrderedHashSet<Label>();
311 		}
312 		/*
313 		System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar));
314 		System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
315 				"reachableLabels="+reachableLabels.toString());
316 				*/
317 		if ( reachableLabels.contains(label) ) { // exact label present
318             return;
319         }
320         IntSet t = label.getSet();
321         IntSet remainder = t; // remainder starts out as whole set to add
322         int n = reachableLabels.size(); // only look at initial elements
323         // walk the existing list looking for the collision
324         for (int i=0; i<n; i++) {
325 			Label rl = reachableLabels.get(i);
326             /*
327 			System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+
328                     rl.toString(dfa.nfa.grammar)+"="+
329                     intersection.toString(dfa.nfa.grammar));
330             */
331 			if ( !Label.intersect(label, rl) ) {
332                 continue;
333             }
334 			//System.out.println(label+" collides with "+rl);
335 
336 			// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
337             // (ignoring s_i-t if nil; don't put in list)
338 
339             // Replace existing s_i with intersection since we
340             // know that will always be a non nil character class
341 			IntSet s_i = rl.getSet();
342 			IntSet intersection = s_i.and(t);
343             reachableLabels.set(i, new Label(intersection));
344 
345             // Compute s_i-t to see what is in current set and not in incoming
346             IntSet existingMinusNewElements = s_i.subtract(t);
347 			//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
348             if ( !existingMinusNewElements.isNil() ) {
349                 // found a new character class, add to the end (doesn't affect
350                 // outer loop duration due to n computation a priori.
351                 Label newLabel = new Label(existingMinusNewElements);
352                 reachableLabels.add(newLabel);
353             }
354 
355 			/*
356             System.out.println("after collision, " +
357                     "reachableLabels="+reachableLabels.toString());
358 					*/
359 
360             // anything left to add to the reachableLabels?
361             remainder = t.subtract(s_i);
362             if ( remainder.isNil() ) {
363                 break; // nothing left to add to set.  done!
364             }
365 
366             t = remainder;
367         }
368         if ( !remainder.isNil() ) {
369 			/*
370 			System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " +
371 					"reachableLabels="+reachableLabels.toString());
372 			System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar));
373             */
374 			Label newLabel = new Label(remainder);
375             reachableLabels.add(newLabel);
376         }
377 		/*
378 		System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
379 				"reachableLabels="+reachableLabels.toString());
380 				*/
381     }
382 
getReachableLabels()383     public OrderedHashSet<Label> getReachableLabels() {
384         return reachableLabels;
385     }
386 
setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs)387 	public void setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs) {
388 		this.nfaConfigurations = configs;
389 	}
390 
391     /** A decent hash for a DFA state is the sum of the NFA state/alt pairs.
392      *  This is used when we add DFAState objects to the DFA.states Map and
393      *  when we compare DFA states.  Computed in addNFAConfiguration()
394      */
395 	@Override
hashCode()396     public int hashCode() {
397 		if ( cachedHashCode==0 ) {
398 			// LL(1) algorithm doesn't use NFA configurations, which
399 			// dynamically compute hashcode; must have something; use super
400 			return super.hashCode();
401 		}
402 		return cachedHashCode;
403     }
404 
405     /** Two DFAStates are equal if their NFA configuration sets are the
406 	 *  same. This method is used to see if a DFA state already exists.
407 	 *
408      *  Because the number of alternatives and number of NFA configurations are
409      *  finite, there is a finite number of DFA states that can be processed.
410      *  This is necessary to show that the algorithm terminates.
411 	 *
412 	 *  Cannot test the DFA state numbers here because in DFA.addState we need
413 	 *  to know if any other state exists that has this exact set of NFA
414 	 *  configurations.  The DFAState state number is irrelevant.
415      */
416 	@Override
equals(Object o)417     public boolean equals(Object o) {
418 		// compare set of NFA configurations in this set with other
419         DFAState other = (DFAState)o;
420 		return this.nfaConfigurations.equals(other.nfaConfigurations);
421 	}
422 
423     /** Walk each configuration and if they are all the same alt, return
424      *  that alt else return NFA.INVALID_ALT_NUMBER.  Ignore resolved
425      *  configurations, but don't ignore resolveWithPredicate configs
426      *  because this state should not be an accept state.  We need to add
427      *  this to the work list and then have semantic predicate edges
428      *  emanating from it.
429      */
getUniquelyPredictedAlt()430     public int getUniquelyPredictedAlt() {
431 		if ( cachedUniquelyPredicatedAlt!=PREDICTED_ALT_UNSET ) {
432 			return cachedUniquelyPredicatedAlt;
433 		}
434         int alt = NFA.INVALID_ALT_NUMBER;
435 		int numConfigs = nfaConfigurations.size();
436 		for (int i = 0; i < numConfigs; i++) {
437 			NFAConfiguration configuration = nfaConfigurations.get(i);
438 			// ignore anything we resolved; predicates will still result
439 			// in transitions out of this state, so must count those
440 			// configurations; i.e., don't ignore resolveWithPredicate configs
441 			if ( configuration.resolved ) {
442 				continue;
443 			}
444 			if ( alt==NFA.INVALID_ALT_NUMBER ) {
445 				alt = configuration.alt; // found first nonresolved alt
446 			}
447 			else if ( configuration.alt!=alt ) {
448 				return NFA.INVALID_ALT_NUMBER;
449 			}
450 		}
451 		this.cachedUniquelyPredicatedAlt = alt;
452         return alt;
453     }
454 
455 	/** Return the uniquely mentioned alt from the NFA configurations;
456 	 *  Ignore the resolved bit etc...  Return INVALID_ALT_NUMBER
457 	 *  if there is more than one alt mentioned.
458 	 */
getUniqueAlt()459 	public int getUniqueAlt() {
460 		int alt = NFA.INVALID_ALT_NUMBER;
461 		int numConfigs = nfaConfigurations.size();
462 		for (int i = 0; i < numConfigs; i++) {
463 			NFAConfiguration configuration = nfaConfigurations.get(i);
464 			if ( alt==NFA.INVALID_ALT_NUMBER ) {
465 				alt = configuration.alt; // found first alt
466 			}
467 			else if ( configuration.alt!=alt ) {
468 				return NFA.INVALID_ALT_NUMBER;
469 			}
470 		}
471 		return alt;
472 	}
473 
474 	/** When more than one alternative can match the same input, the first
475 	 *  alternative is chosen to resolve the conflict.  The other alts
476 	 *  are "turned off" by setting the "resolved" flag in the NFA
477 	 *  configurations.  Return the set of disabled alternatives.  For
478 	 *
479 	 *  a : A | A | A ;
480 	 *
481 	 *  this method returns {2,3} as disabled.  This does not mean that
482 	 *  the alternative is totally unreachable, it just means that for this
483 	 *  DFA state, that alt is disabled.  There may be other accept states
484 	 *  for that alt.
485 	 */
getDisabledAlternatives()486 	public Set<Integer> getDisabledAlternatives() {
487 		Set<Integer> disabled = new LinkedHashSet<Integer>();
488 		int numConfigs = nfaConfigurations.size();
489 		for (int i = 0; i < numConfigs; i++) {
490 			NFAConfiguration configuration = nfaConfigurations.get(i);
491 			if ( configuration.resolved ) {
492 				disabled.add(Utils.integer(configuration.alt));
493 			}
494 		}
495 		return disabled;
496 	}
497 
getNonDeterministicAlts()498 	protected Set<Integer> getNonDeterministicAlts() {
499 		int user_k = dfa.getUserMaxLookahead();
500 		if ( user_k>0 && user_k==k ) {
501 			// if fixed lookahead, then more than 1 alt is a nondeterminism
502 			// if we have hit the max lookahead
503 			return getAltSet();
504 		}
505 		else if ( abortedDueToMultipleRecursiveAlts || abortedDueToRecursionOverflow ) {
506 			// if we had to abort for non-LL(*) state assume all alts are a problem
507 			return getAltSet();
508 		}
509 		else {
510 			return getConflictingAlts();
511 		}
512 	}
513 
514     /** Walk each NFA configuration in this DFA state looking for a conflict
515      *  where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with
516      *  context conflicting ctx predicts alts i and j.  Return an Integer set
517 	 *  of the alternative numbers that conflict.  Two contexts conflict if
518 	 *  they are equal or one is a stack suffix of the other or one is
519 	 *  the empty context.
520 	 *
521      *  Use a hash table to record the lists of configs for each state
522 	 *  as they are encountered.  We need only consider states for which
523 	 *  there is more than one configuration.  The configurations' predicted
524 	 *  alt must be different or must have different contexts to avoid a
525 	 *  conflict.
526 	 *
527 	 *  Don't report conflicts for DFA states that have conflicting Tokens
528 	 *  rule NFA states; they will be resolved in favor of the first rule.
529      */
getConflictingAlts()530     protected Set<Integer> getConflictingAlts() {
531 		// TODO this is called multiple times: cache result?
532 		//System.out.println("getNondetAlts for DFA state "+stateNumber);
533  		Set<Integer> nondeterministicAlts = new HashSet<Integer>();
534 
535 		// If only 1 NFA conf then no way it can be nondeterministic;
536 		// save the overhead.  There are many o-a->o NFA transitions
537 		// and so we save a hash map and iterator creation for each
538 		// state.
539 		int numConfigs = nfaConfigurations.size();
540 		if ( numConfigs <=1 ) {
541 			return null;
542 		}
543 
544 		// First get a list of configurations for each state.
545 		// Most of the time, each state will have one associated configuration.
546 		MultiMap<Integer, NFAConfiguration> stateToConfigListMap =
547 			new MultiMap<Integer, NFAConfiguration>();
548 		for (int i = 0; i < numConfigs; i++) {
549 			NFAConfiguration configuration = nfaConfigurations.get(i);
550 			Integer stateI = Utils.integer(configuration.state);
551 			stateToConfigListMap.map(stateI, configuration);
552 		}
553 		// potential conflicts are states with > 1 configuration and diff alts
554 		Set<Integer> states = stateToConfigListMap.keySet();
555 		int numPotentialConflicts = 0;
556 		for (Integer stateI : states) {
557 			boolean thisStateHasPotentialProblem = false;
558 			List<NFAConfiguration> configsForState = stateToConfigListMap.get(stateI);
559 			int alt=0;
560 			int numConfigsForState = configsForState.size();
561 			for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) {
562 				NFAConfiguration c = configsForState.get(i);
563 				if ( alt==0 ) {
564 					alt = c.alt;
565 				}
566 				else if ( c.alt!=alt ) {
567 					/*
568 					System.out.println("potential conflict in state "+stateI+
569 									   " configs: "+configsForState);
570 					*/
571 					// 11/28/2005: don't report closures that pinch back
572 					// together in Tokens rule.  We want to silently resolve
573 					// to the first token definition ala lex/flex by ignoring
574 					// these conflicts.
575 					// Also this ensures that lexers look for more and more
576 					// characters (longest match) before resorting to predicates.
577 					// TestSemanticPredicates.testLexerMatchesLongestThenTestPred()
578 					// for example would terminate at state s1 and test predicate
579 					// meaning input "ab" would test preds to decide what to
580 					// do but it should match rule C w/o testing preds.
581 					if ( dfa.nfa.grammar.type!=Grammar.LEXER ||
582 						 !dfa.decisionNFAStartState.enclosingRule.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) )
583 					{
584 						numPotentialConflicts++;
585 						thisStateHasPotentialProblem = true;
586 					}
587 				}
588 			}
589 			if ( !thisStateHasPotentialProblem ) {
590 				// remove NFA state's configurations from
591 				// further checking; no issues with it
592 				// (can't remove as it's concurrent modification; set to null)
593 				stateToConfigListMap.put(stateI, null);
594 			}
595 		}
596 
597 		// a fast check for potential issues; most states have none
598 		if ( numPotentialConflicts==0 ) {
599 			return null;
600 		}
601 
602 		// we have a potential problem, so now go through config lists again
603 		// looking for different alts (only states with potential issues
604 		// are left in the states set).  Now we will check context.
605 		// For example, the list of configs for NFA state 3 in some DFA
606 		// state might be:
607 		//   [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2]
608 		// I want to create a map from context to alts looking for overlap:
609 		//   [28 18 $] -> 2
610 		//   [28 $] -> 1
611 		//   [$] -> 1,2
612 		// Indeed a conflict exists as same state 3, same context [$], predicts
613 		// alts 1 and 2.
614 		// walk each state with potential conflicting configurations
615 		for (Integer stateI : states) {
616 			List<NFAConfiguration> configsForState = stateToConfigListMap.get(stateI);
617 			// compare each configuration pair s, t to ensure:
618 			// s.ctx different than t.ctx if s.alt != t.alt
619 			int numConfigsForState = 0;
620 			if ( configsForState!=null ) {
621 				numConfigsForState = configsForState.size();
622 			}
623 			for (int i = 0; i < numConfigsForState; i++) {
624 				NFAConfiguration s = configsForState.get(i);
625 				for (int j = i+1; j < numConfigsForState; j++) {
626 					NFAConfiguration t = configsForState.get(j);
627 					// conflicts means s.ctx==t.ctx or s.ctx is a stack
628 					// suffix of t.ctx or vice versa (if alts differ).
629 					// Also a conflict if s.ctx or t.ctx is empty
630 					if ( s.alt != t.alt && s.context.conflictsWith(t.context) ) {
631 						nondeterministicAlts.add(Utils.integer(s.alt));
632 						nondeterministicAlts.add(Utils.integer(t.alt));
633 					}
634 				}
635 			}
636 		}
637 
638 		if ( nondeterministicAlts.isEmpty() ) {
639 			return null;
640 		}
641         return nondeterministicAlts;
642     }
643 
644 	/** Get the set of all alts mentioned by all NFA configurations in this
645 	 *  DFA state.
646 	 */
getAltSet()647 	public Set<Integer> getAltSet() {
648 		int numConfigs = nfaConfigurations.size();
649 		Set<Integer> alts = new HashSet<Integer>();
650 		for (int i = 0; i < numConfigs; i++) {
651 			NFAConfiguration configuration = nfaConfigurations.get(i);
652 			alts.add(Utils.integer(configuration.alt));
653 		}
654 		if ( alts.isEmpty() ) {
655 			return null;
656 		}
657 		return alts;
658 	}
659 
getGatedSyntacticPredicatesInNFAConfigurations()660 	public Set<? extends SemanticContext> getGatedSyntacticPredicatesInNFAConfigurations() {
661 		int numConfigs = nfaConfigurations.size();
662 		Set<SemanticContext> synpreds = new HashSet<SemanticContext>();
663 		for (int i = 0; i < numConfigs; i++) {
664 			NFAConfiguration configuration = nfaConfigurations.get(i);
665 			SemanticContext gatedPredExpr =
666 				configuration.semanticContext.getGatedPredicateContext();
667 			// if this is a manual syn pred (gated and syn pred), add
668 			if ( gatedPredExpr!=null &&
669 				 configuration.semanticContext.isSyntacticPredicate() )
670 			{
671 				synpreds.add(configuration.semanticContext);
672 			}
673 		}
674 		if ( synpreds.isEmpty() ) {
675 			return null;
676 		}
677 		return synpreds;
678 	}
679 
680 	/** For gated productions, we need an OR'd list of all predicates for the
681 	 *  target of an edge so we can gate the edge based upon the predicates
682 	 *  associated with taking that path (if any).
683 	 *
684 	 *  For syntactic predicates, we only want to generate predicate
685 	 *  evaluations as it transitions to an accept state; waste to
686 	 *  do it earlier.  So, only add gated preds derived from manually-
687 	 *  specified syntactic predicates if this is an accept state.
688 	 *
689 	 *  Also, since configurations w/o gated predicates are like true
690 	 *  gated predicates, finding a configuration whose alt has no gated
691 	 *  predicate implies we should evaluate the predicate to true. This
692 	 *  means the whole edge has to be ungated. Consider:
693 	 *
694 	 *	 X : ('a' | {p}?=&gt; 'a')
695 	 *	   | 'a' 'b'
696 	 *	   ;
697 	 *
698 	 *  Here, you 'a' gets you from s0 to s1 but you can't test p because
699 	 *  plain 'a' is ok.  It's also ok for starting alt 2.  Hence, you can't
700 	 *  test p.  Even on the edge going to accept state for alt 1 of X, you
701 	 *  can't test p.  You can get to the same place with and w/o the context.
702 	 *  Therefore, it is never ok to test p in this situation.
703 	 *
704 	 *  TODO: cache this as it's called a lot; or at least set bit if &gt;1 present in state
705 	 */
getGatedPredicatesInNFAConfigurations()706 	public SemanticContext getGatedPredicatesInNFAConfigurations() {
707 		SemanticContext unionOfPredicatesFromAllAlts = null;
708 		int numConfigs = nfaConfigurations.size();
709 		for (int i = 0; i < numConfigs; i++) {
710 			NFAConfiguration configuration = nfaConfigurations.get(i);
711 			SemanticContext gatedPredExpr =
712 				configuration.semanticContext.getGatedPredicateContext();
713 			if ( gatedPredExpr==null ) {
714 				// if we ever find a configuration w/o a gated predicate
715 				// (even if it's a nongated predicate), we cannot gate
716 				// the indident edges.
717 				return null;
718 			}
719 			else if ( acceptState || !configuration.semanticContext.isSyntacticPredicate() ) {
720 				// at this point we have a gated predicate and, due to elseif,
721 				// we know it's an accept or not a syn pred.  In this case,
722 				// it's safe to add the gated predicate to the union.  We
723 				// only want to add syn preds if it's an accept state.  Other
724 				// gated preds can be used with edges leading to accept states.
725 				if ( unionOfPredicatesFromAllAlts==null ) {
726 					unionOfPredicatesFromAllAlts = gatedPredExpr;
727 				}
728 				else {
729 					unionOfPredicatesFromAllAlts =
730 						SemanticContext.or(unionOfPredicatesFromAllAlts,gatedPredExpr);
731 				}
732 			}
733 		}
734 		if ( unionOfPredicatesFromAllAlts instanceof SemanticContext.TruePredicate ) {
735 			return null;
736 		}
737 		return unionOfPredicatesFromAllAlts;
738 	}
739 
740     /** Is an accept state reachable from this state? */
getAcceptStateReachable()741     public int getAcceptStateReachable() {
742         return acceptStateReachable;
743     }
744 
setAcceptStateReachable(int acceptStateReachable)745     public void setAcceptStateReachable(int acceptStateReachable) {
746         this.acceptStateReachable = acceptStateReachable;
747     }
748 
isResolvedWithPredicates()749     public boolean isResolvedWithPredicates() {
750         return resolvedWithPredicates;
751     }
752 
753     /** Print all NFA states plus what alts they predict */
754 	@Override
toString()755     public String toString() {
756         StringBuilder buf = new StringBuilder();
757         buf.append(stateNumber).append(":{");
758 		for (int i = 0; i < nfaConfigurations.size(); i++) {
759 			NFAConfiguration configuration = nfaConfigurations.get(i);
760 			if ( i>0 ) {
761 				buf.append(", ");
762 			}
763 			buf.append(configuration);
764 		}
765         buf.append("}");
766         return buf.toString();
767     }
768 
getLookaheadDepth()769 	public int getLookaheadDepth() {
770 		return k;
771 	}
772 
setLookaheadDepth(int k)773 	public void setLookaheadDepth(int k) {
774 		this.k = k;
775 		if ( k > dfa.max_k ) { // track max k for entire DFA
776 			dfa.max_k = k;
777 		}
778 	}
779 
780 }
781