• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.misc.IntSet;
31 import org.antlr.misc.MultiMap;
32 import org.antlr.misc.OrderedHashSet;
33 import org.antlr.misc.Utils;
34 import org.antlr.tool.Grammar;
35 
36 import java.util.*;
37 
38 /** A DFA state represents a set of possible NFA configurations.
39  *  As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
40  *  to keep track of all possible states the NFA can be in after
41  *  reading each input symbol.  That is to say, after reading
42  *  input a1a2..an, the DFA is in a state that represents the
43  *  subset T of the states of the NFA that are reachable from the
44  *  NFA's start state along some path labeled a1a2..an."
45  *  In conventional NFA->DFA conversion, therefore, the subset T
46  *  would be a bitset representing the set of states the
47  *  NFA could be in.  We need to track the alt predicted by each
48  *  state as well, however.  More importantly, we need to maintain
49  *  a stack of states, tracking the closure operations as they
50  *  jump from rule to rule, emulating rule invocations (method calls).
51  *  Recall that NFAs do not normally have a stack like a pushdown-machine
52  *  so I have to add one to simulate the proper lookahead sequences for
53  *  the underlying LL grammar from which the NFA was derived.
54  *
55  *  I use a list of NFAConfiguration objects.  An NFAConfiguration
56  *  is both a state (ala normal conversion) and an NFAContext describing
57  *  the chain of rules (if any) followed to arrive at that state.  There
58  *  is also the semantic context, which is the "set" of predicates found
59  *  on the path to this configuration.
60  *
61  *  A DFA state may have multiple references to a particular state,
62  *  but with different NFAContexts (with same or different alts)
63  *  meaning that state was reached via a different set of rule invocations.
64  */
65 public class DFAState extends State {
66     public static final int INITIAL_NUM_TRANSITIONS = 4;
67 	public static final int PREDICTED_ALT_UNSET = NFA.INVALID_ALT_NUMBER-1;
68 
69     /** We are part of what DFA?  Use this ref to get access to the
70      *  context trees for an alt.
71      */
72     public DFA dfa;
73 
74     /** Track the transitions emanating from this DFA state.  The List
75      *  elements are Transition objects.
76      */
77     protected List<Transition> transitions =
78 		new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);
79 
80 	/** When doing an acyclic DFA, this is the number of lookahead symbols
81 	 *  consumed to reach this state.  This value may be nonzero for most
82 	 *  dfa states, but it is only a valid value if the user has specified
83 	 *  a max fixed lookahead.
84 	 */
85     protected int k;
86 
87     /** The NFA->DFA algorithm may terminate leaving some states
88      *  without a path to an accept state, implying that upon certain
89      *  input, the decision is not deterministic--no decision about
90      *  predicting a unique alternative can be made.  Recall that an
91      *  accept state is one in which a unique alternative is predicted.
92      */
93     protected int acceptStateReachable = DFA.REACHABLE_UNKNOWN;
94 
95     /** Rather than recheck every NFA configuration in a DFA state (after
96      *  resolving) in findNewDFAStatesAndAddDFATransitions just check
97      *  this boolean.  Saves a linear walk perhaps DFA state creation.
98      *  Every little bit helps.
99      */
100     protected boolean resolvedWithPredicates = false;
101 
102 	/** If a closure operation finds that we tried to invoke the same
103 	 *  rule too many times (stack would grow beyond a threshold), it
104 	 *  marks the state has aborted and notifies the DecisionProbe.
105 	 */
106 	public boolean abortedDueToRecursionOverflow = false;
107 
108 	/** If we detect recursion on more than one alt, decision is non-LL(*),
109 	 *  but try to isolate it to only those states whose closure operations
110 	 *  detect recursion.  There may be other alts that are cool:
111 	 *
112 	 *  a : recur '.'
113 	 *    | recur ';'
114 	 *    | X Y  // LL(2) decision; don't abort and use k=1 plus backtracking
115 	 *    | X Z
116 	 *    ;
117 	 *
118 	 *  12/13/2007: Actually this has caused problems.  If k=*, must terminate
119 	 *  and throw out entire DFA; retry with k=1.  Since recursive, do not
120 	 *  attempt more closure ops as it may take forever.  Exception thrown
121 	 *  now and we simply report the problem.  If synpreds exist, I'll retry
122 	 *  with k=1.
123 	 */
124 	protected boolean abortedDueToMultipleRecursiveAlts = false;
125 
126 	/** Build up the hash code for this state as NFA configurations
127      *  are added as it's monotonically increasing list of configurations.
128      */
129     protected int cachedHashCode;
130 
131 	protected int cachedUniquelyPredicatedAlt = PREDICTED_ALT_UNSET;
132 
133 	public int minAltInConfigurations=Integer.MAX_VALUE;
134 
135 	public boolean atLeastOneConfigurationHasAPredicate = false;
136 
137 	/** The set of NFA configurations (state,alt,context) for this DFA state */
138     public OrderedHashSet<NFAConfiguration> nfaConfigurations =
139 		new OrderedHashSet<NFAConfiguration>();
140 
141 	public List<NFAConfiguration> configurationsWithLabeledEdges =
142 		new ArrayList<NFAConfiguration>();
143 
144 	/** Used to prevent the closure operation from looping to itself and
145      *  hence looping forever.  Sensitive to the NFA state, the alt, and
146      *  the stack context.  This just the nfa config set because we want to
147 	 *  prevent closures only on states contributed by closure not reach
148 	 *  operations.
149 	 *
150 	 *  Two configurations identical including semantic context are
151 	 *  considered the same closure computation.  @see NFAToDFAConverter.closureBusy().
152      */
153 	protected Set<NFAConfiguration> closureBusy = new HashSet<NFAConfiguration>();
154 
155 	/** As this state is constructed (i.e., as NFA states are added), we
156      *  can easily check for non-epsilon transitions because the only
157      *  transition that could be a valid label is transition(0).  When we
158      *  process this node eventually, we'll have to walk all states looking
159      *  for all possible transitions.  That is of the order: size(label space)
160      *  times size(nfa states), which can be pretty damn big.  It's better
161      *  to simply track possible labels.
162      */
163     protected OrderedHashSet<Label> reachableLabels;
164 
DFAState(DFA dfa)165     public DFAState(DFA dfa) {
166         this.dfa = dfa;
167     }
168 
reset()169 	public void reset() {
170 		//nfaConfigurations = null; // getGatedPredicatesInNFAConfigurations needs
171 		configurationsWithLabeledEdges = null;
172 		closureBusy = null;
173 		reachableLabels = null;
174 	}
175 
transition(int i)176 	public Transition transition(int i) {
177         return (Transition)transitions.get(i);
178     }
179 
getNumberOfTransitions()180     public int getNumberOfTransitions() {
181         return transitions.size();
182     }
183 
addTransition(Transition t)184     public void addTransition(Transition t) {
185         transitions.add(t);
186     }
187 
188 	/** Add a transition from this state to target with label.  Return
189 	 *  the transition number from 0..n-1.
190 	 */
addTransition(DFAState target, Label label)191     public int addTransition(DFAState target, Label label) {
192 		transitions.add( new Transition(label, target) );
193 		return transitions.size()-1;
194     }
195 
getTransition(int trans)196     public Transition getTransition(int trans) {
197         return transitions.get(trans);
198     }
199 
removeTransition(int trans)200 	public void removeTransition(int trans) {
201 		transitions.remove(trans);
202 	}
203 
204     /** Add an NFA configuration to this DFA node.  Add uniquely
205      *  an NFA state/alt/syntactic&semantic context (chain of invoking state(s)
206      *  and semantic predicate contexts).
207      *
208      *  I don't see how there could be two configurations with same
209      *  state|alt|synCtx and different semantic contexts because the
210      *  semantic contexts are computed along the path to a particular state
211      *  so those two configurations would have to have the same predicate.
212      *  Nonetheless, the addition of configurations is unique on all
213      *  configuration info.  I guess I'm saying that syntactic context
214      *  implies semantic context as the latter is computed according to the
215      *  former.
216      *
217      *  As we add configurations to this DFA state, track the set of all possible
218      *  transition labels so we can simply walk it later rather than doing a
219      *  loop over all possible labels in the NFA.
220      */
addNFAConfiguration(NFAState state, NFAConfiguration c)221     public void addNFAConfiguration(NFAState state, NFAConfiguration c) {
222 		if ( nfaConfigurations.contains(c) ) {
223             return;
224         }
225 
226         nfaConfigurations.add(c);
227 
228 		// track min alt rather than compute later
229 		if ( c.alt < minAltInConfigurations ) {
230 			minAltInConfigurations = c.alt;
231 		}
232 
233 		if ( c.semanticContext!=SemanticContext.EMPTY_SEMANTIC_CONTEXT ) {
234 			atLeastOneConfigurationHasAPredicate = true;
235 		}
236 
237 		// update hashCode; for some reason using context.hashCode() also
238         // makes the GC take like 70% of the CPU and is slow!
239         cachedHashCode += c.state + c.alt;
240 
241 		// update reachableLabels
242 		// We're adding an NFA state; check to see if it has a non-epsilon edge
243 		if ( state.transition[0] != null ) {
244 			Label label = state.transition[0].label;
245 			if ( !(label.isEpsilon()||label.isSemanticPredicate()) ) {
246 				// this NFA state has a non-epsilon edge, track for fast
247 				// walking later when we do reach on this DFA state we're
248 				// building.
249 				configurationsWithLabeledEdges.add(c);
250 				if ( state.transition[1] ==null ) {
251 					// later we can check this to ignore o-A->o states in closure
252 					c.singleAtomTransitionEmanating = true;
253 				}
254 				addReachableLabel(label);
255 			}
256 		}
257     }
258 
addNFAConfiguration(NFAState state, int alt, NFAContext context, SemanticContext semanticContext)259 	public NFAConfiguration addNFAConfiguration(NFAState state,
260 												int alt,
261 												NFAContext context,
262 												SemanticContext semanticContext)
263 	{
264 		NFAConfiguration c = new NFAConfiguration(state.stateNumber,
265 												  alt,
266 												  context,
267 												  semanticContext);
268 		addNFAConfiguration(state, c);
269 		return c;
270 	}
271 
272 	/** Add label uniquely and disjointly; intersection with
273      *  another set or int/char forces breaking up the set(s).
274      *
275      *  Example, if reachable list of labels is [a..z, {k,9}, 0..9],
276      *  the disjoint list will be [{a..j,l..z}, k, 9, 0..8].
277      *
278      *  As we add NFA configurations to a DFA state, we might as well track
279      *  the set of all possible transition labels to make the DFA conversion
280      *  more efficient.  W/o the reachable labels, we'd need to check the
281      *  whole vocabulary space (could be 0..\uFFFF)!  The problem is that
282      *  labels can be sets, which may overlap with int labels or other sets.
283      *  As we need a deterministic set of transitions from any
284      *  state in the DFA, we must make the reachable labels set disjoint.
285      *  This operation amounts to finding the character classes for this
286      *  DFA state whereas with tools like flex, that need to generate a
287      *  homogeneous DFA, must compute char classes across all states.
288      *  We are going to generate DFAs with heterogeneous states so we
289      *  only care that the set of transitions out of a single state are
290      *  unique. :)
291      *
292      *  The idea for adding a new set, t, is to look for overlap with the
293      *  elements of existing list s.  Upon overlap, replace
294      *  existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t.
295      *  (if s[i]-t is nil, don't add).  The remainder is t-s[i], which is
296      *  what you want to add to the set minus what was already there.  The
297      *  remainder must then be compared against the i+1..n elements in s
298      *  looking for another collision.  Each collision results in a smaller
299      *  and smaller remainder.  Stop when you run out of s elements or
300      *  remainder goes to nil.  If remainder is non nil when you run out of
301      *  s elements, then add remainder to the end.
302      *
303      *  Single element labels are treated as sets to make the code uniform.
304      */
addReachableLabel(Label label)305     protected void addReachableLabel(Label label) {
306 		if ( reachableLabels==null ) {
307 			reachableLabels = new OrderedHashSet<Label>();
308 		}
309 		/*
310 		System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar));
311 		System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
312 				"reachableLabels="+reachableLabels.toString());
313 				*/
314 		if ( reachableLabels.contains(label) ) { // exact label present
315             return;
316         }
317         IntSet t = label.getSet();
318         IntSet remainder = t; // remainder starts out as whole set to add
319         int n = reachableLabels.size(); // only look at initial elements
320         // walk the existing list looking for the collision
321         for (int i=0; i<n; i++) {
322 			Label rl = reachableLabels.get(i);
323             /*
324 			System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+
325                     rl.toString(dfa.nfa.grammar)+"="+
326                     intersection.toString(dfa.nfa.grammar));
327             */
328 			if ( !Label.intersect(label, rl) ) {
329                 continue;
330             }
331 			//System.out.println(label+" collides with "+rl);
332 
333 			// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
334             // (ignoring s_i-t if nil; don't put in list)
335 
336             // Replace existing s_i with intersection since we
337             // know that will always be a non nil character class
338 			IntSet s_i = rl.getSet();
339 			IntSet intersection = s_i.and(t);
340             reachableLabels.set(i, new Label(intersection));
341 
342             // Compute s_i-t to see what is in current set and not in incoming
343             IntSet existingMinusNewElements = s_i.subtract(t);
344 			//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
345             if ( !existingMinusNewElements.isNil() ) {
346                 // found a new character class, add to the end (doesn't affect
347                 // outer loop duration due to n computation a priori.
348                 Label newLabel = new Label(existingMinusNewElements);
349                 reachableLabels.add(newLabel);
350             }
351 
352 			/*
353             System.out.println("after collision, " +
354                     "reachableLabels="+reachableLabels.toString());
355 					*/
356 
357             // anything left to add to the reachableLabels?
358             remainder = t.subtract(s_i);
359             if ( remainder.isNil() ) {
360                 break; // nothing left to add to set.  done!
361             }
362 
363             t = remainder;
364         }
365         if ( !remainder.isNil() ) {
366 			/*
367 			System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " +
368 					"reachableLabels="+reachableLabels.toString());
369 			System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar));
370             */
371 			Label newLabel = new Label(remainder);
372             reachableLabels.add(newLabel);
373         }
374 		/*
375 		System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
376 				"reachableLabels="+reachableLabels.toString());
377 				*/
378     }
379 
getReachableLabels()380     public OrderedHashSet getReachableLabels() {
381         return reachableLabels;
382     }
383 
setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs)384 	public void setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs) {
385 		this.nfaConfigurations = configs;
386 	}
387 
388     /** A decent hash for a DFA state is the sum of the NFA state/alt pairs.
389      *  This is used when we add DFAState objects to the DFA.states Map and
390      *  when we compare DFA states.  Computed in addNFAConfiguration()
391      */
hashCode()392     public int hashCode() {
393 		if ( cachedHashCode==0 ) {
394 			// LL(1) algorithm doesn't use NFA configurations, which
395 			// dynamically compute hashcode; must have something; use super
396 			return super.hashCode();
397 		}
398 		return cachedHashCode;
399     }
400 
401     /** Two DFAStates are equal if their NFA configuration sets are the
402 	 *  same. This method is used to see if a DFA state already exists.
403 	 *
404      *  Because the number of alternatives and number of NFA configurations are
405      *  finite, there is a finite number of DFA states that can be processed.
406      *  This is necessary to show that the algorithm terminates.
407 	 *
408 	 *  Cannot test the DFA state numbers here because in DFA.addState we need
409 	 *  to know if any other state exists that has this exact set of NFA
410 	 *  configurations.  The DFAState state number is irrelevant.
411      */
equals(Object o)412     public boolean equals(Object o) {
413 		// compare set of NFA configurations in this set with other
414         DFAState other = (DFAState)o;
415 		return this.nfaConfigurations.equals(other.nfaConfigurations);
416 	}
417 
418     /** Walk each configuration and if they are all the same alt, return
419      *  that alt else return NFA.INVALID_ALT_NUMBER.  Ignore resolved
420      *  configurations, but don't ignore resolveWithPredicate configs
421      *  because this state should not be an accept state.  We need to add
422      *  this to the work list and then have semantic predicate edges
423      *  emanating from it.
424      */
getUniquelyPredictedAlt()425     public int getUniquelyPredictedAlt() {
426 		if ( cachedUniquelyPredicatedAlt!=PREDICTED_ALT_UNSET ) {
427 			return cachedUniquelyPredicatedAlt;
428 		}
429         int alt = NFA.INVALID_ALT_NUMBER;
430 		int numConfigs = nfaConfigurations.size();
431 		for (int i = 0; i < numConfigs; i++) {
432 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
433 			// ignore anything we resolved; predicates will still result
434 			// in transitions out of this state, so must count those
435 			// configurations; i.e., don't ignore resolveWithPredicate configs
436 			if ( configuration.resolved ) {
437 				continue;
438 			}
439 			if ( alt==NFA.INVALID_ALT_NUMBER ) {
440 				alt = configuration.alt; // found first nonresolved alt
441 			}
442 			else if ( configuration.alt!=alt ) {
443 				return NFA.INVALID_ALT_NUMBER;
444 			}
445 		}
446 		this.cachedUniquelyPredicatedAlt = alt;
447         return alt;
448     }
449 
450 	/** Return the uniquely mentioned alt from the NFA configurations;
451 	 *  Ignore the resolved bit etc...  Return INVALID_ALT_NUMBER
452 	 *  if there is more than one alt mentioned.
453 	 */
getUniqueAlt()454 	public int getUniqueAlt() {
455 		int alt = NFA.INVALID_ALT_NUMBER;
456 		int numConfigs = nfaConfigurations.size();
457 		for (int i = 0; i < numConfigs; i++) {
458 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
459 			if ( alt==NFA.INVALID_ALT_NUMBER ) {
460 				alt = configuration.alt; // found first alt
461 			}
462 			else if ( configuration.alt!=alt ) {
463 				return NFA.INVALID_ALT_NUMBER;
464 			}
465 		}
466 		return alt;
467 	}
468 
469 	/** When more than one alternative can match the same input, the first
470 	 *  alternative is chosen to resolve the conflict.  The other alts
471 	 *  are "turned off" by setting the "resolved" flag in the NFA
472 	 *  configurations.  Return the set of disabled alternatives.  For
473 	 *
474 	 *  a : A | A | A ;
475 	 *
476 	 *  this method returns {2,3} as disabled.  This does not mean that
477 	 *  the alternative is totally unreachable, it just means that for this
478 	 *  DFA state, that alt is disabled.  There may be other accept states
479 	 *  for that alt.
480 	 */
getDisabledAlternatives()481 	public Set getDisabledAlternatives() {
482 		Set disabled = new LinkedHashSet();
483 		int numConfigs = nfaConfigurations.size();
484 		for (int i = 0; i < numConfigs; i++) {
485 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
486 			if ( configuration.resolved ) {
487 				disabled.add(Utils.integer(configuration.alt));
488 			}
489 		}
490 		return disabled;
491 	}
492 
getNonDeterministicAlts()493 	protected Set getNonDeterministicAlts() {
494 		int user_k = dfa.getUserMaxLookahead();
495 		if ( user_k>0 && user_k==k ) {
496 			// if fixed lookahead, then more than 1 alt is a nondeterminism
497 			// if we have hit the max lookahead
498 			return getAltSet();
499 		}
500 		else if ( abortedDueToMultipleRecursiveAlts || abortedDueToRecursionOverflow ) {
501 			// if we had to abort for non-LL(*) state assume all alts are a problem
502 			return getAltSet();
503 		}
504 		else {
505 			return getConflictingAlts();
506 		}
507 	}
508 
509     /** Walk each NFA configuration in this DFA state looking for a conflict
510      *  where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with
511      *  context conflicting ctx predicts alts i and j.  Return an Integer set
512 	 *  of the alternative numbers that conflict.  Two contexts conflict if
513 	 *  they are equal or one is a stack suffix of the other or one is
514 	 *  the empty context.
515 	 *
516      *  Use a hash table to record the lists of configs for each state
517 	 *  as they are encountered.  We need only consider states for which
518 	 *  there is more than one configuration.  The configurations' predicted
519 	 *  alt must be different or must have different contexts to avoid a
520 	 *  conflict.
521 	 *
522 	 *  Don't report conflicts for DFA states that have conflicting Tokens
523 	 *  rule NFA states; they will be resolved in favor of the first rule.
524      */
getConflictingAlts()525     protected Set<Integer> getConflictingAlts() {
526 		// TODO this is called multiple times: cache result?
527 		//System.out.println("getNondetAlts for DFA state "+stateNumber);
528  		Set<Integer> nondeterministicAlts = new HashSet<Integer>();
529 
530 		// If only 1 NFA conf then no way it can be nondeterministic;
531 		// save the overhead.  There are many o-a->o NFA transitions
532 		// and so we save a hash map and iterator creation for each
533 		// state.
534 		int numConfigs = nfaConfigurations.size();
535 		if ( numConfigs <=1 ) {
536 			return null;
537 		}
538 
539 		// First get a list of configurations for each state.
540 		// Most of the time, each state will have one associated configuration.
541 		MultiMap<Integer, NFAConfiguration> stateToConfigListMap =
542 			new MultiMap<Integer, NFAConfiguration>();
543 		for (int i = 0; i < numConfigs; i++) {
544 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
545 			Integer stateI = Utils.integer(configuration.state);
546 			stateToConfigListMap.map(stateI, configuration);
547 		}
548 		// potential conflicts are states with > 1 configuration and diff alts
549 		Set states = stateToConfigListMap.keySet();
550 		int numPotentialConflicts = 0;
551 		for (Iterator it = states.iterator(); it.hasNext();) {
552 			Integer stateI = (Integer) it.next();
553 			boolean thisStateHasPotentialProblem = false;
554 			List configsForState = (List)stateToConfigListMap.get(stateI);
555 			int alt=0;
556 			int numConfigsForState = configsForState.size();
557 			for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) {
558 				NFAConfiguration c = (NFAConfiguration) configsForState.get(i);
559 				if ( alt==0 ) {
560 					alt = c.alt;
561 				}
562 				else if ( c.alt!=alt ) {
563 					/*
564 					System.out.println("potential conflict in state "+stateI+
565 									   " configs: "+configsForState);
566 					*/
567 					// 11/28/2005: don't report closures that pinch back
568 					// together in Tokens rule.  We want to silently resolve
569 					// to the first token definition ala lex/flex by ignoring
570 					// these conflicts.
571 					// Also this ensures that lexers look for more and more
572 					// characters (longest match) before resorting to predicates.
573 					// TestSemanticPredicates.testLexerMatchesLongestThenTestPred()
574 					// for example would terminate at state s1 and test predicate
575 					// meaning input "ab" would test preds to decide what to
576 					// do but it should match rule C w/o testing preds.
577 					if ( dfa.nfa.grammar.type!=Grammar.LEXER ||
578 						 !dfa.decisionNFAStartState.enclosingRule.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) )
579 					{
580 						numPotentialConflicts++;
581 						thisStateHasPotentialProblem = true;
582 					}
583 				}
584 			}
585 			if ( !thisStateHasPotentialProblem ) {
586 				// remove NFA state's configurations from
587 				// further checking; no issues with it
588 				// (can't remove as it's concurrent modification; set to null)
589 				stateToConfigListMap.put(stateI, null);
590 			}
591 		}
592 
593 		// a fast check for potential issues; most states have none
594 		if ( numPotentialConflicts==0 ) {
595 			return null;
596 		}
597 
598 		// we have a potential problem, so now go through config lists again
599 		// looking for different alts (only states with potential issues
600 		// are left in the states set).  Now we will check context.
601 		// For example, the list of configs for NFA state 3 in some DFA
602 		// state might be:
603 		//   [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2]
604 		// I want to create a map from context to alts looking for overlap:
605 		//   [28 18 $] -> 2
606 		//   [28 $] -> 1
607 		//   [$] -> 1,2
608 		// Indeed a conflict exists as same state 3, same context [$], predicts
609 		// alts 1 and 2.
610 		// walk each state with potential conflicting configurations
611 		for (Iterator it = states.iterator(); it.hasNext();) {
612 			Integer stateI = (Integer) it.next();
613 			List configsForState = (List)stateToConfigListMap.get(stateI);
614 			// compare each configuration pair s, t to ensure:
615 			// s.ctx different than t.ctx if s.alt != t.alt
616 			int numConfigsForState = 0;
617 			if ( configsForState!=null ) {
618 				numConfigsForState = configsForState.size();
619 			}
620 			for (int i = 0; i < numConfigsForState; i++) {
621 				NFAConfiguration s = (NFAConfiguration) configsForState.get(i);
622 				for (int j = i+1; j < numConfigsForState; j++) {
623 					NFAConfiguration t = (NFAConfiguration)configsForState.get(j);
624 					// conflicts means s.ctx==t.ctx or s.ctx is a stack
625 					// suffix of t.ctx or vice versa (if alts differ).
626 					// Also a conflict if s.ctx or t.ctx is empty
627 					if ( s.alt != t.alt && s.context.conflictsWith(t.context) ) {
628 						nondeterministicAlts.add(Utils.integer(s.alt));
629 						nondeterministicAlts.add(Utils.integer(t.alt));
630 					}
631 				}
632 			}
633 		}
634 
635 		if ( nondeterministicAlts.size()==0 ) {
636 			return null;
637 		}
638         return nondeterministicAlts;
639     }
640 
641 	/** Get the set of all alts mentioned by all NFA configurations in this
642 	 *  DFA state.
643 	 */
getAltSet()644 	public Set getAltSet() {
645 		int numConfigs = nfaConfigurations.size();
646 		Set alts = new HashSet();
647 		for (int i = 0; i < numConfigs; i++) {
648 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
649 			alts.add(Utils.integer(configuration.alt));
650 		}
651 		if ( alts.size()==0 ) {
652 			return null;
653 		}
654 		return alts;
655 	}
656 
getGatedSyntacticPredicatesInNFAConfigurations()657 	public Set getGatedSyntacticPredicatesInNFAConfigurations() {
658 		int numConfigs = nfaConfigurations.size();
659 		Set<SemanticContext> synpreds = new HashSet<SemanticContext>();
660 		for (int i = 0; i < numConfigs; i++) {
661 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
662 			SemanticContext gatedPredExpr =
663 				configuration.semanticContext.getGatedPredicateContext();
664 			// if this is a manual syn pred (gated and syn pred), add
665 			if ( gatedPredExpr!=null &&
666 				 configuration.semanticContext.isSyntacticPredicate() )
667 			{
668 				synpreds.add(configuration.semanticContext);
669 			}
670 		}
671 		if ( synpreds.size()==0 ) {
672 			return null;
673 		}
674 		return synpreds;
675 	}
676 
677 	/** For gated productions, we need an OR'd list of all predicates for the
678 	 *  target of an edge so we can gate the edge based upon the predicates
679 	 *  associated with taking that path (if any).
680 	 *
681 	 *  For syntactic predicates, we only want to generate predicate
682 	 *  evaluations as it transitions to an accept state; waste to
683 	 *  do it earlier.  So, only add gated preds derived from manually-
684 	 *  specified syntactic predicates if this is an accept state.
685 	 *
686 	 *  Also, since configurations w/o gated predicates are like true
687 	 *  gated predicates, finding a configuration whose alt has no gated
688 	 *  predicate implies we should evaluate the predicate to true. This
689 	 *  means the whole edge has to be ungated. Consider:
690 	 *
691 	 *	 X : ('a' | {p}?=> 'a')
692 	 *	   | 'a' 'b'
693 	 *	   ;
694 	 *
695 	 *  Here, you 'a' gets you from s0 to s1 but you can't test p because
696 	 *  plain 'a' is ok.  It's also ok for starting alt 2.  Hence, you can't
697 	 *  test p.  Even on the edge going to accept state for alt 1 of X, you
698 	 *  can't test p.  You can get to the same place with and w/o the context.
699 	 *  Therefore, it is never ok to test p in this situation.
700 	 *
701 	 *  TODO: cache this as it's called a lot; or at least set bit if >1 present in state
702 	 */
getGatedPredicatesInNFAConfigurations()703 	public SemanticContext getGatedPredicatesInNFAConfigurations() {
704 		SemanticContext unionOfPredicatesFromAllAlts = null;
705 		int numConfigs = nfaConfigurations.size();
706 		for (int i = 0; i < numConfigs; i++) {
707 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
708 			SemanticContext gatedPredExpr =
709 				configuration.semanticContext.getGatedPredicateContext();
710 			if ( gatedPredExpr==null ) {
711 				// if we ever find a configuration w/o a gated predicate
712 				// (even if it's a nongated predicate), we cannot gate
713 				// the indident edges.
714 				return null;
715 			}
716 			else if ( acceptState || !configuration.semanticContext.isSyntacticPredicate() ) {
717 				// at this point we have a gated predicate and, due to elseif,
718 				// we know it's an accept or not a syn pred.  In this case,
719 				// it's safe to add the gated predicate to the union.  We
720 				// only want to add syn preds if it's an accept state.  Other
721 				// gated preds can be used with edges leading to accept states.
722 				if ( unionOfPredicatesFromAllAlts==null ) {
723 					unionOfPredicatesFromAllAlts = gatedPredExpr;
724 				}
725 				else {
726 					unionOfPredicatesFromAllAlts =
727 						SemanticContext.or(unionOfPredicatesFromAllAlts,gatedPredExpr);
728 				}
729 			}
730 		}
731 		if ( unionOfPredicatesFromAllAlts instanceof SemanticContext.TruePredicate ) {
732 			return null;
733 		}
734 		return unionOfPredicatesFromAllAlts;
735 	}
736 
737     /** Is an accept state reachable from this state? */
getAcceptStateReachable()738     public int getAcceptStateReachable() {
739         return acceptStateReachable;
740     }
741 
setAcceptStateReachable(int acceptStateReachable)742     public void setAcceptStateReachable(int acceptStateReachable) {
743         this.acceptStateReachable = acceptStateReachable;
744     }
745 
isResolvedWithPredicates()746     public boolean isResolvedWithPredicates() {
747         return resolvedWithPredicates;
748     }
749 
750     /** Print all NFA states plus what alts they predict */
toString()751     public String toString() {
752         StringBuffer buf = new StringBuffer();
753         buf.append(stateNumber+":{");
754 		for (int i = 0; i < nfaConfigurations.size(); i++) {
755 			NFAConfiguration configuration = (NFAConfiguration) nfaConfigurations.get(i);
756 			if ( i>0 ) {
757 				buf.append(", ");
758 			}
759 			buf.append(configuration);
760 		}
761         buf.append("}");
762         return buf.toString();
763     }
764 
getLookaheadDepth()765 	public int getLookaheadDepth() {
766 		return k;
767 	}
768 
setLookaheadDepth(int k)769 	public void setLookaheadDepth(int k) {
770 		this.k = k;
771 		if ( k > dfa.max_k ) { // track max k for entire DFA
772 			dfa.max_k = k;
773 		}
774 	}
775 
776 }
777