1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 /** A tree node for tracking the call chains for NFAs that invoke 31 * other NFAs. These trees only have to point upwards to their parents 32 * so we can walk back up the tree (i.e., pop stuff off the stack). We 33 * never walk from stack down down through the children. 34 * 35 * Each alt predicted in a decision has its own context tree, 36 * representing all possible return nodes. The initial stack has 37 * EOF ("$") in it. So, for m alternative productions, the lookahead 38 * DFA will have m NFAContext trees. 39 * 40 * To "push" a new context, just do "new NFAContext(context-parent, state)" 41 * which will add itself to the parent. The root is NFAContext(null, null). 42 * 43 * The complete context for an NFA configuration is the set of invoking states 44 * on the path from this node thru the parent pointers to the root. 45 */ 46 public class NFAContext { 47 /** This is similar to Bermudez's m constant in his LAR(m) where 48 * you bound the stack so your states don't explode. The main difference 49 * is that I bound only recursion on the stack, not the simple stack size. 50 * This looser constraint will let the conversion roam further to find 51 * lookahead to resolve a decision. 52 * 53 * Bermudez's m operates differently as it is his LR stack depth 54 * I'm pretty sure it therefore includes all stack symbols. Here I 55 * restrict the size of an NFA configuration to be finite because a 56 * stack component may mention the same NFA invocation state at 57 * most m times. Hence, the number of DFA states will not grow forever. 58 * With recursive rules like 59 * 60 * e : '(' e ')' | INT ; 61 * 62 * you could chase your tail forever if somebody said "s : e '.' | e ';' ;" 63 * This constant prevents new states from being created after a stack gets 64 * "too big". Actually (12/14/2007) I realize that this example is 65 * trapped by the non-LL(*) detector for recursion in > 1 alt. Here is 66 * an example that trips stack overflow: 67 * 68 * s : a Y | A A A A A X ; // force recursion past m=4 69 * a : A a | Q; 70 * 71 * If that were: 72 * 73 * s : a Y | A+ X ; 74 * 75 * it could loop forever. 76 * 77 * Imagine doing a depth-first search on the e DFA...as you chase an input 78 * sequence you can recurse to same rule such as e above. You'd have a 79 * chain of ((((. When you get do some point, you have to give up. The 80 * states in the chain will have longer and longer NFA config stacks. 81 * Must limit size. 82 * 83 * max=0 implies you cannot ever jump to another rule during closure. 84 * max=1 implies you can make as many calls as you want--you just 85 * can't ever visit a state that is on your rule invocation stack. 86 * I.e., you cannot ever recurse. 87 * max=2 implies you are able to recurse once (i.e., call a rule twice 88 * from the same place). 89 * 90 * This tracks recursion to a rule specific to an invocation site! 91 * It does not detect multiple calls to a rule from different rule 92 * invocation states. We are guaranteed to terminate because the 93 * stack can only grow as big as the number of NFA states * max. 94 * 95 * I noticed that the Java grammar didn't work with max=1, but did with 96 * max=4. Let's set to 4. Recursion is sometimes needed to resolve some 97 * fixed lookahead decisions. 98 */ 99 public static int MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK = 4; 100 101 public NFAContext parent; 102 103 /** The NFA state that invoked another rule's start state is recorded 104 * on the rule invocation context stack. 105 */ 106 public NFAState invokingState; 107 108 /** Computing the hashCode is very expensive and closureBusy() 109 * uses it to track when it's seen a state|ctx before to avoid 110 * infinite loops. As we add new contexts, record the hash code 111 * as this.invokingState + parent.cachedHashCode. Avoids walking 112 * up the tree for every hashCode(). Note that this caching works 113 * because a context is a monotonically growing tree of context nodes 114 * and nothing on the stack is ever modified...ctx just grows 115 * or shrinks. 116 */ 117 protected int cachedHashCode; 118 NFAContext(NFAContext parent, NFAState invokingState)119 public NFAContext(NFAContext parent, NFAState invokingState) { 120 this.parent = parent; 121 this.invokingState = invokingState; 122 if ( invokingState!=null ) { 123 this.cachedHashCode = invokingState.stateNumber; 124 } 125 if ( parent!=null ) { 126 this.cachedHashCode += parent.cachedHashCode; 127 } 128 } 129 130 /** Two contexts are equals() if both have 131 * same call stack; walk upwards to the root. 132 * Recall that the root sentinel node has no invokingStates and no parent. 133 * Note that you may be comparing contexts in different alt trees. 134 * 135 * The hashCode is now cheap as it's computed once upon each context 136 * push on the stack. Use it to make equals() more efficient. 137 */ 138 @Override equals(Object o)139 public boolean equals(Object o) { 140 NFAContext other = ((NFAContext)o); 141 if ( this.cachedHashCode != other.cachedHashCode ) { 142 return false; // can't be same if hash is different 143 } 144 if ( this==other ) { 145 return true; 146 } 147 // System.out.println("comparing "+this+" with "+other); 148 NFAContext sp = this; 149 while ( sp.parent!=null && other.parent!=null ) { 150 if ( sp.invokingState != other.invokingState ) { 151 return false; 152 } 153 sp = sp.parent; 154 other = other.parent; 155 } 156 if ( !(sp.parent==null && other.parent==null) ) { 157 return false; // both pointers must be at their roots after walk 158 } 159 return true; 160 } 161 162 /** Two contexts conflict() if they are equals() or one is a stack suffix 163 * of the other. For example, contexts [21 12 $] and [21 9 $] do not 164 * conflict, but [21 $] and [21 12 $] do conflict. Note that I should 165 * probably not show the $ in this case. There is a dummy node for each 166 * stack that just means empty; $ is a marker that's all. 167 * 168 * This is used in relation to checking conflicts associated with a 169 * single NFA state's configurations within a single DFA state. 170 * If there are configurations s and t within a DFA state such that 171 * s.state=t.state && s.alt != t.alt && s.ctx conflicts t.ctx then 172 * the DFA state predicts more than a single alt--it's nondeterministic. 173 * Two contexts conflict if they are the same or if one is a suffix 174 * of the other. 175 * 176 * When comparing contexts, if one context has a stack and the other 177 * does not then they should be considered the same context. The only 178 * way for an NFA state p to have an empty context and a nonempty context 179 * is the case when closure falls off end of rule without a call stack 180 * and re-enters the rule with a context. This resolves the issue I 181 * discussed with Sriram Srinivasan Feb 28, 2005 about not terminating 182 * fast enough upon nondeterminism. 183 */ conflictsWith(NFAContext other)184 public boolean conflictsWith(NFAContext other) { 185 return this.suffix(other); // || this.equals(other); 186 } 187 188 /** [$] suffix any context 189 * [21 $] suffix [21 12 $] 190 * [21 12 $] suffix [21 $] 191 * [21 18 $] suffix [21 18 12 9 $] 192 * [21 18 12 9 $] suffix [21 18 $] 193 * [21 12 $] not suffix [21 9 $] 194 * 195 * Example "[21 $] suffix [21 12 $]" means: rule r invoked current rule 196 * from state 21. Rule s invoked rule r from state 12 which then invoked 197 * current rule also via state 21. While the context prior to state 21 198 * is different, the fact that both contexts emanate from state 21 implies 199 * that they are now going to track perfectly together. Once they 200 * converged on state 21, there is no way they can separate. In other 201 * words, the prior stack state is not consulted when computing where to 202 * go in the closure operation. ?$ and ??$ are considered the same stack. 203 * If ? is popped off then $ and ?$ remain; they are now an empty and 204 * nonempty context comparison. So, if one stack is a suffix of 205 * another, then it will still degenerate to the simple empty stack 206 * comparison case. 207 */ suffix(NFAContext other)208 protected boolean suffix(NFAContext other) { 209 NFAContext sp = this; 210 // if one of the contexts is empty, it never enters loop and returns true 211 while ( sp.parent!=null && other.parent!=null ) { 212 if ( sp.invokingState != other.invokingState ) { 213 return false; 214 } 215 sp = sp.parent; 216 other = other.parent; 217 } 218 //System.out.println("suffix"); 219 return true; 220 } 221 222 /** Walk upwards to the root of the call stack context looking 223 * for a particular invoking state. 224 public boolean contains(int state) { 225 NFAContext sp = this; 226 int n = 0; // track recursive invocations of state 227 System.out.println("this.context is "+sp); 228 while ( sp.parent!=null ) { 229 if ( sp.invokingState.stateNumber == state ) { 230 return true; 231 } 232 sp = sp.parent; 233 } 234 return false; 235 } 236 */ 237 238 /** Given an NFA state number, how many times has the NFA-to-DFA 239 * conversion pushed that state on the stack? In other words, 240 * the NFA state must be a rule invocation state and this method 241 * tells you how many times you've been to this state. If none, 242 * then you have not called the target rule from this state before 243 * (though another NFA state could have called that target rule). 244 * If n=1, then you've been to this state before during this 245 * DFA construction and are going to invoke that rule again. 246 * 247 * Note that many NFA states can invoke rule r, but we ignore recursion 248 * unless you hit the same rule invocation state again. 249 */ recursionDepthEmanatingFromState(int state)250 public int recursionDepthEmanatingFromState(int state) { 251 NFAContext sp = this; 252 int n = 0; // track recursive invocations of target from this state 253 //System.out.println("this.context is "+sp); 254 while ( sp.parent!=null ) { 255 if ( sp.invokingState.stateNumber == state ) { 256 n++; 257 } 258 sp = sp.parent; 259 } 260 return n; 261 } 262 263 @Override hashCode()264 public int hashCode() { 265 return cachedHashCode; 266 /* 267 int h = 0; 268 NFAContext sp = this; 269 while ( sp.parent!=null ) { 270 h += sp.invokingState.getStateNumber(); 271 sp = sp.parent; 272 } 273 return h; 274 */ 275 } 276 277 /** A context is empty if there is no parent; meaning nobody pushed 278 * anything on the call stack. 279 */ isEmpty()280 public boolean isEmpty() { 281 return parent==null; 282 } 283 284 @Override toString()285 public String toString() { 286 StringBuilder buf = new StringBuilder(); 287 NFAContext sp = this; 288 buf.append("["); 289 while ( sp.parent!=null ) { 290 buf.append(sp.invokingState.stateNumber); 291 buf.append(" "); 292 sp = sp.parent; 293 } 294 buf.append("$]"); 295 return buf.toString(); 296 } 297 } 298