1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.codegen.CodeGenerator; 31 import org.antlr.misc.IntSet; 32 import org.antlr.misc.IntervalSet; 33 import org.antlr.misc.Utils; 34 import org.antlr.runtime.IntStream; 35 import org.stringtemplate.v4.ST; 36 import org.antlr.tool.*; 37 38 import java.util.*; 39 40 /** A DFA (converted from a grammar's NFA). 41 * DFAs are used as prediction machine for alternative blocks in all kinds 42 * of recognizers (lexers, parsers, tree walkers). 43 */ 44 public class DFA { 45 public static final int REACHABLE_UNKNOWN = -2; 46 public static final int REACHABLE_BUSY = -1; // in process of computing 47 public static final int REACHABLE_NO = 0; 48 public static final int REACHABLE_YES = 1; 49 50 public static final int CYCLIC_UNKNOWN = -2; 51 public static final int CYCLIC_BUSY = -1; // in process of computing 52 public static final int CYCLIC_DONE = 0; 53 54 /** Prevent explosion of DFA states during conversion. The max number 55 * of states per alt in a single decision's DFA. 56 public static final int MAX_STATES_PER_ALT_IN_DFA = 450; 57 */ 58 59 /** Set to 0 to not terminate early (time in ms) */ 60 public static int MAX_TIME_PER_DFA_CREATION = 1*1000; 61 62 /** How many edges can each DFA state have before a "special" state 63 * is created that uses IF expressions instead of a table? 64 */ 65 public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534; 66 67 /** What's the start state for this DFA? */ 68 public DFAState startState; 69 70 /** This DFA is being built for which decision? */ 71 public int decisionNumber = 0; 72 73 /** From what NFAState did we create the DFA? */ 74 public NFAState decisionNFAStartState; 75 76 /** The printable grammar fragment associated with this DFA */ 77 public String description; 78 79 /** A set of all uniquely-numbered DFA states. Maps hash of DFAState 80 * to the actual DFAState object. We use this to detect 81 * existing DFA states. Map<DFAState,DFAState>. Use Map so 82 * we can get old state back (Set only allows you to see if it's there). 83 * Not used during fixed k lookahead as it's a waste to fill it with 84 * a dup of states array. 85 */ 86 protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>(); 87 88 /** Maps the state number to the actual DFAState. Use a Vector as it 89 * grows automatically when I set the ith element. This contains all 90 * states, but the states are not unique. s3 might be same as s1 so 91 * s3 → s1 in this table. This is how cycles occur. If fixed k, 92 * then these states will all be unique as states[i] always points 93 * at state i when no cycles exist. 94 * 95 * This is managed in parallel with uniqueStates and simply provides 96 * a way to go from state number to DFAState rather than via a 97 * hash lookup. 98 */ 99 protected Vector<DFAState> states = new Vector<DFAState>(); 100 101 /** Unique state numbers per DFA */ 102 protected int stateCounter = 0; 103 104 /** count only new states not states that were rejected as already present */ 105 protected int numberOfStates = 0; 106 107 /** User specified max fixed lookahead. If 0, nothing specified. -1 108 * implies we have not looked at the options table yet to set k. 109 */ 110 protected int user_k = -1; 111 112 /** While building the DFA, track max lookahead depth if not cyclic */ 113 protected int max_k = -1; 114 115 /** Is this DFA reduced? I.e., can all states lead to an accept state? */ 116 protected boolean reduced = true; 117 118 /** Are there any loops in this DFA? 119 * Computed by doesStateReachAcceptState() 120 */ 121 protected boolean cyclic = false; 122 123 /** Track whether this DFA has at least one sem/syn pred encountered 124 * during a closure operation. This is useful for deciding whether 125 * to retry a non-LL(*) with k=1. If no pred, it will not work w/o 126 * a pred so don't bother. It would just give another error message. 127 */ 128 public boolean predicateVisible = false; 129 130 public boolean hasPredicateBlockedByAction = false; 131 132 /** Each alt in an NFA derived from a grammar must have a DFA state that 133 * predicts it lest the parser not know what to do. Nondeterminisms can 134 * lead to this situation (assuming no semantic predicates can resolve 135 * the problem) and when for some reason, I cannot compute the lookahead 136 * (which might arise from an error in the algorithm or from 137 * left-recursion etc...). This list starts out with all alts contained 138 * and then in method doesStateReachAcceptState() I remove the alts I 139 * know to be uniquely predicted. 140 */ 141 protected List<Integer> unreachableAlts; 142 143 protected int nAlts = 0; 144 145 /** We only want one accept state per predicted alt; track here */ 146 protected DFAState[] altToAcceptState; 147 148 /** Track whether an alt discovers recursion for each alt during 149 * NFA to DFA conversion; >1 alt with recursion implies nonregular. 150 */ 151 public IntSet recursiveAltSet = new IntervalSet(); 152 153 /** Which NFA are we converting (well, which piece of the NFA)? */ 154 public NFA nfa; 155 156 protected NFAToDFAConverter nfaConverter; 157 158 /** This probe tells you a lot about a decision and is useful even 159 * when there is no error such as when a syntactic nondeterminism 160 * is solved via semantic predicates. Perhaps a GUI would want 161 * the ability to show that. 162 */ 163 public DecisionProbe probe = new DecisionProbe(this); 164 165 /** Track absolute time of the conversion so we can have a failsafe: 166 * if it takes too long, then terminate. Assume bugs are in the 167 * analysis engine. 168 */ 169 //protected long conversionStartTime; 170 171 /** Map an edge transition table to a unique set number; ordered so 172 * we can push into the output template as an ordered list of sets 173 * and then ref them from within the transition[][] table. Like this 174 * for C# target: 175 * public static readonly DFA30_transition0 = 176 * new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...}; 177 * public static readonly DFA30_transition1 = 178 * new short[] { 21 }; 179 * public static readonly short[][] DFA30_transition = { 180 * DFA30_transition0, 181 * DFA30_transition0, 182 * DFA30_transition1, 183 * ... 184 * }; 185 */ 186 public Map<List<Integer>, Integer> edgeTransitionClassMap = new LinkedHashMap<List<Integer>, Integer>(); 187 188 /** The unique edge transition class number; every time we see a new 189 * set of edges emanating from a state, we number it so we can reuse 190 * if it's every seen again for another state. For Java grammar, 191 * some of the big edge transition tables are seen about 57 times. 192 */ 193 protected int edgeTransitionClass =0; 194 195 /* This DFA can be converted to a transition[state][char] table and 196 * the following tables are filled by createStateTables upon request. 197 * These are injected into the templates for code generation. 198 * See March 25, 2006 entry for description: 199 * http://www.antlr.org/blog/antlr3/codegen.tml 200 * Often using Vector as can't set ith position in a List and have 201 * it extend list size; bizarre. 202 */ 203 204 /** List of special DFAState objects */ 205 public List<DFAState> specialStates; 206 /** List of ST for special states. */ 207 public List<ST> specialStateSTs; 208 public Vector<Integer> accept; 209 public Vector<Integer> eot; 210 public Vector<Integer> eof; 211 public Vector<Integer> min; 212 public Vector<Integer> max; 213 public Vector<Integer> special; 214 public Vector<Vector<Integer>> transition; 215 /** just the Vector<Integer> indicating which unique edge table is at 216 * position i. 217 */ 218 public Vector<Integer> transitionEdgeTables; // not used by java yet 219 protected int uniqueCompressedSpecialStateNum = 0; 220 221 /** Which generator to use if we're building state tables */ 222 protected CodeGenerator generator = null; 223 DFA()224 protected DFA() {} 225 DFA(int decisionNumber, NFAState decisionStartState)226 public DFA(int decisionNumber, NFAState decisionStartState) { 227 this.decisionNumber = decisionNumber; 228 this.decisionNFAStartState = decisionStartState; 229 nfa = decisionStartState.nfa; 230 nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState); 231 //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) ); 232 initAltRelatedInfo(); 233 234 //long start = System.currentTimeMillis(); 235 nfaConverter = new NFAToDFAConverter(this); 236 try { 237 nfaConverter.convert(); 238 239 // figure out if there are problems with decision 240 verify(); 241 242 if ( !probe.isDeterministic() || probe.analysisOverflowed() ) { 243 probe.issueWarnings(); 244 } 245 246 // must be after verify as it computes cyclic, needed by this routine 247 // should be after warnings because early termination or something 248 // will not allow the reset to operate properly in some cases. 249 resetStateNumbersToBeContiguous(); 250 251 //long stop = System.currentTimeMillis(); 252 //System.out.println("verify cost: "+(int)(stop-start)+" ms"); 253 } 254 // catch (AnalysisTimeoutException at) { 255 // probe.reportAnalysisTimeout(); 256 // if ( !okToRetryDFAWithK1() ) { 257 // probe.issueWarnings(); 258 // } 259 // } 260 catch (NonLLStarDecisionException nonLL) { 261 probe.reportNonLLStarDecision(this); 262 // >1 alt recurses, k=* and no auto backtrack nor manual sem/syn 263 if ( !okToRetryDFAWithK1() ) { 264 probe.issueWarnings(); 265 } 266 } 267 } 268 269 /** Walk all states and reset their numbers to be a contiguous sequence 270 * of integers starting from 0. Only cyclic DFA can have unused positions 271 * in states list. State i might be identical to a previous state j and 272 * will result in states[i] == states[j]. We don't want to waste a state 273 * number on this. Useful mostly for code generation in tables. 274 * 275 * At the start of this routine, states[i].stateNumber <= i by definition. 276 * If states[50].stateNumber is 50 then a cycle during conversion may 277 * try to add state 103, but we find that an identical DFA state, named 278 * 50, already exists, hence, states[103]==states[50] and both have 279 * stateNumber 50 as they point at same object. Afterwards, the set 280 * of state numbers from all states should represent a contiguous range 281 * from 0..n-1 where n is the number of unique states. 282 */ resetStateNumbersToBeContiguous()283 public void resetStateNumbersToBeContiguous() { 284 if ( getUserMaxLookahead()>0 ) { 285 // all numbers are unique already; no states are thrown out. 286 return; 287 } 288 289 // walk list of DFAState objects by state number, 290 // setting state numbers to 0..n-1 291 int snum=0; 292 for (int i = 0; i <= getMaxStateNumber(); i++) { 293 DFAState s = getState(i); 294 // some states are unused after creation most commonly due to cycles 295 // or conflict resolution. 296 if ( s==null ) { 297 continue; 298 } 299 // state i is mapped to DFAState with state number set to i originally 300 // so if it's less than i, then we renumbered it already; that 301 // happens when states have been merged or cycles occurred I think. 302 // states[50] will point to DFAState with s50 in it but 303 // states[103] might also point at this same DFAState. Since 304 // 50 < 103 then it's already been renumbered as it points downwards. 305 boolean alreadyRenumbered = s.stateNumber<i; 306 if ( !alreadyRenumbered ) { 307 // state i is a valid state, reset it's state number 308 s.stateNumber = snum; // rewrite state numbers to be 0..n-1 309 snum++; 310 } 311 } 312 if ( snum!=getNumberOfStates() ) { 313 ErrorManager.internalError("DFA "+decisionNumber+": "+ 314 decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+ 315 "!= num renumbered states "+snum); 316 } 317 } 318 319 // JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays 320 // or even consistently formatted strings acceptable to java that 321 // I am forced to build the individual char elements here 322 323 public List<? extends String> getJavaCompressedAccept() { return getRunLengthEncoding(accept); } 324 public List<? extends String> getJavaCompressedEOT() { return getRunLengthEncoding(eot); } 325 public List<? extends String> getJavaCompressedEOF() { return getRunLengthEncoding(eof); } 326 public List<? extends String> getJavaCompressedMin() { return getRunLengthEncoding(min); } 327 public List<? extends String> getJavaCompressedMax() { return getRunLengthEncoding(max); } 328 public List<? extends String> getJavaCompressedSpecial() { return getRunLengthEncoding(special); } 329 public List<List<? extends String>> getJavaCompressedTransition() { 330 if ( transition==null || transition.isEmpty() ) { 331 return null; 332 } 333 List<List<? extends String>> encoded = new ArrayList<List<? extends String>>(transition.size()); 334 // walk Vector<Vector<FormattedInteger>> which is the transition[][] table 335 for (int i = 0; i < transition.size(); i++) { 336 Vector<Integer> transitionsForState = transition.elementAt(i); 337 encoded.add(getRunLengthEncoding(transitionsForState)); 338 } 339 return encoded; 340 } 341 342 /** Compress the incoming data list so that runs of same number are 343 * encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded 344 * as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression 345 * that GIF files use. Transition tables are heavily compressed by 346 * this technique. I got the idea from JFlex http://jflex.de/ 347 * 348 * Return List<String> where each string is either \xyz for 8bit char 349 * and \uFFFF for 16bit. Hideous and specific to Java, but it is the 350 * only target bad enough to need it. 351 */ 352 public List<? extends String> getRunLengthEncoding(List<Integer> data) { 353 if ( data==null || data.isEmpty() ) { 354 // for states with no transitions we want an empty string "" 355 // to hold its place in the transitions array. 356 List<String> empty = new ArrayList<String>(); 357 empty.add(""); 358 return empty; 359 } 360 int size = Math.max(2,data.size()/2); 361 List<String> encoded = new ArrayList<String>(size); // guess at size 362 // scan values looking for runs 363 int i = 0; 364 Integer emptyValue = Utils.integer(-1); 365 while ( i < data.size() ) { 366 Integer I = data.get(i); 367 if ( I==null ) { 368 I = emptyValue; 369 } 370 // count how many v there are? 371 int n = 0; 372 for (int j = i; j < data.size(); j++) { 373 Integer v = data.get(j); 374 if ( v==null ) { 375 v = emptyValue; 376 } 377 if ( I.equals(v) ) { 378 n++; 379 } 380 else { 381 break; 382 } 383 } 384 encoded.add(generator.target.encodeIntAsCharEscape((char)n)); 385 encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue())); 386 i+=n; 387 } 388 return encoded; 389 } 390 391 public void createStateTables(CodeGenerator generator) { 392 //System.out.println("createTables:\n"+this); 393 this.generator = generator; 394 description = getNFADecisionStartState().getDescription(); 395 description = 396 generator.target.getTargetStringLiteralFromString(description); 397 398 // create all the tables 399 special = new Vector<Integer>(this.getNumberOfStates()); // Vector<short> 400 special.setSize(this.getNumberOfStates()); 401 specialStates = new ArrayList<DFAState>(); // List<DFAState> 402 specialStateSTs = new ArrayList<ST>(); // List<ST> 403 accept = new Vector<Integer>(this.getNumberOfStates()); // Vector<int> 404 accept.setSize(this.getNumberOfStates()); 405 eot = new Vector<Integer>(this.getNumberOfStates()); // Vector<int> 406 eot.setSize(this.getNumberOfStates()); 407 eof = new Vector<Integer>(this.getNumberOfStates()); // Vector<int> 408 eof.setSize(this.getNumberOfStates()); 409 min = new Vector<Integer>(this.getNumberOfStates()); // Vector<int> 410 min.setSize(this.getNumberOfStates()); 411 max = new Vector<Integer>(this.getNumberOfStates()); // Vector<int> 412 max.setSize(this.getNumberOfStates()); 413 transition = new Vector<Vector<Integer>>(this.getNumberOfStates()); // Vector<Vector<int>> 414 transition.setSize(this.getNumberOfStates()); 415 transitionEdgeTables = new Vector<Integer>(this.getNumberOfStates()); // Vector<int> 416 transitionEdgeTables.setSize(this.getNumberOfStates()); 417 418 // for each state in the DFA, fill relevant tables. 419 Iterator<DFAState> it; 420 if ( getUserMaxLookahead()>0 ) { 421 it = states.iterator(); 422 } 423 else { 424 it = getUniqueStates().values().iterator(); 425 } 426 while ( it.hasNext() ) { 427 DFAState s = it.next(); 428 if ( s==null ) { 429 // ignore null states; some acylic DFA see this condition 430 // when inlining DFA (due to lacking of exit branch pruning?) 431 continue; 432 } 433 if ( s.isAcceptState() ) { 434 // can't compute min,max,special,transition on accepts 435 accept.set(s.stateNumber, 436 Utils.integer(s.getUniquelyPredictedAlt())); 437 } 438 else { 439 createMinMaxTables(s); 440 createTransitionTableEntryForState(s); 441 createSpecialTable(s); 442 createEOTAndEOFTables(s); 443 } 444 } 445 446 // now that we have computed list of specialStates, gen code for 'em 447 for (int i = 0; i < specialStates.size(); i++) { 448 DFAState ss = specialStates.get(i); 449 ST stateST = 450 generator.generateSpecialState(ss); 451 specialStateSTs.add(stateST); 452 } 453 454 // check that the tables are not messed up by encode/decode 455 /* 456 testEncodeDecode(min); 457 testEncodeDecode(max); 458 testEncodeDecode(accept); 459 testEncodeDecode(special); 460 System.out.println("min="+min); 461 System.out.println("max="+max); 462 System.out.println("eot="+eot); 463 System.out.println("eof="+eof); 464 System.out.println("accept="+accept); 465 System.out.println("special="+special); 466 System.out.println("transition="+transition); 467 */ 468 } 469 470 /* 471 private void testEncodeDecode(List data) { 472 System.out.println("data="+data); 473 List encoded = getRunLengthEncoding(data); 474 StringBuffer buf = new StringBuffer(); 475 for (int i = 0; i < encoded.size(); i++) { 476 String I = (String)encoded.get(i); 477 int v = 0; 478 if ( I.startsWith("\\u") ) { 479 v = Integer.parseInt(I.substring(2,I.length()), 16); 480 } 481 else { 482 v = Integer.parseInt(I.substring(1,I.length()), 8); 483 } 484 buf.append((char)v); 485 } 486 String encodedS = buf.toString(); 487 short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS); 488 //System.out.println("decoded:"); 489 for (int i = 0; i < decoded.length; i++) { 490 short x = decoded[i]; 491 if ( x!=((Integer)data.get(i)).intValue() ) { 492 System.err.println("problem with encoding"); 493 } 494 //System.out.print(", "+x); 495 } 496 //System.out.println(); 497 } 498 */ 499 500 protected void createMinMaxTables(DFAState s) { 501 int smin = Label.MAX_CHAR_VALUE + 1; 502 int smax = Label.MIN_ATOM_VALUE - 1; 503 for (int j = 0; j < s.getNumberOfTransitions(); j++) { 504 Transition edge = s.transition(j); 505 Label label = edge.label; 506 if ( label.isAtom() ) { 507 if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) { 508 if ( label.getAtom()<smin ) { 509 smin = label.getAtom(); 510 } 511 if ( label.getAtom()>smax ) { 512 smax = label.getAtom(); 513 } 514 } 515 } 516 else if ( label.isSet() ) { 517 IntervalSet labels = (IntervalSet)label.getSet(); 518 int lmin = labels.getMinElement(); 519 // if valid char (don't do EOF) and less than current min 520 if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) { 521 smin = labels.getMinElement(); 522 } 523 if ( labels.getMaxElement()>smax ) { 524 smax = labels.getMaxElement(); 525 } 526 } 527 } 528 529 if ( smax<0 ) { 530 // must be predicates or pure EOT transition; just zero out min, max 531 smin = Label.MIN_CHAR_VALUE; 532 smax = Label.MIN_CHAR_VALUE; 533 } 534 535 min.set(s.stateNumber, Utils.integer((char)smin)); 536 max.set(s.stateNumber, Utils.integer((char)smax)); 537 538 if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) { 539 ErrorManager.internalError("messed up: min="+min+", max="+max); 540 } 541 } 542 543 protected void createTransitionTableEntryForState(DFAState s) { 544 /* 545 System.out.println("createTransitionTableEntryForState s"+s.stateNumber+ 546 " dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic()); 547 */ 548 int smax = max.get(s.stateNumber); 549 int smin = min.get(s.stateNumber); 550 551 Vector<Integer> stateTransitions = new Vector<Integer>(smax-smin+1); 552 stateTransitions.setSize(smax-smin+1); 553 transition.set(s.stateNumber, stateTransitions); 554 for (int j = 0; j < s.getNumberOfTransitions(); j++) { 555 Transition edge = s.transition(j); 556 Label label = edge.label; 557 if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) { 558 int labelIndex = label.getAtom()-smin; // offset from 0 559 stateTransitions.set(labelIndex, 560 Utils.integer(edge.target.stateNumber)); 561 } 562 else if ( label.isSet() ) { 563 IntervalSet labels = (IntervalSet)label.getSet(); 564 int[] atoms = labels.toArray(); 565 for (int a = 0; a < atoms.length; a++) { 566 // set the transition if the label is valid (don't do EOF) 567 if ( atoms[a]>=Label.MIN_CHAR_VALUE ) { 568 int labelIndex = atoms[a]-smin; // offset from 0 569 stateTransitions.set(labelIndex, 570 Utils.integer(edge.target.stateNumber)); 571 } 572 } 573 } 574 } 575 // track unique state transition tables so we can reuse 576 Integer edgeClass = edgeTransitionClassMap.get(stateTransitions); 577 if ( edgeClass!=null ) { 578 //System.out.println("we've seen this array before; size="+stateTransitions.size()); 579 transitionEdgeTables.set(s.stateNumber, edgeClass); 580 } 581 else { 582 edgeClass = Utils.integer(edgeTransitionClass); 583 transitionEdgeTables.set(s.stateNumber, edgeClass); 584 edgeTransitionClassMap.put(stateTransitions, edgeClass); 585 edgeTransitionClass++; 586 } 587 } 588 589 /** Set up the EOT and EOF tables; we cannot put -1 min/max values so 590 * we need another way to test that in the DFA transition function. 591 */ 592 protected void createEOTAndEOFTables(DFAState s) { 593 for (int j = 0; j < s.getNumberOfTransitions(); j++) { 594 Transition edge = s.transition(j); 595 Label label = edge.label; 596 if ( label.isAtom() ) { 597 if ( label.getAtom()==Label.EOT ) { 598 // eot[s] points to accept state 599 eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); 600 } 601 else if ( label.getAtom()==Label.EOF ) { 602 // eof[s] points to accept state 603 eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); 604 } 605 } 606 else if ( label.isSet() ) { 607 IntervalSet labels = (IntervalSet)label.getSet(); 608 int[] atoms = labels.toArray(); 609 for (int a = 0; a < atoms.length; a++) { 610 if ( atoms[a]==Label.EOT ) { 611 // eot[s] points to accept state 612 eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); 613 } 614 else if ( atoms[a]==Label.EOF ) { 615 eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber)); 616 } 617 } 618 } 619 } 620 } 621 622 protected void createSpecialTable(DFAState s) { 623 // number all special states from 0...n-1 instead of their usual numbers 624 boolean hasSemPred = false; 625 626 // TODO this code is very similar to canGenerateSwitch. Refactor to share 627 for (int j = 0; j < s.getNumberOfTransitions(); j++) { 628 Transition edge = s.transition(j); 629 Label label = edge.label; 630 // can't do a switch if the edges have preds or are going to 631 // require gated predicates 632 if ( label.isSemanticPredicate() || 633 ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null) 634 { 635 hasSemPred = true; 636 break; 637 } 638 } 639 // if has pred or too big for table, make it special 640 int smax = max.get(s.stateNumber); 641 int smin = min.get(s.stateNumber); 642 if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) { 643 special.set(s.stateNumber, 644 Utils.integer(uniqueCompressedSpecialStateNum)); 645 uniqueCompressedSpecialStateNum++; 646 specialStates.add(s); 647 } 648 else { 649 special.set(s.stateNumber, Utils.integer(-1)); // not special 650 } 651 } 652 653 public int predict(IntStream input) { 654 Interpreter interp = new Interpreter(nfa.grammar, input); 655 return interp.predict(this); 656 } 657 658 /** Add a new DFA state to this DFA if not already present. 659 * To force an acyclic, fixed maximum depth DFA, just always 660 * return the incoming state. By not reusing old states, 661 * no cycles can be created. If we're doing fixed k lookahead 662 * don't updated uniqueStates, just return incoming state, which 663 * indicates it's a new state. 664 */ 665 protected DFAState addState(DFAState d) { 666 if ( getUserMaxLookahead()>0 ) { 667 return d; 668 } 669 // does a DFA state exist already with everything the same 670 // except its state number? 671 DFAState existing = uniqueStates.get(d); 672 if ( existing != null ) { 673 /* 674 System.out.println("state "+d.stateNumber+" exists as state "+ 675 existing.stateNumber); 676 */ 677 // already there...get the existing DFA state 678 return existing; 679 } 680 681 // if not there, then add new state. 682 uniqueStates.put(d,d); 683 numberOfStates++; 684 return d; 685 } 686 687 public void removeState(DFAState d) { 688 DFAState it = uniqueStates.remove(d); 689 if ( it!=null ) { 690 numberOfStates--; 691 } 692 } 693 694 public Map<DFAState, DFAState> getUniqueStates() { 695 return uniqueStates; 696 } 697 698 /** What is the max state number ever created? This may be beyond 699 * getNumberOfStates(). 700 */ 701 public int getMaxStateNumber() { 702 return states.size()-1; 703 } 704 705 public DFAState getState(int stateNumber) { 706 return states.get(stateNumber); 707 } 708 709 public void setState(int stateNumber, DFAState d) { 710 states.set(stateNumber, d); 711 } 712 713 /** Is the DFA reduced? I.e., does every state have a path to an accept 714 * state? If not, don't delete as we need to generate an error indicating 715 * which paths are "dead ends". Also tracks list of alts with no accept 716 * state in the DFA. Must call verify() first before this makes sense. 717 */ 718 public boolean isReduced() { 719 return reduced; 720 } 721 722 /** Is this DFA cyclic? That is, are there any loops? If not, then 723 * the DFA is essentially an LL(k) predictor for some fixed, max k value. 724 * We can build a series of nested IF statements to match this. In the 725 * presence of cycles, we need to build a general DFA and interpret it 726 * to distinguish between alternatives. 727 */ 728 public boolean isCyclic() { 729 return cyclic && getUserMaxLookahead()==0; 730 } 731 732 public boolean isClassicDFA() { 733 return !isCyclic() && 734 !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) && 735 !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this); 736 } 737 738 public boolean canInlineDecision() { 739 return !isCyclic() && 740 !probe.isNonLLStarDecision() && 741 getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE; 742 } 743 744 /** Is this DFA derived from the NFA for the Tokens rule? */ 745 public boolean isTokensRuleDecision() { 746 if ( nfa.grammar.type!=Grammar.LEXER ) { 747 return false; 748 } 749 NFAState nfaStart = getNFADecisionStartState(); 750 Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME); 751 NFAState TokensRuleStart = r.startState; 752 NFAState TokensDecisionStart = 753 (NFAState)TokensRuleStart.transition[0].target; 754 return nfaStart == TokensDecisionStart; 755 } 756 757 /** The user may specify a max, acyclic lookahead for any decision. No 758 * DFA cycles are created when this value, k, is greater than 0. 759 * If this decision has no k lookahead specified, then try the grammar. 760 */ 761 public int getUserMaxLookahead() { 762 if ( user_k>=0 ) { // cache for speed 763 return user_k; 764 } 765 user_k = nfa.grammar.getUserMaxLookahead(decisionNumber); 766 return user_k; 767 } 768 769 public boolean getAutoBacktrackMode() { 770 return nfa.grammar.getAutoBacktrackMode(decisionNumber); 771 } 772 773 public void setUserMaxLookahead(int k) { 774 this.user_k = k; 775 } 776 777 /** Return k if decision is LL(k) for some k else return max int 778 */ 779 public int getMaxLookaheadDepth() { 780 if ( hasCycle() ) return Integer.MAX_VALUE; 781 // compute to be sure 782 return _getMaxLookaheadDepth(startState, 0); 783 } 784 785 int _getMaxLookaheadDepth(DFAState d, int depth) { 786 // not cyclic; don't worry about termination 787 // fail if pred edge. 788 int max = depth; 789 for (int i=0; i<d.getNumberOfTransitions(); i++) { 790 Transition t = d.transition(i); 791 // if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE; 792 if ( !t.isSemanticPredicate() ) { 793 // if pure pred not gated, it must target stop state; don't count 794 DFAState edgeTarget = (DFAState)t.target; 795 int m = _getMaxLookaheadDepth(edgeTarget, depth+1); 796 max = Math.max(max, m); 797 } 798 } 799 return max; 800 } 801 802 /** Count all disambiguating syn preds (ignore synpred tests 803 * for gated edges, which occur for nonambig input sequences). 804 * E.g., 805 * x : (X)=> (X|Y)\n" + 806 * | X\n" + 807 * ; 808 * 809 * gives 810 * 811 * .s0-X->.s1 812 * .s0-Y&&{synpred1_t}?->:s2=>1 813 * .s1-{synpred1_t}?->:s2=>1 814 * .s1-{true}?->:s3=>2 815 */ 816 public boolean hasSynPred() { 817 boolean has = _hasSynPred(startState, new HashSet<DFAState>()); 818 // if ( !has ) { 819 // System.out.println("no synpred in dec "+decisionNumber); 820 // FASerializer serializer = new FASerializer(nfa.grammar); 821 // String result = serializer.serialize(startState); 822 // System.out.println(result); 823 // } 824 return has; 825 } 826 827 public boolean getHasSynPred() { return hasSynPred(); } // for ST 828 829 boolean _hasSynPred(DFAState d, Set<DFAState> busy) { 830 busy.add(d); 831 for (int i=0; i<d.getNumberOfTransitions(); i++) { 832 Transition t = d.transition(i); 833 if ( t.isSemanticPredicate() ) { 834 SemanticContext ctx = t.label.getSemanticContext(); 835 // if ( ctx.toString().indexOf("synpred")>=0 ) { 836 // System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate()); 837 // System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token); 838 // } 839 if ( ctx.isSyntacticPredicate() ) return true; 840 } 841 DFAState edgeTarget = (DFAState)t.target; 842 if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true; 843 } 844 845 return false; 846 } 847 848 public boolean hasSemPred() { // has user-defined sempred 849 boolean has = _hasSemPred(startState, new HashSet<DFAState>()); 850 return has; 851 } 852 853 boolean _hasSemPred(DFAState d, Set<DFAState> busy) { 854 busy.add(d); 855 for (int i=0; i<d.getNumberOfTransitions(); i++) { 856 Transition t = d.transition(i); 857 if ( t.isSemanticPredicate() ) { 858 SemanticContext ctx = t.label.getSemanticContext(); 859 if ( ctx.hasUserSemanticPredicate() ) return true; 860 } 861 DFAState edgeTarget = (DFAState)t.target; 862 if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true; 863 } 864 865 return false; 866 } 867 868 /** Compute cyclic w/o relying on state computed during analysis. just check. */ 869 public boolean hasCycle() { 870 boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>()); 871 return cyclic; 872 } 873 874 boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) { 875 busy.put(d, CYCLIC_BUSY); 876 for (int i=0; i<d.getNumberOfTransitions(); i++) { 877 Transition t = d.transition(i); 878 DFAState target = (DFAState)t.target; 879 int cond = CYCLIC_UNKNOWN; 880 if ( busy.get(target)!=null ) cond = busy.get(target); 881 if ( cond==CYCLIC_BUSY ) return true; 882 if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true; 883 } 884 busy.put(d, CYCLIC_DONE); 885 return false; 886 } 887 888 889 /** Return a list of Integer alt numbers for which no lookahead could 890 * be computed or for which no single DFA accept state predicts those 891 * alts. Must call verify() first before this makes sense. 892 */ 893 public List<Integer> getUnreachableAlts() { 894 return unreachableAlts; 895 } 896 897 /** Once this DFA has been built, need to verify that: 898 * 899 * 1. it's reduced 900 * 2. all alts have an accept state 901 * 902 * Elsewhere, in the NFA converter, we need to verify that: 903 * 904 * 3. alts i and j have disjoint lookahead if no sem preds 905 * 4. if sem preds, nondeterministic alts must be sufficiently covered 906 * 907 * This is avoided if analysis bails out for any reason. 908 */ 909 public void verify() { 910 doesStateReachAcceptState(startState); 911 } 912 913 /** figure out if this state eventually reaches an accept state and 914 * modify the instance variable 'reduced' to indicate if we find 915 * at least one state that cannot reach an accept state. This implies 916 * that the overall DFA is not reduced. This algorithm should be 917 * linear in the number of DFA states. 918 * 919 * The algorithm also tracks which alternatives have no accept state, 920 * indicating a nondeterminism. 921 * 922 * Also computes whether the DFA is cyclic. 923 * 924 * TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt 925 */ 926 protected boolean doesStateReachAcceptState(DFAState d) { 927 if ( d.isAcceptState() ) { 928 // accept states have no edges emanating from them so we can return 929 d.setAcceptStateReachable(REACHABLE_YES); 930 // this alt is uniquely predicted, remove from nondeterministic list 931 int predicts = d.getUniquelyPredictedAlt(); 932 unreachableAlts.remove(Utils.integer(predicts)); 933 return true; 934 } 935 936 // avoid infinite loops 937 d.setAcceptStateReachable(REACHABLE_BUSY); 938 939 boolean anEdgeReachesAcceptState = false; 940 // Visit every transition, track if at least one edge reaches stop state 941 // Cannot terminate when we know this state reaches stop state since 942 // all transitions must be traversed to set status of each DFA state. 943 for (int i=0; i<d.getNumberOfTransitions(); i++) { 944 Transition t = d.transition(i); 945 DFAState edgeTarget = (DFAState)t.target; 946 int targetStatus = edgeTarget.getAcceptStateReachable(); 947 if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing 948 cyclic = true; 949 continue; 950 } 951 if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work 952 anEdgeReachesAcceptState = true; 953 continue; 954 } 955 if ( targetStatus==REACHABLE_NO ) { // avoid unnecessary work 956 continue; 957 } 958 // target must be REACHABLE_UNKNOWN (i.e., unvisited) 959 if ( doesStateReachAcceptState(edgeTarget) ) { 960 anEdgeReachesAcceptState = true; 961 // have to keep looking so don't break loop 962 // must cover all states even if we find a path for this state 963 } 964 } 965 if ( anEdgeReachesAcceptState ) { 966 d.setAcceptStateReachable(REACHABLE_YES); 967 } 968 else { 969 d.setAcceptStateReachable(REACHABLE_NO); 970 reduced = false; 971 } 972 return anEdgeReachesAcceptState; 973 } 974 975 /** Walk all accept states and find the manually-specified synpreds. 976 * Gated preds are not always hoisted 977 * I used to do this in the code generator, but that is too late. 978 * This converter tries to avoid computing DFA for decisions in 979 * syntactic predicates that are not ever used such as those 980 * created by autobacktrack mode. 981 */ 982 public void findAllGatedSynPredsUsedInDFAAcceptStates() { 983 int nAlts = getNumberOfAlts(); 984 for (int i=1; i<=nAlts; i++) { 985 DFAState a = getAcceptState(i); 986 //System.out.println("alt "+i+": "+a); 987 if ( a!=null ) { 988 Set<? extends SemanticContext> synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations(); 989 if ( synpreds!=null ) { 990 // add all the predicates we find (should be just one, right?) 991 for (SemanticContext semctx : synpreds) { 992 // System.out.println("synpreds: "+semctx); 993 nfa.grammar.synPredUsedInDFA(this, semctx); 994 } 995 } 996 } 997 } 998 } 999 1000 public NFAState getNFADecisionStartState() { 1001 return decisionNFAStartState; 1002 } 1003 1004 public DFAState getAcceptState(int alt) { 1005 return altToAcceptState[alt]; 1006 } 1007 1008 public void setAcceptState(int alt, DFAState acceptState) { 1009 altToAcceptState[alt] = acceptState; 1010 } 1011 1012 public String getDescription() { 1013 return description; 1014 } 1015 1016 public int getDecisionNumber() { 1017 return decisionNFAStartState.getDecisionNumber(); 1018 } 1019 1020 /** If this DFA failed to finish during construction, we might be 1021 * able to retry with k=1 but we need to know whether it will 1022 * potentially succeed. Can only succeed if there is a predicate 1023 * to resolve the issue. Don't try if k=1 already as it would 1024 * cycle forever. Timeout can retry with k=1 even if no predicate 1025 * if k!=1. 1026 */ 1027 public boolean okToRetryDFAWithK1() { 1028 boolean nonLLStarOrOverflowAndPredicateVisible = 1029 (probe.isNonLLStarDecision()||probe.analysisOverflowed()) && 1030 predicateVisible; // auto backtrack or manual sem/syn 1031 return getUserMaxLookahead()!=1 && 1032 nonLLStarOrOverflowAndPredicateVisible; 1033 } 1034 1035 public String getReasonForFailure() { 1036 StringBuilder buf = new StringBuilder(); 1037 if ( probe.isNonLLStarDecision() ) { 1038 buf.append("non-LL(*)"); 1039 if ( predicateVisible ) { 1040 buf.append(" && predicate visible"); 1041 } 1042 } 1043 if ( probe.analysisOverflowed() ) { 1044 buf.append("recursion overflow"); 1045 if ( predicateVisible ) { 1046 buf.append(" && predicate visible"); 1047 } 1048 } 1049 buf.append("\n"); 1050 return buf.toString(); 1051 } 1052 1053 /** What GrammarAST node (derived from the grammar) is this DFA 1054 * associated with? It will point to the start of a block or 1055 * the loop back of a (...)+ block etc... 1056 */ 1057 public GrammarAST getDecisionASTNode() { 1058 return decisionNFAStartState.associatedASTNode; 1059 } 1060 1061 public boolean isGreedy() { 1062 GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber); 1063 Object v = nfa.grammar.getBlockOption(blockAST,"greedy"); 1064 if ( v!=null && v.equals("false") ) { 1065 return false; 1066 } 1067 return true; 1068 1069 } 1070 1071 public DFAState newState() { 1072 DFAState n = new DFAState(this); 1073 n.stateNumber = stateCounter; 1074 stateCounter++; 1075 states.setSize(n.stateNumber+1); 1076 states.set(n.stateNumber, n); // track state num to state 1077 return n; 1078 } 1079 1080 public int getNumberOfStates() { 1081 if ( getUserMaxLookahead()>0 ) { 1082 // if using fixed lookahead then uniqueSets not set 1083 return states.size(); 1084 } 1085 return numberOfStates; 1086 } 1087 1088 public int getNumberOfAlts() { 1089 return nAlts; 1090 } 1091 1092 // public boolean analysisTimedOut() { 1093 // return probe.analysisTimedOut(); 1094 // } 1095 1096 protected void initAltRelatedInfo() { 1097 unreachableAlts = new LinkedList<Integer>(); 1098 for (int i = 1; i <= nAlts; i++) { 1099 unreachableAlts.add(Utils.integer(i)); 1100 } 1101 altToAcceptState = new DFAState[nAlts+1]; 1102 } 1103 1104 @Override 1105 public String toString() { 1106 FASerializer serializer = new FASerializer(nfa.grammar); 1107 if ( startState==null ) { 1108 return ""; 1109 } 1110 return serializer.serialize(startState, false); 1111 } 1112 1113 /** EOT (end of token) is a label that indicates when the DFA conversion 1114 * algorithm would "fall off the end of a lexer rule". It normally 1115 * means the default clause. So for ('a'..'z')+ you would see a DFA 1116 * with a state that has a..z and EOT emanating from it. a..z would 1117 * jump to a state predicting alt 1 and EOT would jump to a state 1118 * predicting alt 2 (the exit loop branch). EOT implies anything other 1119 * than a..z. If for some reason, the set is "all char" such as with 1120 * the wildcard '.', then EOT cannot match anything. For example, 1121 * 1122 * BLOCK : '{' (.)* '}' 1123 * 1124 * consumes all char until EOF when greedy=true. When all edges are 1125 * combined for the DFA state after matching '}', you will find that 1126 * it is all char. The EOT transition has nothing to match and is 1127 * unreachable. The findNewDFAStatesAndAddDFATransitions() method 1128 * must know to ignore the EOT, so we simply remove it from the 1129 * reachable labels. Later analysis will find that the exit branch 1130 * is not predicted by anything. For greedy=false, we leave only 1131 * the EOT label indicating that the DFA should stop immediately 1132 * and predict the exit branch. The reachable labels are often a 1133 * set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}] 1134 * due to DFA conversion so must construct a pure set to see if 1135 * it is same as Label.ALLCHAR. 1136 * 1137 * Only do this for Lexers. 1138 * 1139 * If EOT coexists with ALLCHAR: 1140 * 1. If not greedy, modify the labels parameter to be EOT 1141 * 2. If greedy, remove EOT from the labels set 1142 protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels) 1143 { 1144 Label eot = new Label(Label.EOT); 1145 if ( !labels.containsKey(eot) ) { 1146 return false; 1147 } 1148 System.out.println("### contains EOT"); 1149 boolean containsAllChar = false; 1150 IntervalSet completeVocab = new IntervalSet(); 1151 int n = labels.size(); 1152 for (int i=0; i<n; i++) { 1153 Label rl = (Label)labels.get(i); 1154 if ( !rl.equals(eot) ) { 1155 completeVocab.addAll(rl.getSet()); 1156 } 1157 } 1158 System.out.println("completeVocab="+completeVocab); 1159 if ( completeVocab.equals(Label.ALLCHAR) ) { 1160 System.out.println("all char"); 1161 containsAllChar = true; 1162 } 1163 return containsAllChar; 1164 } 1165 */ 1166 } 1167 1168