/*
 * [The "BSD license"]
 * Copyright (c) 2010 Terence Parr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.analysis;

import org.antlr.misc.Utils;
import org.antlr.tool.Grammar;

import java.util.HashSet;
import java.util.Set;

/** A module to perform optimizations on DFAs.
 *
 *  I could more easily (and more quickly) do some optimizations (such as
 *  PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it
 *  messes up the determinism checking.  For example, it looks like
 *  loop exit branches are unreachable if you prune exit branches
 *  during DFA construction and before determinism checks.
 *
 *  In general, ANTLR's NFA&rarr;DFA&rarr;codegen pipeline seems very robust
 *  to me which I attribute to a uniform and consistent set of data
 *  structures.  Regardless of what I want to "say"/implement, I do so
 *  within the confines of, for example, a DFA.  The code generator
 *  can then just generate code--it doesn't have to do much thinking.
 *  Putting optimizations in the code gen code really starts to make
 *  it a spaghetti factory (uh oh, now I'm hungry!).  The pipeline is
 *  very testable; each stage has well defined input/output pairs.
 *
 *  ### Optimization: PRUNE_EBNF_EXIT_BRANCHES
 *
 *  There is no need to test EBNF block exit branches.  Not only is it
 *  an unneeded computation, but counter-intuitively, you actually get
 *  better errors.  You can report an error at the missing or extra
 *  token rather than as soon as you've figured out you will fail.
 *
 *  Imagine optional block "( DOT CLASS )? SEMI".  ANTLR generates:
 *
 *  int alt=0;
 *  if ( input.LA(1)==DOT ) {
 *      alt=1;
 *  }
 *  else if ( input.LA(1)==SEMI ) {
 *      alt=2;
 *  }
 *
 *  Clearly, since Parser.match() will ultimately find the error, we
 *  do not want to report an error nor do we want to bother testing
 *  lookahead against what follows the (...)?  We want to generate
 *  simply "should I enter the subrule?":
 *
 *  int alt=2;
 *  if ( input.LA(1)==DOT ) {
 *      alt=1;
 *  }
 *
 *  NOTE 1. Greedy loops cannot be optimized in this way.  For example,
 *  "(greedy=false:'x'|.)* '\n'".  You specifically need the exit branch
 *  to tell you when to terminate the loop as the same input actually
 *  predicts one of the alts (i.e., staying in the loop).
 *
 *  NOTE 2. I do not optimize cyclic DFAs at the moment as it doesn't
 *  seem to work. ;)  I'll have to investigate later to see what work I
 *  can do on cyclic DFAs to make them have fewer edges.  Might have
 *  something to do with the EOT token.
 *
 *  ### PRUNE_SUPERFLUOUS_EOT_EDGES
 *
 *  When a token is a subset of another such as the following rules, ANTLR
 *  quietly assumes the first token to resolve the ambiguity.
 *
 *  EQ       : '=' ;
 *  ASSIGNOP : '=' | '+=' ;
 *
 *  It can yield states that have only a single edge on EOT to an accept
 *  state.  This is a waste and messes up my code generation. ;)  If
 *  Tokens rule DFA goes
 *
 *  s0 -'='-&gt; s3 -EOT-&gt; s5 (accept)
 *
 *  then s5 should be pruned and s3 should be made an accept.  Do NOT do this
 *  for keyword versus ID as the state with EOT edge emanating from it will
 *  also have another edge.
 *
 *  ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES
 *
 *  Done during DFA construction.  See method addTransition() in
 *  NFAToDFAConverter.
 *
 *  ### Optimization: MERGE_STOP_STATES
 *
 *  Done during DFA construction.  See addDFAState() in NFAToDFAConverter.
 */
public class DFAOptimizer {
	public static boolean PRUNE_EBNF_EXIT_BRANCHES = true;
	public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true;
	public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true;
	public static boolean MERGE_STOP_STATES = true;

	/** Used by DFA state machine generator to avoid infinite recursion
	 *  resulting from cycles in the DFA.  This is a set of int state #s.
	 *  This is a side-effect of calling optimize; can't clear after use
	 *  because code gen needs it.
	 */
	protected Set<Integer> visited = new HashSet<Integer>();

	/** The grammar whose decision DFAs are to be optimized. */
	protected Grammar grammar;

	public DFAOptimizer(Grammar grammar) {
		this.grammar = grammar;
	}

	/** Optimize each decision DFA in this grammar.
	 *  Decision numbers are 1-based in ANTLR, hence the loop bounds.
	 */
	public void optimize() {
		// optimize each DFA in this grammar
		for (int decisionNumber = 1;
			 decisionNumber <= grammar.getNumberOfDecisions();
			 decisionNumber++)
		{
			DFA dfa = grammar.getLookaheadDFA(decisionNumber);
			optimize(dfa);
		}
	}

	/** Apply the enabled optimizations to a single decision DFA.
	 *  Null DFAs (decisions with no lookahead DFA) are skipped.
	 */
	protected void optimize(DFA dfa) {
		if ( dfa==null ) {
			return; // nothing to do
		}
		/*
		System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+
						   " num states="+dfa.getNumberOfStates());
		*/
		//long start = System.currentTimeMillis();
		// Exit-branch pruning applies only to acyclic (inlinable) decisions
		// for greedy optional blocks and loopbacks; see class doc NOTE 1/2.
		if ( PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision() ) {
			visited.clear();
			int decisionType =
				dfa.getNFADecisionStartState().decisionStateType;
			if ( dfa.isGreedy() &&
				 (decisionType==NFAState.OPTIONAL_BLOCK_START ||
				  decisionType==NFAState.LOOPBACK) )
			{
				optimizeExitBranches(dfa.startState);
			}
		}
		// If the Tokens rule has syntactically ambiguous rules, try to prune
		if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
			 dfa.isTokensRuleDecision() &&
			 dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap.size()>0 )
		{
			visited.clear();
			optimizeEOTBranches(dfa.startState);
		}

		/* ack...code gen needs this, cannot optimize
		visited.clear();
		unlinkUnneededStateData(dfa.startState);
		*/
		//long stop = System.currentTimeMillis();
		//System.out.println("minimized in "+(int)(stop-start)+" ms");
	}

	/** Depth-first walk from d, removing any transition whose target is an
	 *  accept state uniquely predicting the max (exit) alt.  Uses the shared
	 *  visited set to terminate on DFA cycles.
	 */
	protected void optimizeExitBranches(DFAState d) {
		Integer sI = Utils.integer(d.stateNumber);
		if ( visited.contains(sI) ) {
			return; // already visited
		}
		visited.add(sI);
		int nAlts = d.dfa.getNumberOfAlts();
		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
			Transition edge = d.transition(i);
			DFAState edgeTarget = ((DFAState)edge.target);
			/*
			System.out.println(d.stateNumber+"-"+
							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
							   edgeTarget.stateNumber);
			*/
			// if target is an accept state and that alt is the exit alt
			if ( edgeTarget.isAcceptState() &&
				 edgeTarget.getUniquelyPredictedAlt()==nAlts)
			{
				/*
				System.out.println("ignoring transition "+i+" to max alt "+
								   d.dfa.getNumberOfAlts());
				*/
				d.removeTransition(i);
				i--; // back up one so that i++ of loop iteration stays within bounds
			}
			optimizeExitBranches(edgeTarget);
		}
	}

	/** Depth-first walk from d, collapsing states whose only transition is a
	 *  lone EOT edge to an accept state: the edge is removed and d itself
	 *  becomes the accept state, caching the alt the old target predicted.
	 *  Uses the shared visited set to terminate on DFA cycles.
	 */
	protected void optimizeEOTBranches(DFAState d) {
		Integer sI = Utils.integer(d.stateNumber);
		if ( visited.contains(sI) ) {
			return; // already visited
		}
		visited.add(sI);
		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
			Transition edge = d.transition(i);
			DFAState edgeTarget = ((DFAState)edge.target);
			/*
			System.out.println(d.stateNumber+"-"+
							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
							   edgeTarget.stateNumber);
			*/
			// if only one edge coming out, it is EOT, and target is accept prune
			if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
				 edgeTarget.isAcceptState() &&
				 d.getNumberOfTransitions()==1 &&
				 edge.label.isAtom() &&
				 edge.label.getAtom()==Label.EOT )
			{
				//System.out.println("state "+d+" can be pruned");
				// remove the superfluous EOT edge
				d.removeTransition(i);
				d.setAcceptState(true); // make it an accept state
				// force it to uniquely predict the originally predicted state
				d.cachedUniquelyPredicatedAlt =
					edgeTarget.getUniquelyPredictedAlt();
				i--; // back up one so that i++ of loop iteration stays within bounds
			}
			optimizeEOTBranches(edgeTarget);
		}
	}

	/** Walk DFA states, unlinking the nfa configs and whatever else I
	 *  can to reduce memory footprint.
	protected void unlinkUnneededStateData(DFAState d) {
		Integer sI = Utils.integer(d.stateNumber);
		if ( visited.contains(sI) ) {
			return; // already visited
		}
		visited.add(sI);
		d.nfaConfigurations = null;
		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
			Transition edge = d.transition(i);
			DFAState edgeTarget = ((DFAState)edge.target);
			unlinkUnneededStateData(edgeTarget);
		}
	}
	*/

}