1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.misc.IntSet; 31 import org.antlr.misc.IntervalSet; 32 import org.antlr.tool.Grammar; 33 34 /** A state machine transition label. A label can be either a simple 35 * label such as a token or character. A label can be a set of char or 36 * tokens. It can be an epsilon transition. It can be a semantic predicate 37 * (which assumes an epsilon transition) or a tree of predicates (in a DFA). 38 * Special label types have to be < 0 to avoid conflict with char. 39 */ 40 public class Label implements Comparable, Cloneable { 41 public static final int INVALID = -7; 42 43 public static final int ACTION = -6; 44 45 public static final int EPSILON = -5; 46 47 public static final String EPSILON_STR = "<EPSILON>"; 48 49 /** label is a semantic predicate; implies label is epsilon also */ 50 public static final int SEMPRED = -4; 51 52 /** label is a set of tokens or char */ 53 public static final int SET = -3; 54 55 /** End of Token is like EOF for lexer rules. It implies that no more 56 * characters are available and that NFA conversion should terminate 57 * for this path. For example 58 * 59 * A : 'a' 'b' | 'a' ; 60 * 61 * yields a DFA predictor: 62 * 63 * o-a->o-b->1 predict alt 1 64 * | 65 * |-EOT->o predict alt 2 66 * 67 * To generate code for EOT, treat it as the "default" path, which 68 * implies there is no way to mismatch a char for the state from 69 * which the EOT emanates. 70 */ 71 public static final int EOT = -2; 72 73 public static final int EOF = -1; 74 75 /** We have labels like EPSILON that are below 0; it's hard to 76 * store them in an array with negative index so use this 77 * constant as an index shift when accessing arrays based upon 78 * token type. If real token type is i, then array index would be 79 * NUM_FAUX_LABELS + i. 80 */ 81 public static final int NUM_FAUX_LABELS = -INVALID; 82 83 /** Anything at this value or larger can be considered a simple atom int 84 * for easy comparison during analysis only; faux labels are not used 85 * during parse time for real token types or char values. 86 */ 87 public static final int MIN_ATOM_VALUE = EOT; 88 89 // TODO: is 0 a valid unicode char? max is FFFF -1, right? 90 public static final int MIN_CHAR_VALUE = '\u0000'; 91 public static final int MAX_CHAR_VALUE = '\uFFFF'; 92 93 /** End of rule token type; imaginary token type used only for 94 * local, partial FOLLOW sets to indicate that the local FOLLOW 95 * hit the end of rule. During error recovery, the local FOLLOW 96 * of a token reference may go beyond the end of the rule and have 97 * to use FOLLOW(rule). I have to just shift the token types to 2..n 98 * rather than 1..n to accommodate this imaginary token in my bitsets. 99 * If I didn't use a bitset implementation for runtime sets, I wouldn't 100 * need this. EOF is another candidate for a run time token type for 101 * parsers. Follow sets are not computed for lexers so we do not have 102 * this issue. 103 */ 104 public static final int EOR_TOKEN_TYPE = 105 org.antlr.runtime.Token.EOR_TOKEN_TYPE; 106 107 public static final int DOWN = org.antlr.runtime.Token.DOWN; 108 public static final int UP = org.antlr.runtime.Token.UP; 109 110 /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */ 111 public static final int MIN_TOKEN_TYPE = 112 org.antlr.runtime.Token.MIN_TOKEN_TYPE; 113 114 /** The wildcard '.' char atom implies all valid characters==UNICODE */ 115 //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE); 116 117 /** The token type or character value; or, signifies special label. */ 118 protected int label; 119 120 /** A set of token types or character codes if label==SET */ 121 // TODO: try IntervalSet for everything 122 protected IntSet labelSet; 123 Label(int label)124 public Label(int label) { 125 this.label = label; 126 } 127 128 /** Make a set label */ Label(IntSet labelSet)129 public Label(IntSet labelSet) { 130 if ( labelSet==null ) { 131 this.label = SET; 132 this.labelSet = IntervalSet.of(INVALID); 133 return; 134 } 135 int singleAtom = labelSet.getSingleElement(); 136 if ( singleAtom!=INVALID ) { 137 // convert back to a single atomic element if |labelSet|==1 138 label = singleAtom; 139 return; 140 } 141 this.label = SET; 142 this.labelSet = labelSet; 143 } 144 clone()145 public Object clone() { 146 Label l; 147 try { 148 l = (Label)super.clone(); 149 l.label = this.label; 150 l.labelSet = new IntervalSet(); 151 l.labelSet.addAll(this.labelSet); 152 } 153 catch (CloneNotSupportedException e) { 154 throw new InternalError(); 155 } 156 return l; 157 } 158 add(Label a)159 public void add(Label a) { 160 if ( isAtom() ) { 161 labelSet = IntervalSet.of(label); 162 label=SET; 163 if ( a.isAtom() ) { 164 labelSet.add(a.getAtom()); 165 } 166 else if ( a.isSet() ) { 167 labelSet.addAll(a.getSet()); 168 } 169 else { 170 throw new IllegalStateException("can't add element to Label of type "+label); 171 } 172 return; 173 } 174 if ( isSet() ) { 175 if ( a.isAtom() ) { 176 labelSet.add(a.getAtom()); 177 } 178 else if ( a.isSet() ) { 179 labelSet.addAll(a.getSet()); 180 } 181 else { 182 throw new IllegalStateException("can't add element to Label of type "+label); 183 } 184 return; 185 } 186 throw new IllegalStateException("can't add element to Label of type "+label); 187 } 188 isAtom()189 public boolean isAtom() { 190 return label>=MIN_ATOM_VALUE; 191 } 192 isEpsilon()193 public boolean isEpsilon() { 194 return label==EPSILON; 195 } 196 isSemanticPredicate()197 public boolean isSemanticPredicate() { 198 return false; 199 } 200 isAction()201 public boolean isAction() { 202 return false; 203 } 204 isSet()205 public boolean isSet() { 206 return label==SET; 207 } 208 209 /** return the single atom label or INVALID if not a single atom */ getAtom()210 public int getAtom() { 211 if ( isAtom() ) { 212 return label; 213 } 214 return INVALID; 215 } 216 getSet()217 public IntSet getSet() { 218 if ( label!=SET ) { 219 // convert single element to a set if they ask for it. 220 return IntervalSet.of(label); 221 } 222 return labelSet; 223 } 224 setSet(IntSet set)225 public void setSet(IntSet set) { 226 label=SET; 227 labelSet = set; 228 } 229 getSemanticContext()230 public SemanticContext getSemanticContext() { 231 return null; 232 } 233 matches(int atom)234 public boolean matches(int atom) { 235 if ( label==atom ) { 236 return true; // handle the single atom case efficiently 237 } 238 if ( isSet() ) { 239 return labelSet.member(atom); 240 } 241 return false; 242 } 243 matches(IntSet set)244 public boolean matches(IntSet set) { 245 if ( isAtom() ) { 246 return set.member(getAtom()); 247 } 248 if ( isSet() ) { 249 // matches if intersection non-nil 250 return !getSet().and(set).isNil(); 251 } 252 return false; 253 } 254 255 matches(Label other)256 public boolean matches(Label other) { 257 if ( other.isSet() ) { 258 return matches(other.getSet()); 259 } 260 if ( other.isAtom() ) { 261 return matches(other.getAtom()); 262 } 263 return false; 264 } 265 hashCode()266 public int hashCode() { 267 if (label==SET) { 268 return labelSet.hashCode(); 269 } 270 else { 271 return label; 272 } 273 } 274 275 // TODO: do we care about comparing set {A} with atom A? Doesn't now. equals(Object o)276 public boolean equals(Object o) { 277 if ( o==null ) { 278 return false; 279 } 280 if ( this == o ) { 281 return true; // equals if same object 282 } 283 // labels must be the same even if epsilon or set or sempred etc... 284 if ( label!=((Label)o).label ) { 285 return false; 286 } 287 if ( label==SET ) { 288 return this.labelSet.equals(((Label)o).labelSet); 289 } 290 return true; // label values are same, so true 291 } 292 compareTo(Object o)293 public int compareTo(Object o) { 294 return this.label-((Label)o).label; 295 } 296 297 /** Predicates are lists of AST nodes from the NFA created from the 298 * grammar, but the same predicate could be cut/paste into multiple 299 * places in the grammar. I must compare the text of all the 300 * predicates to truly answer whether {p1,p2} .equals {p1,p2}. 301 * Unfortunately, I cannot rely on the AST.equals() to work properly 302 * so I must do a brute force O(n^2) nested traversal of the Set 303 * doing a String compare. 304 * 305 * At this point, Labels are not compared for equals when they are 306 * predicates, but here's the code for future use. 307 */ 308 /* 309 protected boolean predicatesEquals(Set others) { 310 Iterator iter = semanticContext.iterator(); 311 while (iter.hasNext()) { 312 AST predAST = (AST) iter.next(); 313 Iterator inner = semanticContext.iterator(); 314 while (inner.hasNext()) { 315 AST otherPredAST = (AST) inner.next(); 316 if ( !predAST.getText().equals(otherPredAST.getText()) ) { 317 return false; 318 } 319 } 320 } 321 return true; 322 } 323 */ 324 toString()325 public String toString() { 326 switch (label) { 327 case SET : 328 return labelSet.toString(); 329 default : 330 return String.valueOf(label); 331 } 332 } 333 toString(Grammar g)334 public String toString(Grammar g) { 335 switch (label) { 336 case SET : 337 return labelSet.toString(g); 338 default : 339 return g.getTokenDisplayName(label); 340 } 341 } 342 343 /* 344 public String predicatesToString() { 345 if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) { 346 return "!other preds"; 347 } 348 StringBuffer buf = new StringBuffer(); 349 Iterator iter = semanticContext.iterator(); 350 while (iter.hasNext()) { 351 AST predAST = (AST) iter.next(); 352 buf.append(predAST.getText()); 353 if ( iter.hasNext() ) { 354 buf.append("&"); 355 } 356 } 357 return buf.toString(); 358 } 359 */ 360 intersect(Label label, Label edgeLabel)361 public static boolean intersect(Label label, Label edgeLabel) { 362 boolean hasIntersection = false; 363 boolean labelIsSet = label.isSet(); 364 boolean edgeIsSet = edgeLabel.isSet(); 365 if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) { 366 hasIntersection = true; 367 } 368 else if ( labelIsSet && edgeIsSet && 369 !edgeLabel.getSet().and(label.getSet()).isNil() ) { 370 hasIntersection = true; 371 } 372 else if ( labelIsSet && !edgeIsSet && 373 label.getSet().member(edgeLabel.label) ) { 374 hasIntersection = true; 375 } 376 else if ( !labelIsSet && edgeIsSet && 377 edgeLabel.getSet().member(label.label) ) { 378 hasIntersection = true; 379 } 380 return hasIntersection; 381 } 382 } 383