1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.misc.IntSet; 31 import org.antlr.misc.IntervalSet; 32 import org.antlr.tool.Grammar; 33 34 /** A state machine transition label. A label can be either a simple 35 * label such as a token or character. A label can be a set of char or 36 * tokens. It can be an epsilon transition. It can be a semantic predicate 37 * (which assumes an epsilon transition) or a tree of predicates (in a DFA). 38 * Special label types have to be < 0 to avoid conflict with char. 39 */ 40 public class Label implements Comparable<Label>, Cloneable { 41 public static final int INVALID = -7; 42 43 public static final int ACTION = -6; 44 45 public static final int EPSILON = -5; 46 47 public static final String EPSILON_STR = "<EPSILON>"; 48 49 /** label is a semantic predicate; implies label is epsilon also */ 50 public static final int SEMPRED = -4; 51 52 /** label is a set of tokens or char */ 53 public static final int SET = -3; 54 55 /** End of Token is like EOF for lexer rules. It implies that no more 56 * characters are available and that NFA conversion should terminate 57 * for this path. For example 58 * 59 * A : 'a' 'b' | 'a' ; 60 * 61 * yields a DFA predictor: 62 * 63 * o-a->o-b->1 predict alt 1 64 * | 65 * |-EOT->o predict alt 2 66 * 67 * To generate code for EOT, treat it as the "default" path, which 68 * implies there is no way to mismatch a char for the state from 69 * which the EOT emanates. 70 */ 71 public static final int EOT = -2; 72 73 public static final int EOF = -1; 74 75 /** We have labels like EPSILON that are below 0; it's hard to 76 * store them in an array with negative index so use this 77 * constant as an index shift when accessing arrays based upon 78 * token type. If real token type is i, then array index would be 79 * NUM_FAUX_LABELS + i. 80 */ 81 public static final int NUM_FAUX_LABELS = -INVALID; 82 83 /** Anything at this value or larger can be considered a simple atom int 84 * for easy comparison during analysis only; faux labels are not used 85 * during parse time for real token types or char values. 86 */ 87 public static final int MIN_ATOM_VALUE = EOT; 88 89 // TODO: is 0 a valid unicode char? max is FFFF -1, right? 90 public static final int MIN_CHAR_VALUE = '\u0000'; 91 public static final int MAX_CHAR_VALUE = '\uFFFF'; 92 93 /** End of rule token type; imaginary token type used only for 94 * local, partial FOLLOW sets to indicate that the local FOLLOW 95 * hit the end of rule. During error recovery, the local FOLLOW 96 * of a token reference may go beyond the end of the rule and have 97 * to use FOLLOW(rule). I have to just shift the token types to 2..n 98 * rather than 1..n to accommodate this imaginary token in my bitsets. 99 * If I didn't use a bitset implementation for runtime sets, I wouldn't 100 * need this. EOF is another candidate for a run time token type for 101 * parsers. Follow sets are not computed for lexers so we do not have 102 * this issue. 103 */ 104 public static final int EOR_TOKEN_TYPE = 105 org.antlr.runtime.Token.EOR_TOKEN_TYPE; 106 107 public static final int DOWN = org.antlr.runtime.Token.DOWN; 108 public static final int UP = org.antlr.runtime.Token.UP; 109 110 /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */ 111 public static final int MIN_TOKEN_TYPE = 112 org.antlr.runtime.Token.MIN_TOKEN_TYPE; 113 114 /** The wildcard '.' char atom implies all valid characters==UNICODE */ 115 //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE); 116 117 /** The token type or character value; or, signifies special label. */ 118 protected int label; 119 120 /** A set of token types or character codes if label==SET */ 121 // TODO: try IntervalSet for everything 122 protected IntSet labelSet; 123 Label(int label)124 public Label(int label) { 125 this.label = label; 126 } 127 128 /** Make a set label */ Label(IntSet labelSet)129 public Label(IntSet labelSet) { 130 if ( labelSet==null ) { 131 this.label = SET; 132 this.labelSet = IntervalSet.of(INVALID); 133 return; 134 } 135 int singleAtom = labelSet.getSingleElement(); 136 if ( singleAtom!=INVALID ) { 137 // convert back to a single atomic element if |labelSet|==1 138 label = singleAtom; 139 return; 140 } 141 this.label = SET; 142 this.labelSet = labelSet; 143 } 144 145 @Override clone()146 public Object clone() { 147 Label l; 148 try { 149 l = (Label)super.clone(); 150 l.label = this.label; 151 l.labelSet = new IntervalSet(); 152 l.labelSet.addAll(this.labelSet); 153 } 154 catch (CloneNotSupportedException e) { 155 throw new InternalError(); 156 } 157 return l; 158 } 159 add(Label a)160 public void add(Label a) { 161 if ( isAtom() ) { 162 labelSet = IntervalSet.of(label); 163 label=SET; 164 if ( a.isAtom() ) { 165 labelSet.add(a.getAtom()); 166 } 167 else if ( a.isSet() ) { 168 labelSet.addAll(a.getSet()); 169 } 170 else { 171 throw new IllegalStateException("can't add element to Label of type "+label); 172 } 173 return; 174 } 175 if ( isSet() ) { 176 if ( a.isAtom() ) { 177 labelSet.add(a.getAtom()); 178 } 179 else if ( a.isSet() ) { 180 labelSet.addAll(a.getSet()); 181 } 182 else { 183 throw new IllegalStateException("can't add element to Label of type "+label); 184 } 185 return; 186 } 187 throw new IllegalStateException("can't add element to Label of type "+label); 188 } 189 isAtom()190 public boolean isAtom() { 191 return label>=MIN_ATOM_VALUE; 192 } 193 isEpsilon()194 public boolean isEpsilon() { 195 return label==EPSILON; 196 } 197 isSemanticPredicate()198 public boolean isSemanticPredicate() { 199 return false; 200 } 201 isAction()202 public boolean isAction() { 203 return false; 204 } 205 isSet()206 public boolean isSet() { 207 return label==SET; 208 } 209 210 /** return the single atom label or INVALID if not a single atom */ getAtom()211 public int getAtom() { 212 if ( isAtom() ) { 213 return label; 214 } 215 return INVALID; 216 } 217 getSet()218 public IntSet getSet() { 219 if ( label!=SET ) { 220 // convert single element to a set if they ask for it. 221 return IntervalSet.of(label); 222 } 223 return labelSet; 224 } 225 setSet(IntSet set)226 public void setSet(IntSet set) { 227 label=SET; 228 labelSet = set; 229 } 230 getSemanticContext()231 public SemanticContext getSemanticContext() { 232 return null; 233 } 234 matches(int atom)235 public boolean matches(int atom) { 236 if ( label==atom ) { 237 return true; // handle the single atom case efficiently 238 } 239 if ( isSet() ) { 240 return labelSet.member(atom); 241 } 242 return false; 243 } 244 matches(IntSet set)245 public boolean matches(IntSet set) { 246 if ( isAtom() ) { 247 return set.member(getAtom()); 248 } 249 if ( isSet() ) { 250 // matches if intersection non-nil 251 return !getSet().and(set).isNil(); 252 } 253 return false; 254 } 255 256 matches(Label other)257 public boolean matches(Label other) { 258 if ( other.isSet() ) { 259 return matches(other.getSet()); 260 } 261 if ( other.isAtom() ) { 262 return matches(other.getAtom()); 263 } 264 return false; 265 } 266 267 @Override hashCode()268 public int hashCode() { 269 if (label==SET) { 270 return labelSet.hashCode(); 271 } 272 else { 273 return label; 274 } 275 } 276 277 // TODO: do we care about comparing set {A} with atom A? Doesn't now. 278 @Override equals(Object o)279 public boolean equals(Object o) { 280 if ( o==null ) { 281 return false; 282 } 283 if ( this == o ) { 284 return true; // equals if same object 285 } 286 // labels must be the same even if epsilon or set or sempred etc... 287 if ( label!=((Label)o).label ) { 288 return false; 289 } 290 if ( label==SET ) { 291 return this.labelSet.equals(((Label)o).labelSet); 292 } 293 return true; // label values are same, so true 294 } 295 296 @Override compareTo(Label o)297 public int compareTo(Label o) { 298 return this.label-o.label; 299 } 300 301 /** Predicates are lists of AST nodes from the NFA created from the 302 * grammar, but the same predicate could be cut/paste into multiple 303 * places in the grammar. I must compare the text of all the 304 * predicates to truly answer whether {p1,p2} .equals {p1,p2}. 305 * Unfortunately, I cannot rely on the AST.equals() to work properly 306 * so I must do a brute force O(n^2) nested traversal of the Set 307 * doing a String compare. 308 * 309 * At this point, Labels are not compared for equals when they are 310 * predicates, but here's the code for future use. 311 */ 312 /* 313 protected boolean predicatesEquals(Set others) { 314 Iterator iter = semanticContext.iterator(); 315 while (iter.hasNext()) { 316 AST predAST = (AST) iter.next(); 317 Iterator inner = semanticContext.iterator(); 318 while (inner.hasNext()) { 319 AST otherPredAST = (AST) inner.next(); 320 if ( !predAST.getText().equals(otherPredAST.getText()) ) { 321 return false; 322 } 323 } 324 } 325 return true; 326 } 327 */ 328 329 @Override toString()330 public String toString() { 331 switch (label) { 332 case SET : 333 return labelSet.toString(); 334 default : 335 return String.valueOf(label); 336 } 337 } 338 toString(Grammar g)339 public String toString(Grammar g) { 340 switch (label) { 341 case SET : 342 return labelSet.toString(g); 343 default : 344 return g.getTokenDisplayName(label); 345 } 346 } 347 348 /* 349 public String predicatesToString() { 350 if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) { 351 return "!other preds"; 352 } 353 StringBuffer buf = new StringBuffer(); 354 Iterator iter = semanticContext.iterator(); 355 while (iter.hasNext()) { 356 AST predAST = (AST) iter.next(); 357 buf.append(predAST.getText()); 358 if ( iter.hasNext() ) { 359 buf.append("&"); 360 } 361 } 362 return buf.toString(); 363 } 364 */ 365 intersect(Label label, Label edgeLabel)366 public static boolean intersect(Label label, Label edgeLabel) { 367 boolean hasIntersection = false; 368 boolean labelIsSet = label.isSet(); 369 boolean edgeIsSet = edgeLabel.isSet(); 370 if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) { 371 hasIntersection = true; 372 } 373 else if ( labelIsSet && edgeIsSet && 374 !edgeLabel.getSet().and(label.getSet()).isNil() ) { 375 hasIntersection = true; 376 } 377 else if ( labelIsSet && !edgeIsSet && 378 label.getSet().member(edgeLabel.label) ) { 379 hasIntersection = true; 380 } 381 else if ( !labelIsSet && edgeIsSet && 382 edgeLabel.getSet().member(label.label) ) { 383 hasIntersection = true; 384 } 385 return hasIntersection; 386 } 387 } 388