1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.misc; 29 30 import org.antlr.analysis.Label; 31 import org.antlr.tool.Grammar; 32 33 import java.util.ArrayList; 34 import java.util.Iterator; 35 import java.util.List; 36 import java.util.ListIterator; 37 38 /** A set of integers that relies on ranges being common to do 39 * "run-length-encoded" like compression (if you view an IntSet like 40 * a BitSet with runs of 0s and 1s). Only ranges are recorded so that 41 * a few ints up near value 1000 don't cause massive bitsets, just two 42 * integer intervals. 43 * 44 * element values may be negative. Useful for sets of EPSILON and EOF. 45 * 46 * 0..9 char range is index pair ['\u0030','\u0039']. 47 * Multiple ranges are encoded with multiple index pairs. Isolated 48 * elements are encoded with an index pair where both intervals are the same. 49 * 50 * The ranges are ordered and disjoint so that 2..6 appears before 101..103. 51 */ 52 public class IntervalSet implements IntSet { 53 public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE); 54 55 /** The list of sorted, disjoint intervals. */ 56 protected List<Interval> intervals; 57 58 /** Create a set with no elements */ IntervalSet()59 public IntervalSet() { 60 intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements 61 } 62 IntervalSet(List<Interval> intervals)63 public IntervalSet(List<Interval> intervals) { 64 this.intervals = intervals; 65 } 66 67 /** Create a set with a single element, el. */ of(int a)68 public static IntervalSet of(int a) { 69 IntervalSet s = new IntervalSet(); 70 s.add(a); 71 return s; 72 } 73 74 /** Create a set with all ints within range [a..b] (inclusive) */ of(int a, int b)75 public static IntervalSet of(int a, int b) { 76 IntervalSet s = new IntervalSet(); 77 s.add(a,b); 78 return s; 79 } 80 81 /** Add a single element to the set. An isolated element is stored 82 * as a range el..el. 83 */ add(int el)84 public void add(int el) { 85 add(el,el); 86 } 87 88 /** Add interval; i.e., add all integers from a to b to set. 89 * If b<a, do nothing. 90 * Keep list in sorted order (by left range value). 91 * If overlap, combine ranges. For example, 92 * If this is {1..5, 10..20}, adding 6..7 yields 93 * {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. 94 */ add(int a, int b)95 public void add(int a, int b) { 96 add(Interval.create(a,b)); 97 } 98 99 // copy on write so we can cache a..a intervals and sets of that add(Interval addition)100 protected void add(Interval addition) { 101 //System.out.println("add "+addition+" to "+intervals.toString()); 102 if ( addition.b<addition.a ) { 103 return; 104 } 105 // find position in list 106 // Use iterators as we modify list in place 107 for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { 108 Interval r = (Interval) iter.next(); 109 if ( addition.equals(r) ) { 110 return; 111 } 112 if ( addition.adjacent(r) || !addition.disjoint(r) ) { 113 // next to each other, make a single larger interval 114 Interval bigger = addition.union(r); 115 iter.set(bigger); 116 // make sure we didn't just create an interval that 117 // should be merged with next interval in list 118 if ( iter.hasNext() ) { 119 Interval next = (Interval) iter.next(); 120 if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { 121 // if we bump up against or overlap next, merge 122 iter.remove(); // remove this one 123 iter.previous(); // move backwards to what we just set 124 iter.set(bigger.union(next)); // set to 3 merged ones 125 } 126 } 127 return; 128 } 129 if ( addition.startsBeforeDisjoint(r) ) { 130 // insert before r 131 iter.previous(); 132 iter.add(addition); 133 return; 134 } 135 // if disjoint and after r, a future iteration will handle it 136 } 137 // ok, must be after last interval (and disjoint from last interval) 138 // just add it 139 intervals.add(addition); 140 } 141 142 /* 143 protected void add(Interval addition) { 144 //System.out.println("add "+addition+" to "+intervals.toString()); 145 if ( addition.b<addition.a ) { 146 return; 147 } 148 // find position in list 149 //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { 150 int n = intervals.size(); 151 for (int i=0; i<n; i++) { 152 Interval r = (Interval)intervals.get(i); 153 if ( addition.equals(r) ) { 154 return; 155 } 156 if ( addition.adjacent(r) || !addition.disjoint(r) ) { 157 // next to each other, make a single larger interval 158 Interval bigger = addition.union(r); 159 intervals.set(i, bigger); 160 // make sure we didn't just create an interval that 161 // should be merged with next interval in list 162 if ( (i+1)<n ) { 163 i++; 164 Interval next = (Interval)intervals.get(i); 165 if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { 166 // if we bump up against or overlap next, merge 167 intervals.remove(i); // remove next one 168 i--; 169 intervals.set(i, bigger.union(next)); // set to 3 merged ones 170 } 171 } 172 return; 173 } 174 if ( addition.startsBeforeDisjoint(r) ) { 175 // insert before r 176 intervals.add(i, addition); 177 return; 178 } 179 // if disjoint and after r, a future iteration will handle it 180 } 181 // ok, must be after last interval (and disjoint from last interval) 182 // just add it 183 intervals.add(addition); 184 } 185 */ 186 addAll(IntSet set)187 public void addAll(IntSet set) { 188 if ( set==null ) { 189 return; 190 } 191 if ( !(set instanceof IntervalSet) ) { 192 throw new IllegalArgumentException("can't add non IntSet ("+ 193 set.getClass().getName()+ 194 ") to IntervalSet"); 195 } 196 IntervalSet other = (IntervalSet)set; 197 // walk set and add each interval 198 int n = other.intervals.size(); 199 for (int i = 0; i < n; i++) { 200 Interval I = (Interval) other.intervals.get(i); 201 this.add(I.a,I.b); 202 } 203 } 204 complement(int minElement, int maxElement)205 public IntSet complement(int minElement, int maxElement) { 206 return this.complement(IntervalSet.of(minElement,maxElement)); 207 } 208 209 /** Given the set of possible values (rather than, say UNICODE or MAXINT), 210 * return a new set containing all elements in vocabulary, but not in 211 * this. The computation is (vocabulary - this). 212 * 213 * 'this' is assumed to be either a subset or equal to vocabulary. 214 */ complement(IntSet vocabulary)215 public IntSet complement(IntSet vocabulary) { 216 if ( vocabulary==null ) { 217 return null; // nothing in common with null set 218 } 219 if ( !(vocabulary instanceof IntervalSet ) ) { 220 throw new IllegalArgumentException("can't complement with non IntervalSet ("+ 221 vocabulary.getClass().getName()+")"); 222 } 223 IntervalSet vocabularyIS = ((IntervalSet)vocabulary); 224 int maxElement = vocabularyIS.getMaxElement(); 225 226 IntervalSet compl = new IntervalSet(); 227 int n = intervals.size(); 228 if ( n ==0 ) { 229 return compl; 230 } 231 Interval first = (Interval)intervals.get(0); 232 // add a range from 0 to first.a constrained to vocab 233 if ( first.a > 0 ) { 234 IntervalSet s = IntervalSet.of(0, first.a-1); 235 IntervalSet a = (IntervalSet)s.and(vocabularyIS); 236 compl.addAll(a); 237 } 238 for (int i=1; i<n; i++) { // from 2nd interval .. nth 239 Interval previous = (Interval)intervals.get(i-1); 240 Interval current = (Interval)intervals.get(i); 241 IntervalSet s = IntervalSet.of(previous.b+1, current.a-1); 242 IntervalSet a = (IntervalSet)s.and(vocabularyIS); 243 compl.addAll(a); 244 } 245 Interval last = (Interval)intervals.get(n -1); 246 // add a range from last.b to maxElement constrained to vocab 247 if ( last.b < maxElement ) { 248 IntervalSet s = IntervalSet.of(last.b+1, maxElement); 249 IntervalSet a = (IntervalSet)s.and(vocabularyIS); 250 compl.addAll(a); 251 } 252 return compl; 253 } 254 255 /** Compute this-other via this&~other. 256 * Return a new set containing all elements in this but not in other. 257 * other is assumed to be a subset of this; 258 * anything that is in other but not in this will be ignored. 259 */ subtract(IntSet other)260 public IntSet subtract(IntSet other) { 261 // assume the whole unicode range here for the complement 262 // because it doesn't matter. Anything beyond the max of this' set 263 // will be ignored since we are doing this & ~other. The intersection 264 // will be empty. The only problem would be when this' set max value 265 // goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE 266 // will prevent this. 267 return this.and(((IntervalSet)other).complement(COMPLETE_SET)); 268 } 269 270 /** return a new set containing all elements in this but not in other. 271 * Intervals may have to be broken up when ranges in this overlap 272 * with ranges in other. other is assumed to be a subset of this; 273 * anything that is in other but not in this will be ignored. 274 * 275 * Keep around, but 10-20-2005, I decided to make complement work w/o 276 * subtract and so then subtract can simply be a&~b 277 * 278 public IntSet subtract(IntSet other) { 279 if ( other==null || !(other instanceof IntervalSet) ) { 280 return null; // nothing in common with null set 281 } 282 283 IntervalSet diff = new IntervalSet(); 284 285 // iterate down both interval lists 286 ListIterator thisIter = this.intervals.listIterator(); 287 ListIterator otherIter = ((IntervalSet)other).intervals.listIterator(); 288 Interval mine=null; 289 Interval theirs=null; 290 if ( thisIter.hasNext() ) { 291 mine = (Interval)thisIter.next(); 292 } 293 if ( otherIter.hasNext() ) { 294 theirs = (Interval)otherIter.next(); 295 } 296 while ( mine!=null ) { 297 //System.out.println("mine="+mine+", theirs="+theirs); 298 // CASE 1: nothing in theirs removes a chunk from mine 299 if ( theirs==null || mine.disjoint(theirs) ) { 300 // SUBCASE 1a: finished traversing theirs; keep adding mine now 301 if ( theirs==null ) { 302 // add everything in mine to difference since theirs done 303 diff.add(mine); 304 mine = null; 305 if ( thisIter.hasNext() ) { 306 mine = (Interval)thisIter.next(); 307 } 308 } 309 else { 310 // SUBCASE 1b: mine is completely to the left of theirs 311 // so we can add to difference; move mine, but not theirs 312 if ( mine.startsBeforeDisjoint(theirs) ) { 313 diff.add(mine); 314 mine = null; 315 if ( thisIter.hasNext() ) { 316 mine = (Interval)thisIter.next(); 317 } 318 } 319 // SUBCASE 1c: theirs is completely to the left of mine 320 else { 321 // keep looking in theirs 322 theirs = null; 323 if ( otherIter.hasNext() ) { 324 theirs = (Interval)otherIter.next(); 325 } 326 } 327 } 328 } 329 else { 330 // CASE 2: theirs breaks mine into two chunks 331 if ( mine.properlyContains(theirs) ) { 332 // must add two intervals: stuff to left and stuff to right 333 diff.add(mine.a, theirs.a-1); 334 // don't actually add stuff to right yet as next 'theirs' 335 // might overlap with it 336 // The stuff to the right might overlap with next "theirs". 337 // so it is considered next 338 Interval right = new Interval(theirs.b+1, mine.b); 339 mine = right; 340 // move theirs forward 341 theirs = null; 342 if ( otherIter.hasNext() ) { 343 theirs = (Interval)otherIter.next(); 344 } 345 } 346 347 // CASE 3: theirs covers mine; nothing to add to diff 348 else if ( theirs.properlyContains(mine) ) { 349 // nothing to add, theirs forces removal totally of mine 350 // just move mine looking for an overlapping interval 351 mine = null; 352 if ( thisIter.hasNext() ) { 353 mine = (Interval)thisIter.next(); 354 } 355 } 356 357 // CASE 4: non proper overlap 358 else { 359 // overlap, but not properly contained 360 diff.add(mine.differenceNotProperlyContained(theirs)); 361 // update iterators 362 boolean moveTheirs = true; 363 if ( mine.startsBeforeNonDisjoint(theirs) || 364 theirs.b > mine.b ) 365 { 366 // uh oh, right of theirs extends past right of mine 367 // therefore could overlap with next of mine so don't 368 // move theirs iterator yet 369 moveTheirs = false; 370 } 371 // always move mine 372 mine = null; 373 if ( thisIter.hasNext() ) { 374 mine = (Interval)thisIter.next(); 375 } 376 if ( moveTheirs ) { 377 theirs = null; 378 if ( otherIter.hasNext() ) { 379 theirs = (Interval)otherIter.next(); 380 } 381 } 382 } 383 } 384 } 385 return diff; 386 } 387 */ 388 389 /** TODO: implement this! */ or(IntSet a)390 public IntSet or(IntSet a) { 391 IntervalSet o = new IntervalSet(); 392 o.addAll(this); 393 o.addAll(a); 394 //throw new NoSuchMethodError(); 395 return o; 396 } 397 398 /** Return a new set with the intersection of this set with other. Because 399 * the intervals are sorted, we can use an iterator for each list and 400 * just walk them together. This is roughly O(min(n,m)) for interval 401 * list lengths n and m. 402 */ and(IntSet other)403 public IntSet and(IntSet other) { 404 if ( other==null ) { //|| !(other instanceof IntervalSet) ) { 405 return null; // nothing in common with null set 406 } 407 408 ArrayList myIntervals = (ArrayList)this.intervals; 409 ArrayList theirIntervals = (ArrayList)((IntervalSet)other).intervals; 410 IntervalSet intersection = null; 411 int mySize = myIntervals.size(); 412 int theirSize = theirIntervals.size(); 413 int i = 0; 414 int j = 0; 415 // iterate down both interval lists looking for nondisjoint intervals 416 while ( i<mySize && j<theirSize ) { 417 Interval mine = (Interval)myIntervals.get(i); 418 Interval theirs = (Interval)theirIntervals.get(j); 419 //System.out.println("mine="+mine+" and theirs="+theirs); 420 if ( mine.startsBeforeDisjoint(theirs) ) { 421 // move this iterator looking for interval that might overlap 422 i++; 423 } 424 else if ( theirs.startsBeforeDisjoint(mine) ) { 425 // move other iterator looking for interval that might overlap 426 j++; 427 } 428 else if ( mine.properlyContains(theirs) ) { 429 // overlap, add intersection, get next theirs 430 if ( intersection==null ) { 431 intersection = new IntervalSet(); 432 } 433 intersection.add(mine.intersection(theirs)); 434 j++; 435 } 436 else if ( theirs.properlyContains(mine) ) { 437 // overlap, add intersection, get next mine 438 if ( intersection==null ) { 439 intersection = new IntervalSet(); 440 } 441 intersection.add(mine.intersection(theirs)); 442 i++; 443 } 444 else if ( !mine.disjoint(theirs) ) { 445 // overlap, add intersection 446 if ( intersection==null ) { 447 intersection = new IntervalSet(); 448 } 449 intersection.add(mine.intersection(theirs)); 450 // Move the iterator of lower range [a..b], but not 451 // the upper range as it may contain elements that will collide 452 // with the next iterator. So, if mine=[0..115] and 453 // theirs=[115..200], then intersection is 115 and move mine 454 // but not theirs as theirs may collide with the next range 455 // in thisIter. 456 // move both iterators to next ranges 457 if ( mine.startsAfterNonDisjoint(theirs) ) { 458 j++; 459 } 460 else if ( theirs.startsAfterNonDisjoint(mine) ) { 461 i++; 462 } 463 } 464 } 465 if ( intersection==null ) { 466 return new IntervalSet(); 467 } 468 return intersection; 469 } 470 471 /** Is el in any range of this set? */ member(int el)472 public boolean member(int el) { 473 int n = intervals.size(); 474 for (int i = 0; i < n; i++) { 475 Interval I = (Interval) intervals.get(i); 476 int a = I.a; 477 int b = I.b; 478 if ( el<a ) { 479 break; // list is sorted and el is before this interval; not here 480 } 481 if ( el>=a && el<=b ) { 482 return true; // found in this interval 483 } 484 } 485 return false; 486 /* 487 for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { 488 Interval I = (Interval) iter.next(); 489 if ( el<I.a ) { 490 break; // list is sorted and el is before this interval; not here 491 } 492 if ( el>=I.a && el<=I.b ) { 493 return true; // found in this interval 494 } 495 } 496 return false; 497 */ 498 } 499 500 /** return true if this set has no members */ isNil()501 public boolean isNil() { 502 return intervals==null || intervals.size()==0; 503 } 504 505 /** If this set is a single integer, return it otherwise Label.INVALID */ getSingleElement()506 public int getSingleElement() { 507 if ( intervals!=null && intervals.size()==1 ) { 508 Interval I = (Interval)intervals.get(0); 509 if ( I.a == I.b ) { 510 return I.a; 511 } 512 } 513 return Label.INVALID; 514 } 515 getMaxElement()516 public int getMaxElement() { 517 if ( isNil() ) { 518 return Label.INVALID; 519 } 520 Interval last = (Interval)intervals.get(intervals.size()-1); 521 return last.b; 522 } 523 524 /** Return minimum element >= 0 */ getMinElement()525 public int getMinElement() { 526 if ( isNil() ) { 527 return Label.INVALID; 528 } 529 int n = intervals.size(); 530 for (int i = 0; i < n; i++) { 531 Interval I = (Interval) intervals.get(i); 532 int a = I.a; 533 int b = I.b; 534 for (int v=a; v<=b; v++) { 535 if ( v>=0 ) return v; 536 } 537 } 538 return Label.INVALID; 539 } 540 541 /** Return a list of Interval objects. */ getIntervals()542 public List<Interval> getIntervals() { 543 return intervals; 544 } 545 546 /** Are two IntervalSets equal? Because all intervals are sorted 547 * and disjoint, equals is a simple linear walk over both lists 548 * to make sure they are the same. Interval.equals() is used 549 * by the List.equals() method to check the ranges. 550 */ equals(Object obj)551 public boolean equals(Object obj) { 552 if ( obj==null || !(obj instanceof IntervalSet) ) { 553 return false; 554 } 555 IntervalSet other = (IntervalSet)obj; 556 return this.intervals.equals(other.intervals); 557 } 558 toString()559 public String toString() { 560 return toString(null); 561 } 562 toString(Grammar g)563 public String toString(Grammar g) { 564 StringBuffer buf = new StringBuffer(); 565 if ( this.intervals==null || this.intervals.size()==0 ) { 566 return "{}"; 567 } 568 if ( this.intervals.size()>1 ) { 569 buf.append("{"); 570 } 571 Iterator iter = this.intervals.iterator(); 572 while (iter.hasNext()) { 573 Interval I = (Interval) iter.next(); 574 int a = I.a; 575 int b = I.b; 576 if ( a==b ) { 577 if ( g!=null ) { 578 buf.append(g.getTokenDisplayName(a)); 579 } 580 else { 581 buf.append(a); 582 } 583 } 584 else { 585 if ( g!=null ) { 586 buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b)); 587 } 588 else { 589 buf.append(a+".."+b); 590 } 591 } 592 if ( iter.hasNext() ) { 593 buf.append(", "); 594 } 595 } 596 if ( this.intervals.size()>1 ) { 597 buf.append("}"); 598 } 599 return buf.toString(); 600 } 601 size()602 public int size() { 603 int n = 0; 604 int numIntervals = intervals.size(); 605 if ( numIntervals==1 ) { 606 Interval firstInterval = this.intervals.get(0); 607 return firstInterval.b-firstInterval.a+1; 608 } 609 for (int i = 0; i < numIntervals; i++) { 610 Interval I = (Interval) intervals.get(i); 611 n += (I.b-I.a+1); 612 } 613 return n; 614 } 615 toList()616 public List toList() { 617 List values = new ArrayList(); 618 int n = intervals.size(); 619 for (int i = 0; i < n; i++) { 620 Interval I = (Interval) intervals.get(i); 621 int a = I.a; 622 int b = I.b; 623 for (int v=a; v<=b; v++) { 624 values.add(Utils.integer(v)); 625 } 626 } 627 return values; 628 } 629 630 /** Get the ith element of ordered set. Used only by RandomPhrase so 631 * don't bother to implement if you're not doing that for a new 632 * ANTLR code gen target. 633 */ get(int i)634 public int get(int i) { 635 int n = intervals.size(); 636 int index = 0; 637 for (int j = 0; j < n; j++) { 638 Interval I = (Interval) intervals.get(j); 639 int a = I.a; 640 int b = I.b; 641 for (int v=a; v<=b; v++) { 642 if ( index==i ) { 643 return v; 644 } 645 index++; 646 } 647 } 648 return -1; 649 } 650 toArray()651 public int[] toArray() { 652 int[] values = new int[size()]; 653 int n = intervals.size(); 654 int j = 0; 655 for (int i = 0; i < n; i++) { 656 Interval I = (Interval) intervals.get(i); 657 int a = I.a; 658 int b = I.b; 659 for (int v=a; v<=b; v++) { 660 values[j] = v; 661 j++; 662 } 663 } 664 return values; 665 } 666 toRuntimeBitSet()667 public org.antlr.runtime.BitSet toRuntimeBitSet() { 668 org.antlr.runtime.BitSet s = 669 new org.antlr.runtime.BitSet(getMaxElement()+1); 670 int n = intervals.size(); 671 for (int i = 0; i < n; i++) { 672 Interval I = (Interval) intervals.get(i); 673 int a = I.a; 674 int b = I.b; 675 for (int v=a; v<=b; v++) { 676 s.add(v); 677 } 678 } 679 return s; 680 } 681 remove(int el)682 public void remove(int el) { 683 throw new NoSuchMethodError("IntervalSet.remove() unimplemented"); 684 } 685 686 /* 687 protected void finalize() throws Throwable { 688 super.finalize(); 689 System.out.println("size "+intervals.size()+" "+size()); 690 } 691 */ 692 } 693