1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.misc; 29 30 import org.antlr.analysis.Label; 31 import org.antlr.tool.Grammar; 32 33 import java.util.ArrayList; 34 import java.util.Iterator; 35 import java.util.List; 36 import java.util.ListIterator; 37 38 /** A set of integers that relies on ranges being common to do 39 * "run-length-encoded" like compression (if you view an IntSet like 40 * a BitSet with runs of 0s and 1s). Only ranges are recorded so that 41 * a few ints up near value 1000 don't cause massive bitsets, just two 42 * integer intervals. 43 * 44 * element values may be negative. Useful for sets of EPSILON and EOF. 45 * 46 * 0..9 char range is index pair ['\u0030','\u0039']. 47 * Multiple ranges are encoded with multiple index pairs. Isolated 48 * elements are encoded with an index pair where both intervals are the same. 49 * 50 * The ranges are ordered and disjoint so that 2..6 appears before 101..103. 51 */ 52 public class IntervalSet implements IntSet { 53 public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE); 54 55 /** The list of sorted, disjoint intervals. */ 56 protected List<Interval> intervals; 57 58 /** Create a set with no elements */ IntervalSet()59 public IntervalSet() { 60 intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements 61 } 62 IntervalSet(List<Interval> intervals)63 public IntervalSet(List<Interval> intervals) { 64 this.intervals = intervals; 65 } 66 67 /** Create a set with a single element, el. */ of(int a)68 public static IntervalSet of(int a) { 69 IntervalSet s = new IntervalSet(); 70 s.add(a); 71 return s; 72 } 73 74 /** Create a set with all ints within range [a..b] (inclusive) */ of(int a, int b)75 public static IntervalSet of(int a, int b) { 76 IntervalSet s = new IntervalSet(); 77 s.add(a,b); 78 return s; 79 } 80 81 /** Add a single element to the set. An isolated element is stored 82 * as a range el..el. 83 */ 84 @Override add(int el)85 public void add(int el) { 86 add(el,el); 87 } 88 89 /** Add interval; i.e., add all integers from a to b to set. 90 * If b<a, do nothing. 91 * Keep list in sorted order (by left range value). 92 * If overlap, combine ranges. For example, 93 * If this is {1..5, 10..20}, adding 6..7 yields 94 * {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. 95 */ add(int a, int b)96 public void add(int a, int b) { 97 add(Interval.create(a,b)); 98 } 99 100 // copy on write so we can cache a..a intervals and sets of that add(Interval addition)101 protected void add(Interval addition) { 102 //System.out.println("add "+addition+" to "+intervals.toString()); 103 if ( addition.b<addition.a ) { 104 return; 105 } 106 // find position in list 107 // Use iterators as we modify list in place 108 for (ListIterator<Interval> iter = intervals.listIterator(); iter.hasNext();) { 109 Interval r = iter.next(); 110 if ( addition.equals(r) ) { 111 return; 112 } 113 if ( addition.adjacent(r) || !addition.disjoint(r) ) { 114 // next to each other, make a single larger interval 115 Interval bigger = addition.union(r); 116 iter.set(bigger); 117 // make sure we didn't just create an interval that 118 // should be merged with next interval in list 119 while ( iter.hasNext() ) { 120 Interval next = iter.next(); 121 if ( !bigger.adjacent(next) && bigger.disjoint(next) ) { 122 break; 123 } 124 125 // if we bump up against or overlap next, merge 126 iter.remove(); // remove this one 127 iter.previous(); // move backwards to what we just set 128 iter.set(bigger.union(next)); // set to 3 merged ones 129 iter.next(); // first call to next after previous duplicates the result 130 } 131 return; 132 } 133 if ( addition.startsBeforeDisjoint(r) ) { 134 // insert before r 135 iter.previous(); 136 iter.add(addition); 137 return; 138 } 139 // if disjoint and after r, a future iteration will handle it 140 } 141 // ok, must be after last interval (and disjoint from last interval) 142 // just add it 143 intervals.add(addition); 144 } 145 146 /* 147 protected void add(Interval addition) { 148 //System.out.println("add "+addition+" to "+intervals.toString()); 149 if ( addition.b<addition.a ) { 150 return; 151 } 152 // find position in list 153 //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { 154 int n = intervals.size(); 155 for (int i=0; i<n; i++) { 156 Interval r = (Interval)intervals.get(i); 157 if ( addition.equals(r) ) { 158 return; 159 } 160 if ( addition.adjacent(r) || !addition.disjoint(r) ) { 161 // next to each other, make a single larger interval 162 Interval bigger = addition.union(r); 163 intervals.set(i, bigger); 164 // make sure we didn't just create an interval that 165 // should be merged with next interval in list 166 if ( (i+1)<n ) { 167 i++; 168 Interval next = (Interval)intervals.get(i); 169 if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { 170 // if we bump up against or overlap next, merge 171 intervals.remove(i); // remove next one 172 i--; 173 intervals.set(i, bigger.union(next)); // set to 3 merged ones 174 } 175 } 176 return; 177 } 178 if ( addition.startsBeforeDisjoint(r) ) { 179 // insert before r 180 intervals.add(i, addition); 181 return; 182 } 183 // if disjoint and after r, a future iteration will handle it 184 } 185 // ok, must be after last interval (and disjoint from last interval) 186 // just add it 187 intervals.add(addition); 188 } 189 */ 190 191 @Override addAll(IntSet set)192 public void addAll(IntSet set) { 193 if ( set==null ) { 194 return; 195 } 196 if ( !(set instanceof IntervalSet) ) { 197 throw new IllegalArgumentException("can't add non IntSet ("+ 198 set.getClass().getName()+ 199 ") to IntervalSet"); 200 } 201 IntervalSet other = (IntervalSet)set; 202 // walk set and add each interval 203 int n = other.intervals.size(); 204 for (int i = 0; i < n; i++) { 205 Interval I = other.intervals.get(i); 206 this.add(I.a,I.b); 207 } 208 } 209 complement(int minElement, int maxElement)210 public IntervalSet complement(int minElement, int maxElement) { 211 return this.complement(IntervalSet.of(minElement,maxElement)); 212 } 213 214 /** Given the set of possible values (rather than, say UNICODE or MAXINT), 215 * return a new set containing all elements in vocabulary, but not in 216 * this. The computation is (vocabulary - this). 217 * 218 * 'this' is assumed to be either a subset or equal to vocabulary. 219 */ 220 @Override complement(IntSet vocabulary)221 public IntervalSet complement(IntSet vocabulary) { 222 if ( vocabulary==null ) { 223 return null; // nothing in common with null set 224 } 225 if ( !(vocabulary instanceof IntervalSet ) ) { 226 throw new IllegalArgumentException("can't complement with non IntervalSet ("+ 227 vocabulary.getClass().getName()+")"); 228 } 229 IntervalSet vocabularyIS = ((IntervalSet)vocabulary); 230 int maxElement = vocabularyIS.getMaxElement(); 231 232 IntervalSet compl = new IntervalSet(); 233 int n = intervals.size(); 234 if ( n ==0 ) { 235 return compl; 236 } 237 Interval first = intervals.get(0); 238 // add a range from 0 to first.a constrained to vocab 239 if ( first.a > 0 ) { 240 IntervalSet s = IntervalSet.of(0, first.a-1); 241 IntervalSet a = s.and(vocabularyIS); 242 compl.addAll(a); 243 } 244 for (int i=1; i<n; i++) { // from 2nd interval .. nth 245 Interval previous = intervals.get(i-1); 246 Interval current = intervals.get(i); 247 IntervalSet s = IntervalSet.of(previous.b+1, current.a-1); 248 IntervalSet a = s.and(vocabularyIS); 249 compl.addAll(a); 250 } 251 Interval last = intervals.get(n -1); 252 // add a range from last.b to maxElement constrained to vocab 253 if ( last.b < maxElement ) { 254 IntervalSet s = IntervalSet.of(last.b+1, maxElement); 255 IntervalSet a = s.and(vocabularyIS); 256 compl.addAll(a); 257 } 258 return compl; 259 } 260 261 /** Compute this-other via this&~other. 262 * Return a new set containing all elements in this but not in other. 263 * other is assumed to be a subset of this; 264 * anything that is in other but not in this will be ignored. 265 */ 266 @Override subtract(IntSet other)267 public IntervalSet subtract(IntSet other) { 268 // assume the whole unicode range here for the complement 269 // because it doesn't matter. Anything beyond the max of this' set 270 // will be ignored since we are doing this & ~other. The intersection 271 // will be empty. The only problem would be when this' set max value 272 // goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE 273 // will prevent this. 274 return this.and(((IntervalSet)other).complement(COMPLETE_SET)); 275 } 276 277 /** return a new set containing all elements in this but not in other. 278 * Intervals may have to be broken up when ranges in this overlap 279 * with ranges in other. other is assumed to be a subset of this; 280 * anything that is in other but not in this will be ignored. 281 * 282 * Keep around, but 10-20-2005, I decided to make complement work w/o 283 * subtract and so then subtract can simply be a&~b 284 * 285 public IntSet subtract(IntSet other) { 286 if ( other==null || !(other instanceof IntervalSet) ) { 287 return null; // nothing in common with null set 288 } 289 290 IntervalSet diff = new IntervalSet(); 291 292 // iterate down both interval lists 293 ListIterator thisIter = this.intervals.listIterator(); 294 ListIterator otherIter = ((IntervalSet)other).intervals.listIterator(); 295 Interval mine=null; 296 Interval theirs=null; 297 if ( thisIter.hasNext() ) { 298 mine = (Interval)thisIter.next(); 299 } 300 if ( otherIter.hasNext() ) { 301 theirs = (Interval)otherIter.next(); 302 } 303 while ( mine!=null ) { 304 //System.out.println("mine="+mine+", theirs="+theirs); 305 // CASE 1: nothing in theirs removes a chunk from mine 306 if ( theirs==null || mine.disjoint(theirs) ) { 307 // SUBCASE 1a: finished traversing theirs; keep adding mine now 308 if ( theirs==null ) { 309 // add everything in mine to difference since theirs done 310 diff.add(mine); 311 mine = null; 312 if ( thisIter.hasNext() ) { 313 mine = (Interval)thisIter.next(); 314 } 315 } 316 else { 317 // SUBCASE 1b: mine is completely to the left of theirs 318 // so we can add to difference; move mine, but not theirs 319 if ( mine.startsBeforeDisjoint(theirs) ) { 320 diff.add(mine); 321 mine = null; 322 if ( thisIter.hasNext() ) { 323 mine = (Interval)thisIter.next(); 324 } 325 } 326 // SUBCASE 1c: theirs is completely to the left of mine 327 else { 328 // keep looking in theirs 329 theirs = null; 330 if ( otherIter.hasNext() ) { 331 theirs = (Interval)otherIter.next(); 332 } 333 } 334 } 335 } 336 else { 337 // CASE 2: theirs breaks mine into two chunks 338 if ( mine.properlyContains(theirs) ) { 339 // must add two intervals: stuff to left and stuff to right 340 diff.add(mine.a, theirs.a-1); 341 // don't actually add stuff to right yet as next 'theirs' 342 // might overlap with it 343 // The stuff to the right might overlap with next "theirs". 344 // so it is considered next 345 Interval right = new Interval(theirs.b+1, mine.b); 346 mine = right; 347 // move theirs forward 348 theirs = null; 349 if ( otherIter.hasNext() ) { 350 theirs = (Interval)otherIter.next(); 351 } 352 } 353 354 // CASE 3: theirs covers mine; nothing to add to diff 355 else if ( theirs.properlyContains(mine) ) { 356 // nothing to add, theirs forces removal totally of mine 357 // just move mine looking for an overlapping interval 358 mine = null; 359 if ( thisIter.hasNext() ) { 360 mine = (Interval)thisIter.next(); 361 } 362 } 363 364 // CASE 4: non proper overlap 365 else { 366 // overlap, but not properly contained 367 diff.add(mine.differenceNotProperlyContained(theirs)); 368 // update iterators 369 boolean moveTheirs = true; 370 if ( mine.startsBeforeNonDisjoint(theirs) || 371 theirs.b > mine.b ) 372 { 373 // uh oh, right of theirs extends past right of mine 374 // therefore could overlap with next of mine so don't 375 // move theirs iterator yet 376 moveTheirs = false; 377 } 378 // always move mine 379 mine = null; 380 if ( thisIter.hasNext() ) { 381 mine = (Interval)thisIter.next(); 382 } 383 if ( moveTheirs ) { 384 theirs = null; 385 if ( otherIter.hasNext() ) { 386 theirs = (Interval)otherIter.next(); 387 } 388 } 389 } 390 } 391 } 392 return diff; 393 } 394 */ 395 396 /** TODO: implement this! */ 397 @Override or(IntSet a)398 public IntSet or(IntSet a) { 399 IntervalSet o = new IntervalSet(); 400 o.addAll(this); 401 o.addAll(a); 402 //throw new NoSuchMethodError(); 403 return o; 404 } 405 406 /** Return a new set with the intersection of this set with other. Because 407 * the intervals are sorted, we can use an iterator for each list and 408 * just walk them together. This is roughly O(min(n,m)) for interval 409 * list lengths n and m. 410 */ 411 @Override and(IntSet other)412 public IntervalSet and(IntSet other) { 413 if ( other==null ) { //|| !(other instanceof IntervalSet) ) { 414 return null; // nothing in common with null set 415 } 416 417 List<Interval> myIntervals = this.intervals; 418 List<Interval> theirIntervals = ((IntervalSet)other).intervals; 419 IntervalSet intersection = null; 420 int mySize = myIntervals.size(); 421 int theirSize = theirIntervals.size(); 422 int i = 0; 423 int j = 0; 424 // iterate down both interval lists looking for nondisjoint intervals 425 while ( i<mySize && j<theirSize ) { 426 Interval mine = myIntervals.get(i); 427 Interval theirs = theirIntervals.get(j); 428 //System.out.println("mine="+mine+" and theirs="+theirs); 429 if ( mine.startsBeforeDisjoint(theirs) ) { 430 // move this iterator looking for interval that might overlap 431 i++; 432 } 433 else if ( theirs.startsBeforeDisjoint(mine) ) { 434 // move other iterator looking for interval that might overlap 435 j++; 436 } 437 else if ( mine.properlyContains(theirs) ) { 438 // overlap, add intersection, get next theirs 439 if ( intersection==null ) { 440 intersection = new IntervalSet(); 441 } 442 intersection.add(mine.intersection(theirs)); 443 j++; 444 } 445 else if ( theirs.properlyContains(mine) ) { 446 // overlap, add intersection, get next mine 447 if ( intersection==null ) { 448 intersection = new IntervalSet(); 449 } 450 intersection.add(mine.intersection(theirs)); 451 i++; 452 } 453 else if ( !mine.disjoint(theirs) ) { 454 // overlap, add intersection 455 if ( intersection==null ) { 456 intersection = new IntervalSet(); 457 } 458 intersection.add(mine.intersection(theirs)); 459 // Move the iterator of lower range [a..b], but not 460 // the upper range as it may contain elements that will collide 461 // with the next iterator. So, if mine=[0..115] and 462 // theirs=[115..200], then intersection is 115 and move mine 463 // but not theirs as theirs may collide with the next range 464 // in thisIter. 465 // move both iterators to next ranges 466 if ( mine.startsAfterNonDisjoint(theirs) ) { 467 j++; 468 } 469 else if ( theirs.startsAfterNonDisjoint(mine) ) { 470 i++; 471 } 472 } 473 } 474 if ( intersection==null ) { 475 return new IntervalSet(); 476 } 477 return intersection; 478 } 479 480 /** Is el in any range of this set? */ 481 @Override member(int el)482 public boolean member(int el) { 483 int n = intervals.size(); 484 for (int i = 0; i < n; i++) { 485 Interval I = intervals.get(i); 486 int a = I.a; 487 int b = I.b; 488 if ( el<a ) { 489 break; // list is sorted and el is before this interval; not here 490 } 491 if ( el>=a && el<=b ) { 492 return true; // found in this interval 493 } 494 } 495 return false; 496 /* 497 for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { 498 Interval I = (Interval) iter.next(); 499 if ( el<I.a ) { 500 break; // list is sorted and el is before this interval; not here 501 } 502 if ( el>=I.a && el<=I.b ) { 503 return true; // found in this interval 504 } 505 } 506 return false; 507 */ 508 } 509 510 /** return true if this set has no members */ 511 @Override isNil()512 public boolean isNil() { 513 return intervals==null || intervals.isEmpty(); 514 } 515 516 /** If this set is a single integer, return it otherwise Label.INVALID */ 517 @Override getSingleElement()518 public int getSingleElement() { 519 if ( intervals!=null && intervals.size()==1 ) { 520 Interval I = intervals.get(0); 521 if ( I.a == I.b ) { 522 return I.a; 523 } 524 } 525 return Label.INVALID; 526 } 527 getMaxElement()528 public int getMaxElement() { 529 if ( isNil() ) { 530 return Label.INVALID; 531 } 532 Interval last = intervals.get(intervals.size()-1); 533 return last.b; 534 } 535 536 /** Return minimum element >= 0 */ getMinElement()537 public int getMinElement() { 538 if ( isNil() ) { 539 return Label.INVALID; 540 } 541 int n = intervals.size(); 542 for (int i = 0; i < n; i++) { 543 Interval I = intervals.get(i); 544 int a = I.a; 545 int b = I.b; 546 for (int v=a; v<=b; v++) { 547 if ( v>=0 ) return v; 548 } 549 } 550 return Label.INVALID; 551 } 552 553 /** Return a list of Interval objects. */ getIntervals()554 public List<Interval> getIntervals() { 555 return intervals; 556 } 557 558 /** Are two IntervalSets equal? Because all intervals are sorted 559 * and disjoint, equals is a simple linear walk over both lists 560 * to make sure they are the same. Interval.equals() is used 561 * by the List.equals() method to check the ranges. 562 */ 563 @Override equals(Object obj)564 public boolean equals(Object obj) { 565 if ( !(obj instanceof IntervalSet) ) { 566 return false; 567 } 568 IntervalSet other = (IntervalSet)obj; 569 return this.intervals.equals(other.intervals); 570 } 571 572 @Override toString()573 public String toString() { 574 return toString(null); 575 } 576 577 @Override toString(Grammar g)578 public String toString(Grammar g) { 579 StringBuilder buf = new StringBuilder(); 580 if ( this.intervals==null || this.intervals.isEmpty() ) { 581 return "{}"; 582 } 583 if ( this.intervals.size()>1 ) { 584 buf.append("{"); 585 } 586 Iterator<Interval> iter = this.intervals.iterator(); 587 while (iter.hasNext()) { 588 Interval I = iter.next(); 589 int a = I.a; 590 int b = I.b; 591 if ( a==b ) { 592 if ( g!=null ) { 593 buf.append(g.getTokenDisplayName(a)); 594 } 595 else { 596 buf.append(a); 597 } 598 } 599 else { 600 if ( g!=null ) { 601 buf.append(g.getTokenDisplayName(a)).append("..").append(g.getTokenDisplayName(b)); 602 } 603 else { 604 buf.append(a).append("..").append(b); 605 } 606 } 607 if ( iter.hasNext() ) { 608 buf.append(", "); 609 } 610 } 611 if ( this.intervals.size()>1 ) { 612 buf.append("}"); 613 } 614 return buf.toString(); 615 } 616 617 @Override size()618 public int size() { 619 int n = 0; 620 int numIntervals = intervals.size(); 621 if ( numIntervals==1 ) { 622 Interval firstInterval = this.intervals.get(0); 623 return firstInterval.b-firstInterval.a+1; 624 } 625 for (int i = 0; i < numIntervals; i++) { 626 Interval I = intervals.get(i); 627 n += (I.b-I.a+1); 628 } 629 return n; 630 } 631 632 @Override toList()633 public List<Integer> toList() { 634 List<Integer> values = new ArrayList<Integer>(); 635 int n = intervals.size(); 636 for (int i = 0; i < n; i++) { 637 Interval I = intervals.get(i); 638 int a = I.a; 639 int b = I.b; 640 for (int v=a; v<=b; v++) { 641 values.add(Utils.integer(v)); 642 } 643 } 644 return values; 645 } 646 647 /** Get the ith element of ordered set. Used only by RandomPhrase so 648 * don't bother to implement if you're not doing that for a new 649 * ANTLR code gen target. 650 */ get(int i)651 public int get(int i) { 652 int n = intervals.size(); 653 int index = 0; 654 for (int j = 0; j < n; j++) { 655 Interval I = intervals.get(j); 656 int a = I.a; 657 int b = I.b; 658 for (int v=a; v<=b; v++) { 659 if ( index==i ) { 660 return v; 661 } 662 index++; 663 } 664 } 665 return -1; 666 } 667 toArray()668 public int[] toArray() { 669 int[] values = new int[size()]; 670 int n = intervals.size(); 671 int j = 0; 672 for (int i = 0; i < n; i++) { 673 Interval I = intervals.get(i); 674 int a = I.a; 675 int b = I.b; 676 for (int v=a; v<=b; v++) { 677 values[j] = v; 678 j++; 679 } 680 } 681 return values; 682 } 683 toRuntimeBitSet()684 public org.antlr.runtime.BitSet toRuntimeBitSet() { 685 org.antlr.runtime.BitSet s = 686 new org.antlr.runtime.BitSet(getMaxElement()+1); 687 int n = intervals.size(); 688 for (int i = 0; i < n; i++) { 689 Interval I = intervals.get(i); 690 int a = I.a; 691 int b = I.b; 692 for (int v=a; v<=b; v++) { 693 s.add(v); 694 } 695 } 696 return s; 697 } 698 699 @Override remove(int el)700 public void remove(int el) { 701 throw new NoSuchMethodError("IntervalSet.remove() unimplemented"); 702 } 703 704 /* 705 protected void finalize() throws Throwable { 706 super.finalize(); 707 System.out.println("size "+intervals.size()+" "+size()); 708 } 709 */ 710 } 711