1# Copyright (C) 2014 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import print_function 16import heapq 17import itertools 18 19__all__ = ["RangeSet"] 20 21class RangeSet(object): 22 """A RangeSet represents a set of nonoverlapping ranges on the 23 integers (ie, a set of integers, but efficient when the set contains 24 lots of runs.""" 25 26 def __init__(self, data=None): 27 if isinstance(data, str): 28 self._parse_internal(data) 29 elif data: 30 self.data = tuple(self._remove_pairs(data)) 31 else: 32 self.data = () 33 34 def __iter__(self): 35 for i in range(0, len(self.data), 2): 36 yield self.data[i:i+2] 37 38 def __eq__(self, other): 39 return self.data == other.data 40 def __ne__(self, other): 41 return self.data != other.data 42 def __nonzero__(self): 43 return bool(self.data) 44 45 def __str__(self): 46 if not self.data: 47 return "empty" 48 else: 49 return self.to_string() 50 51 def __repr__(self): 52 return '<RangeSet("' + self.to_string() + '")>' 53 54 @classmethod 55 def parse(cls, text): 56 """Parse a text string consisting of a space-separated list of 57 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to 58 include both their ends (so the above example represents 18 59 individual blocks. Returns a RangeSet object. 60 61 If the input has all its blocks in increasing order, then returned 62 RangeSet will have an extra attribute 'monotonic' that is set to 63 True. For example the input "10-20 30" is monotonic, but the input 64 "15-20 30 10-14" is not, even though they represent the same set 65 of blocks (and the two RangeSets will compare equal with ==). 66 """ 67 return cls(text) 68 69 def _parse_internal(self, text): 70 data = [] 71 last = -1 72 monotonic = True 73 for p in text.split(): 74 if "-" in p: 75 s, e = p.split("-") 76 data.append(int(s)) 77 data.append(int(e)+1) 78 if last <= s <= e: 79 last = e 80 else: 81 monotonic = False 82 else: 83 s = int(p) 84 data.append(s) 85 data.append(s+1) 86 if last <= s: 87 last = s+1 88 else: 89 monotonic = True 90 data.sort() 91 self.data = tuple(self._remove_pairs(data)) 92 self.monotonic = monotonic 93 94 @staticmethod 95 def _remove_pairs(source): 96 last = None 97 for i in source: 98 if i == last: 99 last = None 100 else: 101 if last is not None: 102 yield last 103 last = i 104 if last is not None: 105 yield last 106 107 def to_string(self): 108 out = [] 109 for i in range(0, len(self.data), 2): 110 s, e = self.data[i:i+2] 111 if e == s+1: 112 out.append(str(s)) 113 else: 114 out.append(str(s) + "-" + str(e-1)) 115 return " ".join(out) 116 117 def to_string_raw(self): 118 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 119 120 def union(self, other): 121 """Return a new RangeSet representing the union of this RangeSet 122 with the argument. 123 124 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 125 <RangeSet("10-34")> 126 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 127 <RangeSet("10-19 22 30-34")> 128 """ 129 out = [] 130 z = 0 131 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 132 zip(other.data, itertools.cycle((+1, -1)))): 133 if (z == 0 and d == 1) or (z == 1 and d == -1): 134 out.append(p) 135 z += d 136 return RangeSet(data=out) 137 138 def intersect(self, other): 139 """Return a new RangeSet representing the intersection of this 140 RangeSet with the argument. 141 142 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 143 <RangeSet("18-19 30-32")> 144 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 145 <RangeSet("")> 146 """ 147 out = [] 148 z = 0 149 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 150 zip(other.data, itertools.cycle((+1, -1)))): 151 if (z == 1 and d == 1) or (z == 2 and d == -1): 152 out.append(p) 153 z += d 154 return RangeSet(data=out) 155 156 def subtract(self, other): 157 """Return a new RangeSet representing subtracting the argument 158 from this RangeSet. 159 160 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 161 <RangeSet("10-17 33-34")> 162 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 163 <RangeSet("10-19 30-34")> 164 """ 165 166 out = [] 167 z = 0 168 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 169 zip(other.data, itertools.cycle((-1, +1)))): 170 if (z == 0 and d == 1) or (z == 1 and d == -1): 171 out.append(p) 172 z += d 173 return RangeSet(data=out) 174 175 def overlaps(self, other): 176 """Returns true if the argument has a nonempty overlap with this 177 RangeSet. 178 179 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 180 True 181 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 182 False 183 """ 184 185 # This is like intersect, but we can stop as soon as we discover the 186 # output is going to be nonempty. 187 z = 0 188 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 189 zip(other.data, itertools.cycle((+1, -1)))): 190 if (z == 1 and d == 1) or (z == 2 and d == -1): 191 return True 192 z += d 193 return False 194 195 def size(self): 196 """Returns the total size of the RangeSet (ie, how many integers 197 are in the set). 198 199 >>> RangeSet("10-19 30-34").size() 200 15 201 """ 202 203 total = 0 204 for i, p in enumerate(self.data): 205 if i % 2: 206 total += p 207 else: 208 total -= p 209 return total 210 211 def map_within(self, other): 212 """'other' should be a subset of 'self'. Returns a RangeSet 213 representing what 'other' would get translated to if the integers 214 of 'self' were translated down to be contiguous starting at zero. 215 216 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 217 <RangeSet("3-4")> 218 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 219 <RangeSet("3-4")> 220 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 221 <RangeSet("7-12")> 222 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 223 <RangeSet("2-3 7-12")> 224 """ 225 226 out = [] 227 offset = 0 228 start = None 229 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 230 zip(other.data, itertools.cycle((-1, +1)))): 231 if d == -5: 232 start = p 233 elif d == +5: 234 offset += p-start 235 start = None 236 else: 237 out.append(offset + p - start) 238 return RangeSet(data=out) 239 240 241if __name__ == "__main__": 242 import doctest 243 doctest.testmod() 244