1# Copyright (C) 2014 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import print_function 16 17import heapq 18import itertools 19 20 21__all__ = ["RangeSet"] 22 23 24class RangeSet(object): 25 """A RangeSet represents a set of non-overlapping ranges on integers. 26 27 Attributes: 28 monotonic: Whether the input has all its integers in increasing order. 29 extra: A dict that can be used by the caller, e.g. to store info that's 30 only meaningful to caller. 31 """ 32 33 def __init__(self, data=None): 34 self.monotonic = False 35 self._extra = {} 36 if isinstance(data, str): 37 self._parse_internal(data) 38 elif data: 39 assert len(data) % 2 == 0 40 self.data = tuple(self._remove_pairs(data)) 41 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:])) 42 else: 43 self.data = () 44 45 def __iter__(self): 46 for i in range(0, len(self.data), 2): 47 yield self.data[i:i+2] 48 49 def __eq__(self, other): 50 return self.data == other.data 51 52 def __ne__(self, other): 53 return self.data != other.data 54 55 def __nonzero__(self): 56 return bool(self.data) 57 58 def __str__(self): 59 if not self.data: 60 return "empty" 61 else: 62 return self.to_string() 63 64 def __repr__(self): 65 return '<RangeSet("' + self.to_string() + '")>' 66 67 @property 68 def extra(self): 69 return self._extra 70 71 @classmethod 72 def parse(cls, text): 73 """Parses a text string into a RangeSet. 74 75 The input text string consists of a space-separated list of blocks and 76 ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their 77 ends (so the above example represents 18 individual blocks). Returns a 78 RangeSet object. 79 80 If the input has all its blocks in increasing order, then the 'monotonic' 81 attribute of the returned RangeSet will be set to True. For example the 82 input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even 83 though they represent the same set of blocks (and the two RangeSets will 84 compare equal with ==). 85 """ 86 return cls(text) 87 88 @classmethod 89 def parse_raw(cls, text): 90 """Parse a string generated by RangeSet.to_string_raw(). 91 92 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw()) 93 <RangeSet("0-9")> 94 """ 95 96 raw = [int(i) for i in text.split(',')] 97 assert raw[0] == len(raw[1:]), "Invalid raw string." 98 99 return cls(data=raw[1:]) 100 101 def _parse_internal(self, text): 102 data = [] 103 last = -1 104 monotonic = True 105 for p in text.split(): 106 if "-" in p: 107 s, e = (int(x) for x in p.split("-")) 108 data.append(s) 109 data.append(e+1) 110 if last <= s <= e: 111 last = e 112 else: 113 monotonic = False 114 else: 115 s = int(p) 116 data.append(s) 117 data.append(s+1) 118 if last <= s: 119 last = s+1 120 else: 121 monotonic = False 122 data.sort() 123 self.data = tuple(self._remove_pairs(data)) 124 self.monotonic = monotonic 125 126 @staticmethod 127 def _remove_pairs(source): 128 """Remove consecutive duplicate items to simplify the result. 129 130 [1, 2, 2, 5, 5, 10] will become [1, 10].""" 131 last = None 132 for i in source: 133 if i == last: 134 last = None 135 else: 136 if last is not None: 137 yield last 138 last = i 139 if last is not None: 140 yield last 141 142 def to_string(self): 143 out = [] 144 for i in range(0, len(self.data), 2): 145 s, e = self.data[i:i+2] 146 if e == s+1: 147 out.append(str(s)) 148 else: 149 out.append(str(s) + "-" + str(e-1)) 150 return " ".join(out) 151 152 def to_string_raw(self): 153 assert self.data 154 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 155 156 def union(self, other): 157 """Return a new RangeSet representing the union of this RangeSet 158 with the argument. 159 160 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 161 <RangeSet("10-34")> 162 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 163 <RangeSet("10-19 22 30-34")> 164 """ 165 out = [] 166 z = 0 167 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 168 zip(other.data, itertools.cycle((+1, -1)))): 169 if (z == 0 and d == 1) or (z == 1 and d == -1): 170 out.append(p) 171 z += d 172 return RangeSet(data=out) 173 174 def intersect(self, other): 175 """Return a new RangeSet representing the intersection of this 176 RangeSet with the argument. 177 178 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 179 <RangeSet("18-19 30-32")> 180 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 181 <RangeSet("")> 182 """ 183 out = [] 184 z = 0 185 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 186 zip(other.data, itertools.cycle((+1, -1)))): 187 if (z == 1 and d == 1) or (z == 2 and d == -1): 188 out.append(p) 189 z += d 190 return RangeSet(data=out) 191 192 def subtract(self, other): 193 """Return a new RangeSet representing subtracting the argument 194 from this RangeSet. 195 196 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 197 <RangeSet("10-17 33-34")> 198 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 199 <RangeSet("10-19 30-34")> 200 """ 201 202 out = [] 203 z = 0 204 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 205 zip(other.data, itertools.cycle((-1, +1)))): 206 if (z == 0 and d == 1) or (z == 1 and d == -1): 207 out.append(p) 208 z += d 209 return RangeSet(data=out) 210 211 def overlaps(self, other): 212 """Returns true if the argument has a nonempty overlap with this 213 RangeSet. 214 215 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 216 True 217 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 218 False 219 """ 220 221 # This is like intersect, but we can stop as soon as we discover the 222 # output is going to be nonempty. 223 z = 0 224 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 225 zip(other.data, itertools.cycle((+1, -1)))): 226 if (z == 1 and d == 1) or (z == 2 and d == -1): 227 return True 228 z += d 229 return False 230 231 def size(self): 232 """Returns the total size of the RangeSet (ie, how many integers 233 are in the set). 234 235 >>> RangeSet("10-19 30-34").size() 236 15 237 """ 238 239 total = 0 240 for i, p in enumerate(self.data): 241 if i % 2: 242 total += p 243 else: 244 total -= p 245 return total 246 247 def map_within(self, other): 248 """'other' should be a subset of 'self'. Returns a RangeSet 249 representing what 'other' would get translated to if the integers 250 of 'self' were translated down to be contiguous starting at zero. 251 252 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 253 <RangeSet("3-4")> 254 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 255 <RangeSet("3-4")> 256 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 257 <RangeSet("7-12")> 258 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 259 <RangeSet("2-3 7-12")> 260 """ 261 262 out = [] 263 offset = 0 264 start = None 265 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 266 zip(other.data, itertools.cycle((-1, +1)))): 267 if d == -5: 268 start = p 269 elif d == +5: 270 offset += p-start 271 start = None 272 else: 273 out.append(offset + p - start) 274 return RangeSet(data=out) 275 276 def extend(self, n): 277 """Extend the RangeSet by 'n' blocks. 278 279 The lower bound is guaranteed to be non-negative. 280 281 >>> RangeSet("0-9").extend(1) 282 <RangeSet("0-10")> 283 >>> RangeSet("10-19").extend(15) 284 <RangeSet("0-34")> 285 >>> RangeSet("10-19 30-39").extend(4) 286 <RangeSet("6-23 26-43")> 287 >>> RangeSet("10-19 30-39").extend(10) 288 <RangeSet("0-49")> 289 """ 290 out = self 291 for i in range(0, len(self.data), 2): 292 s, e = self.data[i:i+2] 293 s1 = max(0, s - n) 294 e1 = e + n 295 out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) 296 return out 297 298 def first(self, n): 299 """Return the RangeSet that contains at most the first 'n' integers. 300 301 >>> RangeSet("0-9").first(1) 302 <RangeSet("0")> 303 >>> RangeSet("10-19").first(5) 304 <RangeSet("10-14")> 305 >>> RangeSet("10-19").first(15) 306 <RangeSet("10-19")> 307 >>> RangeSet("10-19 30-39").first(3) 308 <RangeSet("10-12")> 309 >>> RangeSet("10-19 30-39").first(15) 310 <RangeSet("10-19 30-34")> 311 >>> RangeSet("10-19 30-39").first(30) 312 <RangeSet("10-19 30-39")> 313 >>> RangeSet("0-9").first(0) 314 <RangeSet("")> 315 """ 316 317 if self.size() <= n: 318 return self 319 320 out = [] 321 for s, e in self: 322 if e - s >= n: 323 out += (s, s+n) 324 break 325 else: 326 out += (s, e) 327 n -= e - s 328 return RangeSet(data=out) 329 330 def next_item(self): 331 """Return the next integer represented by the RangeSet. 332 333 >>> list(RangeSet("0-9").next_item()) 334 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 335 >>> list(RangeSet("10-19 3-5").next_item()) 336 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 337 >>> list(RangeSet("10-19 3 5 7").next_item()) 338 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 339 """ 340 for s, e in self: 341 for element in range(s, e): 342 yield element 343 344 345if __name__ == "__main__": 346 import doctest 347 doctest.testmod() 348