1# Copyright (C) 2014 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import print_function 16import heapq 17import itertools 18 19__all__ = ["RangeSet"] 20 21class RangeSet(object): 22 """A RangeSet represents a set of nonoverlapping ranges on the 23 integers (ie, a set of integers, but efficient when the set contains 24 lots of runs.""" 25 26 def __init__(self, data=None): 27 # TODO(tbao): monotonic is broken when passing in a tuple. 28 self.monotonic = False 29 if isinstance(data, str): 30 self._parse_internal(data) 31 elif data: 32 self.data = tuple(self._remove_pairs(data)) 33 else: 34 self.data = () 35 36 def __iter__(self): 37 for i in range(0, len(self.data), 2): 38 yield self.data[i:i+2] 39 40 def __eq__(self, other): 41 return self.data == other.data 42 def __ne__(self, other): 43 return self.data != other.data 44 def __nonzero__(self): 45 return bool(self.data) 46 47 def __str__(self): 48 if not self.data: 49 return "empty" 50 else: 51 return self.to_string() 52 53 def __repr__(self): 54 return '<RangeSet("' + self.to_string() + '")>' 55 56 @classmethod 57 def parse(cls, text): 58 """Parse a text string consisting of a space-separated list of 59 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to 60 include both their ends (so the above example represents 18 61 individual blocks. Returns a RangeSet object. 62 63 If the input has all its blocks in increasing order, then returned 64 RangeSet will have an extra attribute 'monotonic' that is set to 65 True. For example the input "10-20 30" is monotonic, but the input 66 "15-20 30 10-14" is not, even though they represent the same set 67 of blocks (and the two RangeSets will compare equal with ==). 68 """ 69 return cls(text) 70 71 def _parse_internal(self, text): 72 data = [] 73 last = -1 74 monotonic = True 75 for p in text.split(): 76 if "-" in p: 77 s, e = p.split("-") 78 data.append(int(s)) 79 data.append(int(e)+1) 80 if last <= s <= e: 81 last = e 82 else: 83 monotonic = False 84 else: 85 s = int(p) 86 data.append(s) 87 data.append(s+1) 88 if last <= s: 89 last = s+1 90 else: 91 monotonic = True 92 data.sort() 93 self.data = tuple(self._remove_pairs(data)) 94 self.monotonic = monotonic 95 96 @staticmethod 97 def _remove_pairs(source): 98 last = None 99 for i in source: 100 if i == last: 101 last = None 102 else: 103 if last is not None: 104 yield last 105 last = i 106 if last is not None: 107 yield last 108 109 def to_string(self): 110 out = [] 111 for i in range(0, len(self.data), 2): 112 s, e = self.data[i:i+2] 113 if e == s+1: 114 out.append(str(s)) 115 else: 116 out.append(str(s) + "-" + str(e-1)) 117 return " ".join(out) 118 119 def to_string_raw(self): 120 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 121 122 def union(self, other): 123 """Return a new RangeSet representing the union of this RangeSet 124 with the argument. 125 126 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 127 <RangeSet("10-34")> 128 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 129 <RangeSet("10-19 22 30-34")> 130 """ 131 out = [] 132 z = 0 133 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 134 zip(other.data, itertools.cycle((+1, -1)))): 135 if (z == 0 and d == 1) or (z == 1 and d == -1): 136 out.append(p) 137 z += d 138 return RangeSet(data=out) 139 140 def intersect(self, other): 141 """Return a new RangeSet representing the intersection of this 142 RangeSet with the argument. 143 144 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 145 <RangeSet("18-19 30-32")> 146 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 147 <RangeSet("")> 148 """ 149 out = [] 150 z = 0 151 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 152 zip(other.data, itertools.cycle((+1, -1)))): 153 if (z == 1 and d == 1) or (z == 2 and d == -1): 154 out.append(p) 155 z += d 156 return RangeSet(data=out) 157 158 def subtract(self, other): 159 """Return a new RangeSet representing subtracting the argument 160 from this RangeSet. 161 162 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 163 <RangeSet("10-17 33-34")> 164 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 165 <RangeSet("10-19 30-34")> 166 """ 167 168 out = [] 169 z = 0 170 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 171 zip(other.data, itertools.cycle((-1, +1)))): 172 if (z == 0 and d == 1) or (z == 1 and d == -1): 173 out.append(p) 174 z += d 175 return RangeSet(data=out) 176 177 def overlaps(self, other): 178 """Returns true if the argument has a nonempty overlap with this 179 RangeSet. 180 181 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 182 True 183 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 184 False 185 """ 186 187 # This is like intersect, but we can stop as soon as we discover the 188 # output is going to be nonempty. 189 z = 0 190 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 191 zip(other.data, itertools.cycle((+1, -1)))): 192 if (z == 1 and d == 1) or (z == 2 and d == -1): 193 return True 194 z += d 195 return False 196 197 def size(self): 198 """Returns the total size of the RangeSet (ie, how many integers 199 are in the set). 200 201 >>> RangeSet("10-19 30-34").size() 202 15 203 """ 204 205 total = 0 206 for i, p in enumerate(self.data): 207 if i % 2: 208 total += p 209 else: 210 total -= p 211 return total 212 213 def map_within(self, other): 214 """'other' should be a subset of 'self'. Returns a RangeSet 215 representing what 'other' would get translated to if the integers 216 of 'self' were translated down to be contiguous starting at zero. 217 218 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 219 <RangeSet("3-4")> 220 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 221 <RangeSet("3-4")> 222 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 223 <RangeSet("7-12")> 224 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 225 <RangeSet("2-3 7-12")> 226 """ 227 228 out = [] 229 offset = 0 230 start = None 231 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 232 zip(other.data, itertools.cycle((-1, +1)))): 233 if d == -5: 234 start = p 235 elif d == +5: 236 offset += p-start 237 start = None 238 else: 239 out.append(offset + p - start) 240 return RangeSet(data=out) 241 242 def extend(self, n): 243 """Extend the RangeSet by 'n' blocks. 244 245 The lower bound is guaranteed to be non-negative. 246 247 >>> RangeSet("0-9").extend(1) 248 <RangeSet("0-10")> 249 >>> RangeSet("10-19").extend(15) 250 <RangeSet("0-34")> 251 >>> RangeSet("10-19 30-39").extend(4) 252 <RangeSet("6-23 26-43")> 253 >>> RangeSet("10-19 30-39").extend(10) 254 <RangeSet("0-49")> 255 """ 256 out = self 257 for i in range(0, len(self.data), 2): 258 s, e = self.data[i:i+2] 259 s1 = max(0, s - n) 260 e1 = e + n 261 out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) 262 return out 263 264 def first(self, n): 265 """Return the RangeSet that contains at most the first 'n' integers. 266 267 >>> RangeSet("0-9").first(1) 268 <RangeSet("0")> 269 >>> RangeSet("10-19").first(5) 270 <RangeSet("10-14")> 271 >>> RangeSet("10-19").first(15) 272 <RangeSet("10-19")> 273 >>> RangeSet("10-19 30-39").first(3) 274 <RangeSet("10-12")> 275 >>> RangeSet("10-19 30-39").first(15) 276 <RangeSet("10-19 30-34")> 277 >>> RangeSet("10-19 30-39").first(30) 278 <RangeSet("10-19 30-39")> 279 >>> RangeSet("0-9").first(0) 280 <RangeSet("")> 281 """ 282 283 if self.size() <= n: 284 return self 285 286 out = [] 287 for s, e in self: 288 if e - s >= n: 289 out += (s, s+n) 290 break 291 else: 292 out += (s, e) 293 n -= e - s 294 return RangeSet(data=out) 295 296 297if __name__ == "__main__": 298 import doctest 299 doctest.testmod() 300