• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22  """A RangeSet represents a set of nonoverlapping ranges on the
23  integers (ie, a set of integers, but efficient when the set contains
24  lots of runs."""
25
26  def __init__(self, data=None):
27    self.monotonic = False
28    if isinstance(data, str):
29      self._parse_internal(data)
30    elif data:
31      assert len(data) % 2 == 0
32      self.data = tuple(self._remove_pairs(data))
33      self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
34    else:
35      self.data = ()
36
37  def __iter__(self):
38    for i in range(0, len(self.data), 2):
39      yield self.data[i:i+2]
40
41  def __eq__(self, other):
42    return self.data == other.data
43
44  def __ne__(self, other):
45    return self.data != other.data
46
47  def __nonzero__(self):
48    return bool(self.data)
49
50  def __str__(self):
51    if not self.data:
52      return "empty"
53    else:
54      return self.to_string()
55
56  def __repr__(self):
57    return '<RangeSet("' + self.to_string() + '")>'
58
59  @classmethod
60  def parse(cls, text):
61    """Parse a text string consisting of a space-separated list of
62    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
63    include both their ends (so the above example represents 18
64    individual blocks.  Returns a RangeSet object.
65
66    If the input has all its blocks in increasing order, then returned
67    RangeSet will have an extra attribute 'monotonic' that is set to
68    True.  For example the input "10-20 30" is monotonic, but the input
69    "15-20 30 10-14" is not, even though they represent the same set
70    of blocks (and the two RangeSets will compare equal with ==).
71    """
72    return cls(text)
73
74  @classmethod
75  def parse_raw(cls, text):
76    """Parse a string generated by RangeSet.to_string_raw().
77
78    >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
79    <RangeSet("0-9")>
80    """
81
82    raw = [int(i) for i in text.split(',')]
83    assert raw[0] == len(raw[1:]), "Invalid raw string."
84
85    return cls(data=raw[1:])
86
87  def _parse_internal(self, text):
88    data = []
89    last = -1
90    monotonic = True
91    for p in text.split():
92      if "-" in p:
93        s, e = (int(x) for x in p.split("-"))
94        data.append(s)
95        data.append(e+1)
96        if last <= s <= e:
97          last = e
98        else:
99          monotonic = False
100      else:
101        s = int(p)
102        data.append(s)
103        data.append(s+1)
104        if last <= s:
105          last = s+1
106        else:
107          monotonic = False
108    data.sort()
109    self.data = tuple(self._remove_pairs(data))
110    self.monotonic = monotonic
111
112  @staticmethod
113  def _remove_pairs(source):
114    """Remove consecutive duplicate items to simplify the result.
115
116    [1, 2, 2, 5, 5, 10] will become [1, 10]."""
117    last = None
118    for i in source:
119      if i == last:
120        last = None
121      else:
122        if last is not None:
123          yield last
124        last = i
125    if last is not None:
126      yield last
127
128  def to_string(self):
129    out = []
130    for i in range(0, len(self.data), 2):
131      s, e = self.data[i:i+2]
132      if e == s+1:
133        out.append(str(s))
134      else:
135        out.append(str(s) + "-" + str(e-1))
136    return " ".join(out)
137
138  def to_string_raw(self):
139    assert self.data
140    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
141
142  def union(self, other):
143    """Return a new RangeSet representing the union of this RangeSet
144    with the argument.
145
146    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
147    <RangeSet("10-34")>
148    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
149    <RangeSet("10-19 22 30-34")>
150    """
151    out = []
152    z = 0
153    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
154                            zip(other.data, itertools.cycle((+1, -1)))):
155      if (z == 0 and d == 1) or (z == 1 and d == -1):
156        out.append(p)
157      z += d
158    return RangeSet(data=out)
159
160  def intersect(self, other):
161    """Return a new RangeSet representing the intersection of this
162    RangeSet with the argument.
163
164    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
165    <RangeSet("18-19 30-32")>
166    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
167    <RangeSet("")>
168    """
169    out = []
170    z = 0
171    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
172                            zip(other.data, itertools.cycle((+1, -1)))):
173      if (z == 1 and d == 1) or (z == 2 and d == -1):
174        out.append(p)
175      z += d
176    return RangeSet(data=out)
177
178  def subtract(self, other):
179    """Return a new RangeSet representing subtracting the argument
180    from this RangeSet.
181
182    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
183    <RangeSet("10-17 33-34")>
184    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
185    <RangeSet("10-19 30-34")>
186    """
187
188    out = []
189    z = 0
190    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
191                            zip(other.data, itertools.cycle((-1, +1)))):
192      if (z == 0 and d == 1) or (z == 1 and d == -1):
193        out.append(p)
194      z += d
195    return RangeSet(data=out)
196
197  def overlaps(self, other):
198    """Returns true if the argument has a nonempty overlap with this
199    RangeSet.
200
201    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
202    True
203    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
204    False
205    """
206
207    # This is like intersect, but we can stop as soon as we discover the
208    # output is going to be nonempty.
209    z = 0
210    for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
211                            zip(other.data, itertools.cycle((+1, -1)))):
212      if (z == 1 and d == 1) or (z == 2 and d == -1):
213        return True
214      z += d
215    return False
216
217  def size(self):
218    """Returns the total size of the RangeSet (ie, how many integers
219    are in the set).
220
221    >>> RangeSet("10-19 30-34").size()
222    15
223    """
224
225    total = 0
226    for i, p in enumerate(self.data):
227      if i % 2:
228        total += p
229      else:
230        total -= p
231    return total
232
233  def map_within(self, other):
234    """'other' should be a subset of 'self'.  Returns a RangeSet
235    representing what 'other' would get translated to if the integers
236    of 'self' were translated down to be contiguous starting at zero.
237
238    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
239    <RangeSet("3-4")>
240    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
241    <RangeSet("3-4")>
242    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
243    <RangeSet("7-12")>
244    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
245    <RangeSet("2-3 7-12")>
246    """
247
248    out = []
249    offset = 0
250    start = None
251    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
252                            zip(other.data, itertools.cycle((-1, +1)))):
253      if d == -5:
254        start = p
255      elif d == +5:
256        offset += p-start
257        start = None
258      else:
259        out.append(offset + p - start)
260    return RangeSet(data=out)
261
262  def extend(self, n):
263    """Extend the RangeSet by 'n' blocks.
264
265    The lower bound is guaranteed to be non-negative.
266
267    >>> RangeSet("0-9").extend(1)
268    <RangeSet("0-10")>
269    >>> RangeSet("10-19").extend(15)
270    <RangeSet("0-34")>
271    >>> RangeSet("10-19 30-39").extend(4)
272    <RangeSet("6-23 26-43")>
273    >>> RangeSet("10-19 30-39").extend(10)
274    <RangeSet("0-49")>
275    """
276    out = self
277    for i in range(0, len(self.data), 2):
278      s, e = self.data[i:i+2]
279      s1 = max(0, s - n)
280      e1 = e + n
281      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
282    return out
283
284  def first(self, n):
285    """Return the RangeSet that contains at most the first 'n' integers.
286
287    >>> RangeSet("0-9").first(1)
288    <RangeSet("0")>
289    >>> RangeSet("10-19").first(5)
290    <RangeSet("10-14")>
291    >>> RangeSet("10-19").first(15)
292    <RangeSet("10-19")>
293    >>> RangeSet("10-19 30-39").first(3)
294    <RangeSet("10-12")>
295    >>> RangeSet("10-19 30-39").first(15)
296    <RangeSet("10-19 30-34")>
297    >>> RangeSet("10-19 30-39").first(30)
298    <RangeSet("10-19 30-39")>
299    >>> RangeSet("0-9").first(0)
300    <RangeSet("")>
301    """
302
303    if self.size() <= n:
304      return self
305
306    out = []
307    for s, e in self:
308      if e - s >= n:
309        out += (s, s+n)
310        break
311      else:
312        out += (s, e)
313        n -= e - s
314    return RangeSet(data=out)
315
316  def next_item(self):
317    """Return the next integer represented by the RangeSet.
318
319    >>> list(RangeSet("0-9").next_item())
320    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
321    >>> list(RangeSet("10-19 3-5").next_item())
322    [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
323    >>> list(rangelib.RangeSet("10-19 3 5 7").next_item())
324    [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
325    """
326    for s, e in self:
327      for element in range(s, e):
328        yield element
329
330
331if __name__ == "__main__":
332  import doctest
333  doctest.testmod()
334