• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Classes to represent arbitrary sets (including sets of sets).
2
3This module implements sets using dictionaries whose values are
4ignored.  The usual operations (union, intersection, deletion, etc.)
5are provided as both methods and operators.
6
7Important: sets are not sequences!  While they support 'x in s',
8'len(s)', and 'for x in s', none of those operations are unique for
9sequences; for example, mappings support all three as well.  The
10characteristic operation for sequences is subscripting with small
11integers: s[i], for i in range(len(s)).  Sets don't support
12subscripting at all.  Also, sequences allow multiple occurrences and
13their elements have a definite order; sets on the other hand don't
14record multiple occurrences and don't remember the order of element
15insertion (which is why they don't support s[i]).
16
17The following classes are provided:
18
19BaseSet -- All the operations common to both mutable and immutable
20    sets. This is an abstract class, not meant to be directly
21    instantiated.
22
23Set -- Mutable sets, subclass of BaseSet; not hashable.
24
25ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26    An iterable argument is mandatory to create an ImmutableSet.
27
28_TemporarilyImmutableSet -- A wrapper around a Set, hashable,
29    giving the same hash value as the immutable set equivalent
30    would have.  Do not use this class directly.
31
32Only hashable objects can be added to a Set. In particular, you cannot
33really add a Set as an element to another Set; if you try, what is
34actually added is an ImmutableSet built from it (it compares equal to
35the one you tried adding).
36
37When you ask if `x in y' where x is a Set and y is a Set or
38ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
39what's tested is actually `z in y'.
40
41"""
42
43# Code history:
44#
45# - Greg V. Wilson wrote the first version, using a different approach
46#   to the mutable/immutable problem, and inheriting from dict.
47#
48# - Alex Martelli modified Greg's version to implement the current
49#   Set/ImmutableSet approach, and make the data an attribute.
50#
51# - Guido van Rossum rewrote much of the code, made some API changes,
52#   and cleaned up the docstrings.
53#
54# - Raymond Hettinger added a number of speedups and other
55#   improvements.
56
try:
    from itertools import ifilter, ifilterfalse
except ImportError:
    # Python 3 dropped these names: the builtin filter() is already lazy
    # there, and ifilterfalse was renamed to filterfalse.
    from itertools import filterfalse as ifilterfalse
    ifilter = filter
58
59__all__ = ['BaseSet', 'Set', 'ImmutableSet']
60
61import warnings
62warnings.warn("the sets module is deprecated", DeprecationWarning,
63                stacklevel=2)
64
class BaseSet(object):
    """Common base class for mutable and immutable sets.

    Elements are stored as the keys of the dict held in ``_data``; the
    dict values are ignored.  This class is abstract: concrete subclasses
    create ``_data`` in their own ``__init__``.
    """

    __slots__ = ['_data']

    # Exact types that _update() iterates with its 'try:' hoisted out of
    # the loop.  xrange only exists on Python 2; Python 3 spells it range.
    try:
        _fast_update_types = (list, tuple, xrange)
    except NameError:
        _fast_update_types = (list, tuple, range)

    # Constructor

    def __init__(self):
        """This is an abstract class."""
        # Don't call this from a concrete subclass!
        if self.__class__ is BaseSet:
            raise TypeError("BaseSet is an abstract class.  "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return the number of elements of a set."""
        return len(self._data)

    def __repr__(self):
        """Return string representation of a set.

        This looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # __str__ is the same as __repr__
    __str__ = __repr__

    def _repr(self, sorted=False):
        """Return '<class name>(<list of elements>)'.

        If `sorted` is true the elements are listed in sorted order,
        otherwise in the underlying dict's iteration order.
        """
        elements = list(self._data)
        if sorted:
            elements.sort()
        return '%s(%r)' % (self.__class__.__name__, elements)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the keys iterator for the underlying dict.
        """
        return iter(self._data)

    # Three-way comparison is not supported.  However, because __eq__ is
    # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
    # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
    # case).

    def __cmp__(self, other):
        raise TypeError("can't compare sets using cmp()")

    # Equality comparisons using the underlying dicts.  Mixed-type comparisons
    # are allowed here, where Set == z for non-Set z always returns False,
    # and Set != z always True.  This allows expressions like "x in y" to
    # give the expected result when y is a sequence of mixed types, not
    # raising a pointless TypeError just because y contains a Set, or x is
    # a Set and y contains a non-set ("in" invokes only __eq__).
    # Subtle:  it would be nicer if __eq__ and __ne__ could return
    # NotImplemented instead of True or False.  Then the other comparand
    # would get a chance to determine the result, and if the other comparand
    # also returned NotImplemented then it would fall back to object address
    # comparison (which would always return False for __eq__ and always
    # True for __ne__).  However, that doesn't work, because this type
    # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
    # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.

    def __eq__(self, other):
        if isinstance(other, BaseSet):
            return self._data == other._data
        else:
            return False

    def __ne__(self, other):
        if isinstance(other, BaseSet):
            return self._data != other._data
        else:
            return True

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        result = self.__class__()
        result._data.update(self._data)
        return result

    __copy__ = copy # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by copy module."""
        # This pre-creates the result and inserts it in the memo
        # early, in case the deep copy recurses into another reference
        # to this same set.  A set can't be an element of itself, but
        # it can certainly contain an object that has a reference to
        # itself.
        from copy import deepcopy
        result = self.__class__()
        memo[id(self)] = result
        data = result._data
        value = True
        for elt in self:
            data[deepcopy(elt, memo)] = value
        return result

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).
    # Subtle:  Each pair requires distinct code so that the outcome is
    # correct when the type of other isn't suitable.  For example, if
    # we did "union = __or__" instead, then Set().union(3) would return
    # NotImplemented instead of raising TypeError (albeit that *why* it
    # raises TypeError as-is is also a bit subtle).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.union(other)

    def union(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        result = self.__class__(self)
        result._update(other)
        return result

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.intersection(other)

    def intersection(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            other = Set(other)
        # Walk the smaller set and probe the larger one.
        if len(self) <= len(other):
            little, big = self, other
        else:
            little, big = other, self
        bigdata = big._data
        common = (elt for elt in little if elt in bigdata)
        return self.__class__(common)

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.symmetric_difference(other)

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        result = self.__class__()
        data = result._data
        value = True
        selfdata = self._data
        try:
            otherdata = other._data
        except AttributeError:
            # Not a set: build one from the iterable to get a dict to probe.
            otherdata = Set(other)._data
        for elt in selfdata:
            if elt not in otherdata:
                data[elt] = value
        for elt in otherdata:
            if elt not in selfdata:
                data[elt] = value
        return result

    def  __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.difference(other)

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        result = self.__class__()
        data = result._data
        try:
            otherdata = other._data
        except AttributeError:
            # Not a set: build one from the iterable to get a dict to probe.
            otherdata = Set(other)._data
        value = True
        for elt in self:
            if elt not in otherdata:
                data[elt] = value
        return result

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)
        """
        try:
            return element in self._data
        except TypeError:
            # Unhashable element: retry with its temporarily immutable
            # wrapper, if it offers one.
            transform = getattr(element, "__as_temporarily_immutable__", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            return transform() in self._data

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set.

        Raises TypeError if `other` is not a set.
        """
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        otherdata = other._data
        for elt in self:
            if elt not in otherdata:
                return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set.

        Raises TypeError if `other` is not a set.
        """
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        selfdata = self._data
        for elt in other:
            if elt not in selfdata:
                return False
        return True

    # Inequality comparisons using the is-subset relation.
    __le__ = issubset
    __ge__ = issuperset

    def __lt__(self, other):
        self._binary_sanity_check(other)
        return len(self) < len(other) and self.issubset(other)

    def __gt__(self, other):
        self._binary_sanity_check(other)
        return len(self) > len(other) and self.issuperset(other)

    # We inherit object.__hash__, so we must deny this explicitly
    __hash__ = None

    # Assorted helpers

    def _binary_sanity_check(self, other):
        # Check that the other argument to a binary operation is also
        # a set, raising a TypeError otherwise.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        # Calculate hash code for a set by xor'ing the hash codes of
        # the elements.  This ensures that the hash code does not depend
        # on the order in which elements are added to the set.  This is
        # not called __hash__ because a BaseSet should not be hashable;
        # only an ImmutableSet is hashable.
        result = 0
        for elt in self:
            result ^= hash(elt)
        return result

    def _update(self, iterable):
        # The main loop for update() and the subclass __init__() methods.
        data = self._data

        # Use the fast update() method when a dictionary is available.
        if isinstance(iterable, BaseSet):
            data.update(iterable._data)
            return

        value = True

        if type(iterable) in self._fast_update_types:
            # Optimized: we know that __iter__() and next() can't
            # raise TypeError, so we can move 'try:' out of the loop.
            it = iter(iterable)
            while True:
                try:
                    for element in it:
                        data[element] = value
                    return
                except TypeError:
                    # 'element' is the unhashable item; store its immutable
                    # form, then resume the same iterator.
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise # re-raise the TypeError exception we caught
                    data[transform()] = value
        else:
            # Safe: only catch TypeError where intended
            for element in iterable:
                try:
                    data[element] = value
                except TypeError:
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise # re-raise the TypeError exception we caught
                    data[transform()] = value
376
377
class ImmutableSet(BaseSet):
    """Immutable set class.

    Hashable: the hash is computed from the elements on first use and
    cached in ``_hashcode``.
    """

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Construct an immutable set from an optional iterable."""
        self._hashcode = None
        self._data = {}
        if iterable is not None:
            self._update(iterable)

    def __hash__(self):
        """Return the set's hash, computing and caching it on first call."""
        if self._hashcode is None:
            self._hashcode = self._compute_hash()
        return self._hashcode

    def __getstate__(self):
        """Return the pickle state as a ``(data, hashcode)`` pair.

        The hashcode position is always None.  The cached hash is derived
        from the elements' own hashes, which are not guaranteed to be the
        same in the process that unpickles the set (id-based object hashes,
        string hash randomization), so it is never written out.  The
        2-tuple shape is kept for compatibility with existing readers.
        """
        return self._data, None

    def __setstate__(self, state):
        """Restore from a ``(data, hashcode)`` pair produced by pickling.

        Any hashcode stored in `state` is discarded; the hash is recomputed
        lazily by __hash__ so that it always agrees with the elements as
        they hash in the current process.
        """
        self._data, _stale_hashcode = state
        self._hashcode = None
402
class Set(BaseSet):
    """Mutable set: supports in-place modification and is not hashable."""

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Create a set, optionally filled from the elements of `iterable`."""
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __getstate__(self):
        # Always a 1-tuple so the state is never false, even for an empty
        # set: pickle does not call __setstate__ when the state is false.
        return (self._data,)

    def __setstate__(self, data):
        # Inverse of __getstate__: unwrap the 1-tuple.
        (contents,) = data
        self._data = contents

    # In-place union, intersection, differences.
    # Subtle:  the xyz_update() methods deliberately return None, like
    # every mutating operation on the built-in containers, whereas the
    # __ixyz__ operator hooks are required to return self.

    def __ior__(self, other):
        """In-place union (``self |= other``); `other` must be a set."""
        self._binary_sanity_check(other)
        self._data.update(other._data)
        return self

    def union_update(self, other):
        """Add every element of `other` (any iterable) to this set."""
        self._update(other)

    def __iand__(self, other):
        """In-place intersection (``self &= other``); `other` must be a set."""
        self._binary_sanity_check(other)
        common = self & other
        self._data = common._data
        return self

    def intersection_update(self, other):
        """Keep only the elements also found in `other` (any iterable)."""
        if not isinstance(other, BaseSet):
            common = self.intersection(other)
            self._data = common._data
        else:
            self &= other

    def __ixor__(self, other):
        """In-place symmetric difference (``self ^= other``); sets only."""
        self._binary_sanity_check(other)
        self.symmetric_difference_update(other)
        return self

    def symmetric_difference_update(self, other):
        """Keep the elements found in exactly one of this set and `other`."""
        contents = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if other is self:
            # x ^ x is empty; once cleared, the loop below has nothing to do.
            self.clear()
        for item in other:
            if item not in contents:
                contents[item] = True
            else:
                del contents[item]

    def __isub__(self, other):
        """In-place difference (``self -= other``); `other` must be a set."""
        self._binary_sanity_check(other)
        self.difference_update(other)
        return self

    def difference_update(self, other):
        """Drop from this set every element that occurs in `other`."""
        contents = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if other is self:
            # x - x is empty; once cleared, the loop below has nothing to do.
            self.clear()
        for item in other:
            if item in contents:
                del contents[item]

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Insert all values produced by an iterable (e.g. a list or file)."""
        self._update(iterable)

    def clear(self):
        """Empty the set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Insert `element` into the set.

        Nothing changes if the element is already a member.  An unhashable
        element is stored via its __as_immutable__() form when it has one;
        otherwise the TypeError propagates.
        """
        try:
            self._data[element] = True
        except TypeError:
            freeze = getattr(element, "__as_immutable__", None)
            if freeze is None:
                raise # not convertible: let the original TypeError through
            self._data[freeze()] = True

    def remove(self, element):
        """Delete `element` from the set; it must be a member.

        Raises KeyError when the element is absent.  An unhashable element
        is looked up via its __as_temporarily_immutable__() form when it
        has one; otherwise the TypeError propagates.
        """
        try:
            del self._data[element]
        except TypeError:
            wrap = getattr(element, "__as_temporarily_immutable__", None)
            if wrap is None:
                raise # not convertible: let the original TypeError through
            del self._data[wrap()]

    def discard(self, element):
        """Delete `element` from the set if present.

        Unlike remove(), a missing element is silently ignored.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove an arbitrary element from the set and return it."""
        element, _ = self._data.popitem()
        return element

    def __as_immutable__(self):
        # Snapshot of self as a new immutable set.
        return ImmutableSet(self)

    def __as_temporarily_immutable__(self):
        # Hashable wrapper around self; no copy is made.
        return _TemporarilyImmutableSet(self)
546
547
class _TemporarilyImmutableSet(BaseSet):
    # Hashable stand-in for a mutable set, treating it as frozen for the
    # moment.  Only hashing and equality comparison are supplied.

    def __init__(self, set):
        wrapped = set
        self._set = wrapped
        # Share the wrapped set's dict (no copy); BaseSet's equality
        # comparisons read _data.
        self._data = wrapped._data

    def __hash__(self):
        # Delegates to the wrapped set's element-based hash.
        wrapped = self._set
        return wrapped._compute_hash()
558