• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Manage shelves of pickled objects.
2
3A "shelf" is a persistent, dictionary-like object.  The difference
4with dbm databases is that the values (not the keys!) in a shelf can
5be essentially arbitrary Python objects -- anything that the "pickle"
6module can handle.  This includes most class instances, recursive data
7types, and objects containing lots of shared sub-objects.  The keys
8are ordinary strings.
9
10To summarize the interface (key is a string, data is an arbitrary
11object):
12
13        import shelve
14        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
15
16        d[key] = data   # store data at key (overwrites old data if
17                        # using an existing key)
18        data = d[key]   # retrieve a COPY of the data at key (raise
19                        # KeyError if no such key) -- NOTE that this
20                        # access returns a *copy* of the entry!
21        del d[key]      # delete data stored at key (raises KeyError
22                        # if no such key)
23        flag = key in d # true if the key exists
24        list = d.keys() # a list of all existing keys (slow!)
25
26        d.close()       # close it
27
Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.
30
31Normally, d[key] returns a COPY of the entry.  This needs care when
32mutable entries are mutated: for example, if d[key] is a list,
33        d[key].append(anitem)
34does NOT modify the entry d[key] itself, as stored in the persistent
35mapping -- it only modifies the copy, which is then immediately
36discarded, so that the append has NO effect whatsoever.  To append an
37item to d[key] in a way that will affect the persistent mapping, use:
38        data = d[key]
39        data.append(anitem)
40        d[key] = data
41
42To avoid the problem with mutable entries, you may pass the keyword
43argument writeback=True in the call to shelve.open.  When you use:
44        d = shelve.open(filename, writeback=True)
45then d keeps a cache of all entries you access, and writes them all back
46to the persistent mapping when you call d.close().  This ensures that
47such usage as d[key].append(anitem) works as intended.
48
However, using the keyword argument writeback=True may consume vast
amounts of memory for the cache, and it may make d.close() very slow, if
you access many of d's entries after opening it in this way: d has no way
to check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
57"""
58
import collections
import collections.abc
from io import BytesIO
from pickle import Pickler, Unpickler
63
# Names exported by a star-import of this module.
__all__ = [
    "Shelf",
    "BsdDbShelf",
    "DbfilenameShelf",
    "open",
]
65
class _ClosedDict(collections.abc.MutableMapping):
    """Marker for a closed dict.  Access attempts raise a ValueError.

    Every abstract mapping operation (and keys()) is bound to the single
    raising method below, so any use of an instance fails with the same
    error.
    """

    def closed(self, *args):
        """Reject the attempted operation; *args* are ignored.

        Raises ValueError unconditionally.
        """
        raise ValueError('invalid operation on closed shelf')

    # One raising implementation serves the whole mapping interface.
    __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed

    def __repr__(self):
        return '<Closed Dictionary>'
75
76
class Shelf(collections.abc.MutableMapping):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.  String keys are
    encoded to bytes with ``keyencoding`` and values are pickled to bytes
    before being stored in that object.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        """Wrap the dictionary-like object *dict*.

        protocol    -- pickle protocol used for stored values; None
                       selects protocol 3.
        writeback   -- if true, keep every entry that is read or written
                       in self.cache and store the cache again on sync().
        keyencoding -- encoding used to turn str keys into bytes keys.
        """
        self.dict = dict
        if protocol is None:
            protocol = 3
        self._protocol = protocol
        # __del__ checks for this attribute to decide whether __init__
        # completed far enough for close() to be meaningful.
        self.writeback = writeback
        self.cache = {}
        self.keyencoding = keyencoding

    def __iter__(self):
        """Yield every key of the underlying mapping, decoded to str."""
        for k in self.dict.keys():
            yield k.decode(self.keyencoding)

    def __len__(self):
        """Return the number of entries in the underlying mapping."""
        return len(self.dict)

    def __contains__(self, key):
        """Return whether the encoded *key* is in the underlying mapping."""
        return key.encode(self.keyencoding) in self.dict

    def get(self, key, default=None):
        """Return self[key] if *key* is stored, otherwise *default*."""
        if key.encode(self.keyencoding) in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the object stored under *key*.

        A cached object is returned as-is; otherwise the stored bytes are
        unpickled (and remembered in the cache when writeback is on).
        A missing key propagates the underlying mapping's lookup error.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = BytesIO(self.dict[key.encode(self.keyencoding)])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it under the encoded *key*."""
        if self.writeback:
            self.cache[key] = value
        f = BytesIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key.encode(self.keyencoding)] = f.getvalue()

    def __delitem__(self, key):
        """Delete *key* from the underlying mapping and from the cache."""
        del self.dict[key.encode(self.keyencoding)]
        try:
            del self.cache[key]
        except KeyError:
            # The entry was simply never cached.
            pass

    def __enter__(self):
        """Return the shelf itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the shelf when the ``with`` block ends."""
        self.close()

    def close(self):
        """Sync, close the underlying mapping if it can be closed, and
        replace it with a closed-shelf marker.

        Does nothing when self.dict is already None.
        """
        if self.dict is None:
            return
        try:
            self.sync()
            try:
                self.dict.close()
            except AttributeError:
                # The underlying mapping has no close() method.
                pass
        finally:
            # Catch errors that may happen when close is called from __del__
            # because CPython is in interpreter shutdown.  The bare except is
            # deliberate: this is best-effort cleanup and must not raise.
            try:
                self.dict = _ClosedDict()
            except:
                self.dict = None

    def __del__(self):
        """Close the shelf when the object is finalized."""
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            # see http://bugs.python.org/issue1339007 for details
            return
        self.close()

    def sync(self):
        """Store all cached entries again, empty the cache, and call the
        underlying mapping's sync() when it has one.
        """
        if self.writeback and self.cache:
            # Turn writeback off while storing so __setitem__ does not
            # modify the cache that is being iterated.
            # NOTE: if storing an entry raises, writeback is left False
            # and the cache is kept as it was.
            self.writeback = False
            for key, entry in self.cache.items():
                self[key] = entry
            self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
173
174
class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    On top of the Shelf interface this offers first(), next(),
    previous(), last() and set_location(), which have no counterpart in
    [g]dbm databases.  Each of them returns a (key, value) pair with the
    key decoded to str and the value unpickled.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        Shelf.__init__(self, dict, protocol=protocol, writeback=writeback,
                       keyencoding=keyencoding)

    def _load_record(self, record):
        """Turn a raw (key, pickled value) pair into (str key, object)."""
        raw_key, pickled = record
        return (raw_key.decode(self.keyencoding),
                Unpickler(BytesIO(pickled)).load())

    def set_location(self, key):
        """Return the record the database's set_location(key) yields."""
        return self._load_record(self.dict.set_location(key))

    def next(self):
        """Return the record produced by next() on the database."""
        return self._load_record(next(self.dict))

    def previous(self):
        """Return the record the database's previous() yields."""
        return self._load_record(self.dict.previous())

    def first(self):
        """Return the record the database's first() yields."""
        return self._load_record(self.dict.first())

    def last(self):
        """Return the record the database's last() yields."""
        return self._load_record(self.dict.last())
216
217
class DbfilenameShelf(Shelf):
    """Shelf implementation using the "dbm" generic dbm interface.

    The constructor takes the filename of the dbm database and opens it
    itself with dbm.open(filename, flag).
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # Imported here rather than at module level, as in the original.
        import dbm
        database = dbm.open(filename, flag)
        Shelf.__init__(self, database, protocol=protocol,
                       writeback=writeback)
228
229
def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename  -- base filename for the underlying database.  As a
                 side-effect, an extension may be added to the filename
                 and more than one file may be created.
    flag      -- same interpretation as the flag parameter of dbm.open().
    protocol  -- version of the pickle protocol used to store values;
                 None selects the shelf's default.
    writeback -- passed through to the shelf; see the module's __doc__
                 string for what it implies.

    Returns a DbfilenameShelf.  Stored values are pickles, so only open
    shelf files that come from a trusted source.

    See the module's __doc__ string for an overview of the interface.
    """
    return DbfilenameShelf(filename, flag=flag, protocol=protocol,
                           writeback=writeback)
244