"""Manage shelves of pickled objects.

A "shelf" is a persistent, dictionary-like object.  The difference
with dbm databases is that the values (not the keys!) in a shelf can
be essentially arbitrary Python objects -- anything that the "pickle"
module can handle.  This includes most class instances, recursive data
types, and objects containing lots of shared sub-objects.  The keys
are ordinary strings.

To summarize the interface (key is a string, data is an arbitrary
object):

        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raise
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        klist = list(d.keys()) # a list of all existing keys (slow!)

        d.close()       # close it

Dependent on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.

Normally, d[key] returns a COPY of the entry.  This needs care when
mutable entries are mutated: for example, if d[key] is a list,
        d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever.  To append an
item to d[key] in a way that will affect the persistent mapping, use:
        data = d[key]
        data.append(anitem)
        d[key] = data

To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open.  When you use:
        d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close().  This ensures that
such usage as d[key].append(anitem) works as intended.

However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).

WARNING: values are restored with the "pickle" module, and unpickling
can execute arbitrary code.  Never open a shelf that comes from an
untrusted source.
"""

from pickle import Pickler, Unpickler
from io import BytesIO

import collections
import collections.abc

__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]


class _ClosedDict(collections.abc.MutableMapping):
    'Marker for a closed dict.  Access attempts raise a ValueError.'

    def closed(self, *args):
        raise ValueError('invalid operation on closed shelf')
    # Every mapping operation is routed to the same raising stub.
    __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed

    def __repr__(self):
        return '<Closed Dictionary>'


class Shelf(collections.abc.MutableMapping):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object whose keys and
    values are bytes: string keys are encoded with *keyencoding* and
    values are pickled with *protocol* (protocol 3 when None).  When
    *writeback* is true, every entry read or written is also kept in
    an in-memory cache that is flushed by sync() and close().

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        self.dict = dict
        if protocol is None:
            protocol = 3
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}
        self.keyencoding = keyencoding

    def __iter__(self):
        """Yield every key of the backing mapping, decoded to str."""
        for k in self.dict.keys():
            yield k.decode(self.keyencoding)

    def __len__(self):
        return len(self.dict)

    def __contains__(self, key):
        return key.encode(self.keyencoding) in self.dict

    def get(self, key, default=None):
        """Return the value stored at *key*, or *default* if absent."""
        if key.encode(self.keyencoding) in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the (unpickled) value for *key*; KeyError if missing.

        The write-back cache is consulted first; on a miss the value is
        unpickled from the backing mapping and, when writeback is on,
        remembered in the cache.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = BytesIO(self.dict[key.encode(self.keyencoding)])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it under the encoded *key*."""
        if self.writeback:
            self.cache[key] = value
        f = BytesIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key.encode(self.keyencoding)] = f.getvalue()

    def __delitem__(self, key):
        """Delete *key* from the backing mapping and from the cache."""
        del self.dict[key.encode(self.keyencoding)]
        try:
            del self.cache[key]
        except KeyError:
            pass

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        """Flush pending changes and close the backing mapping.

        Afterwards self.dict is a _ClosedDict marker, so further access
        raises ValueError.  Calling close() again is harmless.
        """
        if self.dict is None:
            return
        try:
            self.sync()
            try:
                self.dict.close()
            except AttributeError:
                # The backing mapping (e.g. a plain dict) has no close().
                pass
        finally:
            # Catch errors that may happen when close is called from __del__
            # because CPython is in interpreter shutdown.
            try:
                self.dict = _ClosedDict()
            except Exception:
                self.dict = None

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            # see http://bugs.python.org/issue1339007 for details
            return
        self.close()

    def sync(self):
        """Write cached entries back and sync the backing mapping.

        The cache is emptied only after every entry was written
        successfully; if writing an entry raises, the cache is kept and
        the exception propagates.
        """
        if self.writeback and self.cache:
            # Disable writeback temporarily so that __setitem__ does not
            # mutate the cache while it is being iterated.
            self.writeback = False
            try:
                for key, entry in self.cache.items():
                    self[key] = entry
            finally:
                # Restore the flag even if pickling/storing failed;
                # otherwise the shelf would silently stop caching.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()


class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    This adds methods first(), next(), previous(), last() and
    set_location() that have no counterpart in [g]dbm databases.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        Shelf.__init__(self, dict, protocol, writeback, keyencoding)

    def _decode_item(self, item):
        """Turn a raw (key bytes, pickled bytes) pair into (str, object)."""
        (key, value) = item
        f = BytesIO(value)
        return (key.decode(self.keyencoding), Unpickler(f).load())

    def set_location(self, key):
        return self._decode_item(self.dict.set_location(key))

    def next(self):
        return self._decode_item(next(self.dict))

    def previous(self):
        return self._decode_item(self.dict.previous())

    def first(self):
        return self._decode_item(self.dict.first())

    def last(self):
        return self._decode_item(self.dict.last())


class DbfilenameShelf(Shelf):
    """Shelf implementation using the "dbm" generic dbm interface.

    This is initialized with the filename for the dbm database.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        import dbm
        Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback)


def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
    database.  As a side-effect, an extension may be added to the
    filename and more than one file may be created.  The optional flag
    parameter has the same interpretation as the flag parameter of
    dbm.open(). The optional protocol parameter specifies the
    version of the pickle protocol; when it is None, protocol 3 is used.

    See the module's __doc__ string for an overview of the interface.
    """

    return DbfilenameShelf(filename, flag, protocol, writeback)