1"""A dumb and slow but simple dbm clone. 2 3For database spam, spam.dir contains the index (a text file), 4spam.bak *may* contain a backup of the index (also a text file), 5while spam.dat contains the data (a binary file). 6 7XXX TO DO: 8 9- seems to contain a bug when updating... 10 11- reclaim free space (currently, space once occupied by deleted or expanded 12items is never reused) 13 14- support concurrent access (currently, if two processes take turns making 15updates, they can mess up the index) 16 17- support efficient access to large databases (currently, the whole index 18is read when the database is opened, and some updates rewrite the whole index) 19 20- support opening for read-only (flag = 'm') 21 22""" 23 24import ast as _ast 25import os as _os 26import __builtin__ 27import UserDict 28 29_open = __builtin__.open 30 31_BLOCKSIZE = 512 32 33error = IOError # For anydbm 34 35class _Database(UserDict.DictMixin): 36 37 # The on-disk directory and data files can remain in mutually 38 # inconsistent states for an arbitrarily long time (see comments 39 # at the end of __setitem__). This is only repaired when _commit() 40 # gets called. One place _commit() gets called is from __del__(), 41 # and if that occurs at program shutdown time, module globals may 42 # already have gotten rebound to None. Since it's crucial that 43 # _commit() finish successfully, we can't ignore shutdown races 44 # here, and _commit() must not reference any globals. 45 _os = _os # for _commit() 46 _open = _open # for _commit() 47 48 def __init__(self, filebasename, mode, flag='c'): 49 self._mode = mode 50 self._readonly = (flag == 'r') 51 52 # The directory file is a text file. Each line looks like 53 # "%r, (%d, %d)\n" % (key, pos, siz) 54 # where key is the string key, pos is the offset into the dat 55 # file of the associated value's first byte, and siz is the number 56 # of bytes in the associated value. 57 self._dirfile = filebasename + _os.extsep + 'dir' 58 59 # The data file is a binary file pointed into by the directory 60 # file, and holds the values associated with keys. Each value 61 # begins at a _BLOCKSIZE-aligned byte offset, and is a raw 62 # binary 8-bit string value. 63 self._datfile = filebasename + _os.extsep + 'dat' 64 self._bakfile = filebasename + _os.extsep + 'bak' 65 66 # The index is an in-memory dict, mirroring the directory file. 67 self._index = None # maps keys to (pos, siz) pairs 68 69 # Mod by Jack: create data file if needed 70 try: 71 f = _open(self._datfile, 'r') 72 except IOError: 73 with _open(self._datfile, 'w') as f: 74 self._chmod(self._datfile) 75 else: 76 f.close() 77 self._update() 78 79 # Read directory file into the in-memory index dict. 80 def _update(self): 81 self._index = {} 82 try: 83 f = _open(self._dirfile) 84 except IOError: 85 self._modified = not self._readonly 86 else: 87 self._modified = False 88 with f: 89 for line in f: 90 line = line.rstrip() 91 key, pos_and_siz_pair = _ast.literal_eval(line) 92 self._index[key] = pos_and_siz_pair 93 94 # Write the index dict to the directory file. The original directory 95 # file (if any) is renamed with a .bak extension first. If a .bak 96 # file currently exists, it's deleted. 97 def _commit(self): 98 # CAUTION: It's vital that _commit() succeed, and _commit() can 99 # be called from __del__(). Therefore we must never reference a 100 # global in this routine. 101 if self._index is None or not self._modified: 102 return # nothing to do 103 104 try: 105 self._os.unlink(self._bakfile) 106 except self._os.error: 107 pass 108 109 try: 110 self._os.rename(self._dirfile, self._bakfile) 111 except self._os.error: 112 pass 113 114 with self._open(self._dirfile, 'w') as f: 115 self._chmod(self._dirfile) 116 for key, pos_and_siz_pair in self._index.iteritems(): 117 f.write("%r, %r\n" % (key, pos_and_siz_pair)) 118 119 sync = _commit 120 121 def __getitem__(self, key): 122 pos, siz = self._index[key] # may raise KeyError 123 with _open(self._datfile, 'rb') as f: 124 f.seek(pos) 125 dat = f.read(siz) 126 return dat 127 128 # Append val to the data file, starting at a _BLOCKSIZE-aligned 129 # offset. The data file is first padded with NUL bytes (if needed) 130 # to get to an aligned offset. Return pair 131 # (starting offset of val, len(val)) 132 def _addval(self, val): 133 with _open(self._datfile, 'rb+') as f: 134 f.seek(0, 2) 135 pos = int(f.tell()) 136 npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE 137 f.write('\0'*(npos-pos)) 138 pos = npos 139 f.write(val) 140 return (pos, len(val)) 141 142 # Write val to the data file, starting at offset pos. The caller 143 # is responsible for ensuring that there's enough room starting at 144 # pos to hold val, without overwriting some other value. Return 145 # pair (pos, len(val)). 146 def _setval(self, pos, val): 147 with _open(self._datfile, 'rb+') as f: 148 f.seek(pos) 149 f.write(val) 150 return (pos, len(val)) 151 152 # key is a new key whose associated value starts in the data file 153 # at offset pos and with length siz. Add an index record to 154 # the in-memory index dict, and append one to the directory file. 155 def _addkey(self, key, pos_and_siz_pair): 156 self._index[key] = pos_and_siz_pair 157 with _open(self._dirfile, 'a') as f: 158 self._chmod(self._dirfile) 159 f.write("%r, %r\n" % (key, pos_and_siz_pair)) 160 161 def __setitem__(self, key, val): 162 if not type(key) == type('') == type(val): 163 raise TypeError, "keys and values must be strings" 164 self._modified = True 165 if key not in self._index: 166 self._addkey(key, self._addval(val)) 167 else: 168 # See whether the new value is small enough to fit in the 169 # (padded) space currently occupied by the old value. 170 pos, siz = self._index[key] 171 oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE 172 newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE 173 if newblocks <= oldblocks: 174 self._index[key] = self._setval(pos, val) 175 else: 176 # The new value doesn't fit in the (padded) space used 177 # by the old value. The blocks used by the old value are 178 # forever lost. 179 self._index[key] = self._addval(val) 180 181 # Note that _index may be out of synch with the directory 182 # file now: _setval() and _addval() don't update the directory 183 # file. This also means that the on-disk directory and data 184 # files are in a mutually inconsistent state, and they'll 185 # remain that way until _commit() is called. Note that this 186 # is a disaster (for the database) if the program crashes 187 # (so that _commit() never gets called). 188 189 def __delitem__(self, key): 190 self._modified = True 191 # The blocks used by the associated value are lost. 192 del self._index[key] 193 # XXX It's unclear why we do a _commit() here (the code always 194 # XXX has, so I'm not changing it). _setitem__ doesn't try to 195 # XXX keep the directory file in synch. Why should we? Or 196 # XXX why shouldn't __setitem__? 197 self._commit() 198 199 def keys(self): 200 return self._index.keys() 201 202 def has_key(self, key): 203 return key in self._index 204 205 def __contains__(self, key): 206 return key in self._index 207 208 def iterkeys(self): 209 return self._index.iterkeys() 210 __iter__ = iterkeys 211 212 def __len__(self): 213 return len(self._index) 214 215 def close(self): 216 try: 217 self._commit() 218 finally: 219 self._index = self._datfile = self._dirfile = self._bakfile = None 220 221 __del__ = close 222 223 def _chmod (self, file): 224 if hasattr(self._os, 'chmod'): 225 self._os.chmod(file, self._mode) 226 227 228def open(file, flag=None, mode=0666): 229 """Open the database file, filename, and return corresponding object. 230 231 The flag argument, used to control how the database is opened in the 232 other DBM implementations, is ignored in the dumbdbm module; the 233 database is always opened for update, and will be created if it does 234 not exist. 235 236 The optional mode argument is the UNIX mode of the file, used only when 237 the database has to be created. It defaults to octal code 0666 (and 238 will be modified by the prevailing umask). 239 240 """ 241 # flag argument is currently ignored 242 243 # Modify mode depending on the umask 244 try: 245 um = _os.umask(0) 246 _os.umask(um) 247 except AttributeError: 248 pass 249 else: 250 # Turn off any bits that are set in the umask 251 mode = mode & (~um) 252 253 return _Database(file, mode, flag) 254