"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""

import ast as _ast
import io as _io
import os as _os
import collections.abc

__all__ = ["error", "open"]

# Every value in the data file starts at a multiple of this many bytes.
_BLOCKSIZE = 512

error = OSError


class _Database(collections.abc.MutableMapping):
    """Mapping of bytes keys to bytes values backed by three files.

    ``<base>.dat`` holds the values, ``<base>.dir`` holds the textual
    index and ``<base>.bak`` holds the previous copy of the index.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    def __init__(self, filebasename, mode, flag='c'):
        """Open (creating if necessary) the database named *filebasename*.

        *mode* is the permission mask passed to chmod for files this
        object writes.  *flag* is one of 'r', 'w', 'c' or 'n'; 'n' removes
        any existing files first and 'r' marks the object read-only
        (writes then emit a DeprecationWarning but still happen).
        """
        # The index is an in-memory dict, mirroring the directory file;
        # it maps keys to (pos, siz) pairs.  It is set to None before
        # anything that can fail so that __del__() -> close() -> _commit()
        # is a harmless no-op on a partially constructed object.
        self._index = None

        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + '.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + '.dat'
        self._bakfile = filebasename + '.bak'

        # Handle the creation
        self._create(flag)
        self._update(flag)

    def _create(self, flag):
        """Remove old files for flag 'n' and make sure the data file exists."""
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                import warnings
                warnings.warn("The database file is missing, the "
                              "semantics of the 'c' flag will be used.",
                              DeprecationWarning, stacklevel=4)
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        """Load the directory file into ``self._index``.

        If the directory file cannot be opened the index starts empty and
        the object is flagged as modified (unless read-only) so that a
        later _commit() writes it out.
        """
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            self._modified = not self._readonly
            if flag not in ('c', 'n'):
                import warnings
                warnings.warn("The index file is missing, the "
                              "semantics of the 'c' flag will be used.",
                              DeprecationWarning, stacklevel=4)
        else:
            self._modified = False
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index if needed."""
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

        # The directory file now mirrors the index.  Without this reset
        # every later sync()/close() would rewrite the file again and
        # replace the .bak backup for no reason.
        self._modified = False

    sync = _commit

    def _verify_open(self):
        """Raise ``error`` if the database has been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        """Return the bytes stored under *key* (str keys are UTF-8 encoded)."""
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store *val* under *key*.

        str keys and values are encoded as UTF-8; anything other than
        str, bytes or bytearray raises TypeError.  On a read-only
        database a DeprecationWarning is emitted and the write proceeds.
        """
        if self._readonly:
            import warnings
            warnings.warn('The database is opened for reading only',
                          DeprecationWarning, stacklevel=2)
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif not isinstance(key, (bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove *key* from the index and commit the directory file.

        On a read-only database a DeprecationWarning is emitted and the
        deletion proceeds.
        """
        if self._readonly:
            import warnings
            warnings.warn('The database is opened for reading only',
                          DeprecationWarning, stacklevel=2)
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        self._modified = True
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys; raise ``error`` if closed."""
        try:
            return list(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def items(self):
        """Return a list of (key, value) pairs; raise ``error`` if closed."""
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        """Return whether *key* is in the index (str keys are UTF-8 encoded)."""
        if isinstance(key, str):
            key = key.encode('utf-8')
        try:
            return key in self._index
        except TypeError:
            # A TypeError comes either from a closed database (the index
            # is None) or from an unhashable key; only the former is
            # translated.
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        """Return an iterator over the keys; raise ``error`` if closed."""
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys; raise ``error`` if closed."""
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        """Commit pending index changes and mark the object as closed."""
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply the stored permission mode to *file* where chmod exists."""
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()


def open(file, flag='c', mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, supports only the semantics of 'c' and 'n'
    values.  Other values will default to the semantics of 'c' value:
    the database will always be opened for update and will be created if it
    does not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).

    """

    # Modify mode depending on the umask
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    if flag not in ('r', 'w', 'c', 'n'):
        import warnings
        warnings.warn("Flag must be one of 'r', 'w', 'c', or 'n'",
                      DeprecationWarning, stacklevel=2)
    return _Database(file, mode, flag=flag)