1"""Create portable serialized representations of Python objects. 2 3See module copyreg for a mechanism for registering custom picklers. 4See module pickletools source for extensive comments. 5 6Classes: 7 8 Pickler 9 Unpickler 10 11Functions: 12 13 dump(object, file) 14 dumps(object) -> string 15 load(file) -> object 16 loads(bytes) -> object 17 18Misc variables: 19 20 __version__ 21 format_version 22 compatible_formats 23 24""" 25 26from types import FunctionType 27from copyreg import dispatch_table 28from copyreg import _extension_registry, _inverted_registry, _extension_cache 29from itertools import islice 30from functools import partial 31import sys 32from sys import maxsize 33from struct import pack, unpack 34import re 35import io 36import codecs 37import _compat_pickle 38 39__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", 40 "Unpickler", "dump", "dumps", "load", "loads"] 41 42try: 43 from _pickle import PickleBuffer 44 __all__.append("PickleBuffer") 45 _HAVE_PICKLE_BUFFER = True 46except ImportError: 47 _HAVE_PICKLE_BUFFER = False 48 49 50# Shortcut for use in isinstance testing 51bytes_types = (bytes, bytearray) 52 53# These are purely informational; no code uses these. 54format_version = "4.0" # File format version we write 55compatible_formats = ["1.0", # Original protocol 0 56 "1.1", # Protocol 0 with INST added 57 "1.2", # Original protocol 1 58 "1.3", # Protocol 1 with BINFLOAT added 59 "2.0", # Protocol 2 60 "3.0", # Protocol 3 61 "4.0", # Protocol 4 62 "5.0", # Protocol 5 63 ] # Old format versions we can read 64 65# This is the highest protocol number we know how to read. 66HIGHEST_PROTOCOL = 5 67 68# The protocol we write by default. May be less than HIGHEST_PROTOCOL. 69# Only bump this if the oldest still supported version of Python already 70# includes it. 
DEFAULT_PROTOCOL = 4

class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass

class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass

class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass

# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        # The fully unpickled object, delivered back to Unpickler.load().
        self.value = value

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Export every ALL_CAPS opcode/constant name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


class _Framer:
    """Write-side helper implementing protocol 4+ framing (FRAME opcodes)."""

    _FRAME_SIZE_MIN = 4
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        # file_write: the underlying file object's write method.
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        # Begin buffering output into an in-memory frame.
        self.current_frame = io.BytesIO()

    def end_framing(self):
        # Flush any pending frame data and stop framing.
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        # Emit the buffered frame once it reaches the target size (or when
        # *force* is true), prefixed by a FRAME opcode and an 8-byte length.
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame. The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                    # Issue a separate call to write to append the frame
                    # contents without concatenation to the above to avoid a
                    # memory copy.
                    write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        # Buffer into the current frame while framing, else write through.
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        # Write a large binary object directly to the file, outside any frame.
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object. Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)


class _Unframer:
    """Read-side helper making framed and unframed pickles look alike."""

    def __init__(self, file_read, file_readline, file_tell=None):
        self.file_read = file_read
        self.file_readline = file_readline
        # io.BytesIO over the current frame's bytes, or None when unframed.
        self.current_frame = None

    def readinto(self, buf):
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame exhausted exactly at a boundary: fall back to the file.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame exhausted exactly at a boundary: fall back to the file.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                # Frame exhausted exactly at a boundary: fall back to the file.
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        # Begin a new frame of *frame_size* bytes read eagerly from the file.
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))


# Tools used for pickling.
def _getattribute(obj, name):
    """Resolve the dotted path *name* starting from *obj*.

    Returns a ``(target, parent)`` pair, where *parent* is the object the
    final attribute was looked up on.  Raises AttributeError when the path
    goes through ``<locals>`` or when an attribute is missing.
    """
    top = obj
    for part in name.split('.'):
        if part == '<locals>':
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, top))
        parent = obj
        try:
            obj = getattr(obj, part)
        except AttributeError:
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, top)) from None
    return obj, parent

def whichmodule(obj, name):
    """Return the name of the module *obj* belongs to, or '__main__'."""
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Scan a snapshot of sys.modules: getattr() on dynamic modules may
    # import further modules and mutate sys.modules mid-iteration.
    for module_name, module in sys.modules.copy().items():
        if (module is None
                or module_name == '__main__'
                or module_name == '__mp_main__'):  # bpo-42406
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'

def encode_long(x):
    r"""Encode an int as a two's-complement little-endian byte string.

    Zero is special-cased to the empty string, which saves a byte in the
    LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    # Negative values sometimes gain a redundant 0xff sign byte; trim it
    # when the preceding byte already carries the sign bit.
    if (x < 0 and nbytes > 1
            and result[-1] == 0xff and result[-2] & 0x80):
        result = result[:-1]
    return result

def decode_long(data):
    r"""Decode a two's-complement little-endian byte string into an int.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)


# Unique sentinel distinguishing "absent" from any real value (incl. None).
_NoValue = object()

# Pickling machinery

class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 4 (introduced in Python 3.4 and
        incompatible with previous versions).  A negative protocol
        selects the highest protocol supported.  The higher the protocol
        used, the more recent the version of Python needed to read the
        pickle produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument: a file object opened for binary writing,
        an io.BytesIO instance, or any custom object meeting this
        interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.  Otherwise
        it may be called any number of times with a buffer view; a false
        return value (such as None) marks the buffer as out-of-band,
        while a true one keeps it in-band, i.e. inside the pickle
        stream.  Supplying *buffer_callback* with a *protocol* that is
        None or smaller than 5 is an error.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1
        self.fast = 0
        self.fix_imports = fix_imports and protocol < 3

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value.  Clearing it is
        useful when re-using picklers.
        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Mimic _pickle.Pickler.dump(): fail clearly when a subclass
        # forgot to call Pickler.__init__().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()

    def memoize(self, obj):
        """Store *obj* in the memo and emit the matching PUT opcode."""
        # The Pickler memo maps id(obj) -> (memo key, obj).  The key is
        # what gets written to the stream and becomes the index into the
        # Unpickler's memo; keeping obj itself alive guarantees that its
        # id() stays unique while pickling is in progress.  Using the
        # current memo length as the key is merely a convention (the
        # values just have to be unique), but it lets the Unpickler
        # implement its memo as a plain growable array.
        if self.fast:
            return
        assert id(obj) not in self.memo
        memo_key = len(self.memo)
        self.write(self.put(memo_key))
        self.memo[id(obj)] = memo_key, obj

    def put(self, idx):
        """Return a PUT (BINPUT, LONG_BINPUT, MEMOIZE) opcode string."""
        if self.proto >= 4:
            return MEMOIZE
        if not self.bin:
            return PUT + repr(idx).encode("ascii") + b'\n'
        if idx < 256:
            return BINPUT + pack("<B", idx)
        return LONG_BINPUT + pack("<I", idx)

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
523 def get(self, i): 524 if self.bin: 525 if i < 256: 526 return BINGET + pack("<B", i) 527 else: 528 return LONG_BINGET + pack("<I", i) 529 530 return GET + repr(i).encode("ascii") + b'\n' 531 532 def save(self, obj, save_persistent_id=True): 533 self.framer.commit_frame() 534 535 # Check for persistent id (defined by a subclass) 536 if save_persistent_id: 537 pid = self.persistent_id(obj) 538 if pid is not None: 539 self.save_pers(pid) 540 return 541 542 # Check the memo 543 x = self.memo.get(id(obj)) 544 if x is not None: 545 self.write(self.get(x[0])) 546 return 547 548 rv = NotImplemented 549 reduce = getattr(self, "reducer_override", _NoValue) 550 if reduce is not _NoValue: 551 rv = reduce(obj) 552 553 if rv is NotImplemented: 554 # Check the type dispatch table 555 t = type(obj) 556 f = self.dispatch.get(t) 557 if f is not None: 558 f(self, obj) # Call unbound method with explicit self 559 return 560 561 # Check private dispatch table if any, or else 562 # copyreg.dispatch_table 563 reduce = getattr(self, 'dispatch_table', dispatch_table).get(t, _NoValue) 564 if reduce is not _NoValue: 565 rv = reduce(obj) 566 else: 567 # Check for a class with a custom metaclass; treat as regular 568 # class 569 if issubclass(t, type): 570 self.save_global(obj) 571 return 572 573 # Check for a __reduce_ex__ method, fall back to __reduce__ 574 reduce = getattr(obj, "__reduce_ex__", _NoValue) 575 if reduce is not _NoValue: 576 rv = reduce(self.proto) 577 else: 578 reduce = getattr(obj, "__reduce__", _NoValue) 579 if reduce is not _NoValue: 580 rv = reduce() 581 else: 582 raise PicklingError("Can't pickle %r object: %r" % 583 (t.__name__, obj)) 584 585 # Check for string returned by reduce(), meaning "save as global" 586 if isinstance(rv, str): 587 self.save_global(obj, rv) 588 return 589 590 # Assert that reduce() returned a tuple 591 if not isinstance(rv, tuple): 592 raise PicklingError("%s must return string or tuple" % reduce) 593 594 # Assert that it returned an 
appropriately sized tuple 595 l = len(rv) 596 if not (2 <= l <= 6): 597 raise PicklingError("Tuple returned by %s must have " 598 "two to six elements" % reduce) 599 600 # Save the reduce() output and finally memoize the object 601 self.save_reduce(obj=obj, *rv) 602 603 def persistent_id(self, obj): 604 # This exists so a subclass can override it 605 return None 606 607 def save_pers(self, pid): 608 # Save a persistent id reference 609 if self.bin: 610 self.save(pid, save_persistent_id=False) 611 self.write(BINPERSID) 612 else: 613 try: 614 self.write(PERSID + str(pid).encode("ascii") + b'\n') 615 except UnicodeEncodeError: 616 raise PicklingError( 617 "persistent IDs in protocol 0 must be ASCII strings") 618 619 def save_reduce(self, func, args, state=None, listitems=None, 620 dictitems=None, state_setter=None, *, obj=None): 621 # This API is called by some subclasses 622 623 if not isinstance(args, tuple): 624 raise PicklingError("args from save_reduce() must be a tuple") 625 if not callable(func): 626 raise PicklingError("func from save_reduce() must be callable") 627 628 save = self.save 629 write = self.write 630 631 func_name = getattr(func, "__name__", "") 632 if self.proto >= 2 and func_name == "__newobj_ex__": 633 cls, args, kwargs = args 634 if not hasattr(cls, "__new__"): 635 raise PicklingError("args[0] from {} args has no __new__" 636 .format(func_name)) 637 if obj is not None and cls is not obj.__class__: 638 raise PicklingError("args[0] from {} args has the wrong class" 639 .format(func_name)) 640 if self.proto >= 4: 641 save(cls) 642 save(args) 643 save(kwargs) 644 write(NEWOBJ_EX) 645 else: 646 func = partial(cls.__new__, cls, *args, **kwargs) 647 save(func) 648 save(()) 649 write(REDUCE) 650 elif self.proto >= 2 and func_name == "__newobj__": 651 # A __reduce__ implementation can direct protocol 2 or newer to 652 # use the more efficient NEWOBJ opcode, while still 653 # allowing protocol 0 and 1 to work normally. 
For this to 654 # work, the function returned by __reduce__ should be 655 # called __newobj__, and its first argument should be a 656 # class. The implementation for __newobj__ 657 # should be as follows, although pickle has no way to 658 # verify this: 659 # 660 # def __newobj__(cls, *args): 661 # return cls.__new__(cls, *args) 662 # 663 # Protocols 0 and 1 will pickle a reference to __newobj__, 664 # while protocol 2 (and above) will pickle a reference to 665 # cls, the remaining args tuple, and the NEWOBJ code, 666 # which calls cls.__new__(cls, *args) at unpickling time 667 # (see load_newobj below). If __reduce__ returns a 668 # three-tuple, the state from the third tuple item will be 669 # pickled regardless of the protocol, calling __setstate__ 670 # at unpickling time (see load_build below). 671 # 672 # Note that no standard __newobj__ implementation exists; 673 # you have to provide your own. This is to enforce 674 # compatibility with Python 2.2 (pickles written using 675 # protocol 0 or 1 in Python 2.3 should be unpicklable by 676 # Python 2.2). 677 cls = args[0] 678 if not hasattr(cls, "__new__"): 679 raise PicklingError( 680 "args[0] from __newobj__ args has no __new__") 681 if obj is not None and cls is not obj.__class__: 682 raise PicklingError( 683 "args[0] from __newobj__ args has the wrong class") 684 args = args[1:] 685 save(cls) 686 save(args) 687 write(NEWOBJ) 688 else: 689 save(func) 690 save(args) 691 write(REDUCE) 692 693 if obj is not None: 694 # If the object is already in the memo, this means it is 695 # recursive. In this case, throw away everything we put on the 696 # stack, and fetch the object back from the memo. 
697 if id(obj) in self.memo: 698 write(POP + self.get(self.memo[id(obj)][0])) 699 else: 700 self.memoize(obj) 701 702 # More new special cases (that work with older protocols as 703 # well): when __reduce__ returns a tuple with 4 or 5 items, 704 # the 4th and 5th item should be iterators that provide list 705 # items and dict items (as (key, value) tuples), or None. 706 707 if listitems is not None: 708 self._batch_appends(listitems) 709 710 if dictitems is not None: 711 self._batch_setitems(dictitems) 712 713 if state is not None: 714 if state_setter is None: 715 save(state) 716 write(BUILD) 717 else: 718 # If a state_setter is specified, call it instead of load_build 719 # to update obj's with its previous state. 720 # First, push state_setter and its tuple of expected arguments 721 # (obj, state) onto the stack. 722 save(state_setter) 723 save(obj) # simple BINGET opcode as obj is already memoized. 724 save(state) 725 write(TUPLE2) 726 # Trigger a state_setter(obj, state) function call. 727 write(REDUCE) 728 # The purpose of state_setter is to carry-out an 729 # inplace modification of obj. We do not care about what the 730 # method might return, so its output is eventually removed from 731 # the stack. 732 write(POP) 733 734 # Methods below this point are dispatched through the dispatch table 735 736 dispatch = {} 737 738 def save_none(self, obj): 739 self.write(NONE) 740 dispatch[type(None)] = save_none 741 742 def save_bool(self, obj): 743 if self.proto >= 2: 744 self.write(NEWTRUE if obj else NEWFALSE) 745 else: 746 self.write(TRUE if obj else FALSE) 747 dispatch[bool] = save_bool 748 749 def save_long(self, obj): 750 if self.bin: 751 # If the int is small enough to fit in a signed 4-byte 2's-comp 752 # format, we can store it more efficiently than the general 753 # case. 
754 # First one- and two-byte unsigned ints: 755 if obj >= 0: 756 if obj <= 0xff: 757 self.write(BININT1 + pack("<B", obj)) 758 return 759 if obj <= 0xffff: 760 self.write(BININT2 + pack("<H", obj)) 761 return 762 # Next check for 4-byte signed ints: 763 if -0x80000000 <= obj <= 0x7fffffff: 764 self.write(BININT + pack("<i", obj)) 765 return 766 if self.proto >= 2: 767 encoded = encode_long(obj) 768 n = len(encoded) 769 if n < 256: 770 self.write(LONG1 + pack("<B", n) + encoded) 771 else: 772 self.write(LONG4 + pack("<i", n) + encoded) 773 return 774 if -0x80000000 <= obj <= 0x7fffffff: 775 self.write(INT + repr(obj).encode("ascii") + b'\n') 776 else: 777 self.write(LONG + repr(obj).encode("ascii") + b'L\n') 778 dispatch[int] = save_long 779 780 def save_float(self, obj): 781 if self.bin: 782 self.write(BINFLOAT + pack('>d', obj)) 783 else: 784 self.write(FLOAT + repr(obj).encode("ascii") + b'\n') 785 dispatch[float] = save_float 786 787 def _save_bytes_no_memo(self, obj): 788 # helper for writing bytes objects for protocol >= 3 789 # without memoizing them 790 assert self.proto >= 3 791 n = len(obj) 792 if n <= 0xff: 793 self.write(SHORT_BINBYTES + pack("<B", n) + obj) 794 elif n > 0xffffffff and self.proto >= 4: 795 self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj) 796 elif n >= self.framer._FRAME_SIZE_TARGET: 797 self._write_large_bytes(BINBYTES + pack("<I", n), obj) 798 else: 799 self.write(BINBYTES + pack("<I", n) + obj) 800 801 def save_bytes(self, obj): 802 if self.proto < 3: 803 if not obj: # bytes object is empty 804 self.save_reduce(bytes, (), obj=obj) 805 else: 806 self.save_reduce(codecs.encode, 807 (str(obj, 'latin1'), 'latin1'), obj=obj) 808 return 809 self._save_bytes_no_memo(obj) 810 self.memoize(obj) 811 dispatch[bytes] = save_bytes 812 813 def _save_bytearray_no_memo(self, obj): 814 # helper for writing bytearray objects for protocol >= 5 815 # without memoizing them 816 assert self.proto >= 5 817 n = len(obj) 818 if n >= 
self.framer._FRAME_SIZE_TARGET: 819 self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj) 820 else: 821 self.write(BYTEARRAY8 + pack("<Q", n) + obj) 822 823 def save_bytearray(self, obj): 824 if self.proto < 5: 825 if not obj: # bytearray is empty 826 self.save_reduce(bytearray, (), obj=obj) 827 else: 828 self.save_reduce(bytearray, (bytes(obj),), obj=obj) 829 return 830 self._save_bytearray_no_memo(obj) 831 self.memoize(obj) 832 dispatch[bytearray] = save_bytearray 833 834 if _HAVE_PICKLE_BUFFER: 835 def save_picklebuffer(self, obj): 836 if self.proto < 5: 837 raise PicklingError("PickleBuffer can only be pickled with " 838 "protocol >= 5") 839 with obj.raw() as m: 840 if not m.contiguous: 841 raise PicklingError("PickleBuffer can not be pickled when " 842 "pointing to a non-contiguous buffer") 843 in_band = True 844 if self._buffer_callback is not None: 845 in_band = bool(self._buffer_callback(obj)) 846 if in_band: 847 # Write data in-band 848 # XXX The C implementation avoids a copy here 849 buf = m.tobytes() 850 in_memo = id(buf) in self.memo 851 if m.readonly: 852 if in_memo: 853 self._save_bytes_no_memo(buf) 854 else: 855 self.save_bytes(buf) 856 else: 857 if in_memo: 858 self._save_bytearray_no_memo(buf) 859 else: 860 self.save_bytearray(buf) 861 else: 862 # Write data out-of-band 863 self.write(NEXT_BUFFER) 864 if m.readonly: 865 self.write(READONLY_BUFFER) 866 867 dispatch[PickleBuffer] = save_picklebuffer 868 869 def save_str(self, obj): 870 if self.bin: 871 encoded = obj.encode('utf-8', 'surrogatepass') 872 n = len(encoded) 873 if n <= 0xff and self.proto >= 4: 874 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded) 875 elif n > 0xffffffff and self.proto >= 4: 876 self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded) 877 elif n >= self.framer._FRAME_SIZE_TARGET: 878 self._write_large_bytes(BINUNICODE + pack("<I", n), encoded) 879 else: 880 self.write(BINUNICODE + pack("<I", n) + encoded) 881 else: 882 # Escape what raw-unicode-escape 
doesn't, but memoize the original. 883 tmp = obj.replace("\\", "\\u005c") 884 tmp = tmp.replace("\0", "\\u0000") 885 tmp = tmp.replace("\n", "\\u000a") 886 tmp = tmp.replace("\r", "\\u000d") 887 tmp = tmp.replace("\x1a", "\\u001a") # EOF on DOS 888 self.write(UNICODE + tmp.encode('raw-unicode-escape') + b'\n') 889 self.memoize(obj) 890 dispatch[str] = save_str 891 892 def save_tuple(self, obj): 893 if not obj: # tuple is empty 894 if self.bin: 895 self.write(EMPTY_TUPLE) 896 else: 897 self.write(MARK + TUPLE) 898 return 899 900 n = len(obj) 901 save = self.save 902 memo = self.memo 903 if n <= 3 and self.proto >= 2: 904 for element in obj: 905 save(element) 906 # Subtle. Same as in the big comment below. 907 if id(obj) in memo: 908 get = self.get(memo[id(obj)][0]) 909 self.write(POP * n + get) 910 else: 911 self.write(_tuplesize2code[n]) 912 self.memoize(obj) 913 return 914 915 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple 916 # has more than 3 elements. 917 write = self.write 918 write(MARK) 919 for element in obj: 920 save(element) 921 922 if id(obj) in memo: 923 # Subtle. d was not in memo when we entered save_tuple(), so 924 # the process of saving the tuple's elements must have saved 925 # the tuple itself: the tuple is recursive. The proper action 926 # now is to throw away everything we put on the stack, and 927 # simply GET the tuple (it's already constructed). This check 928 # could have been done in the "for element" loop instead, but 929 # recursive tuples are a rare thing. 930 get = self.get(memo[id(obj)][0]) 931 if self.bin: 932 write(POP_MARK + get) 933 else: # proto 0 -- POP_MARK not available 934 write(POP * (n+1) + get) 935 return 936 937 # No recursion. 
938 write(TUPLE) 939 self.memoize(obj) 940 941 dispatch[tuple] = save_tuple 942 943 def save_list(self, obj): 944 if self.bin: 945 self.write(EMPTY_LIST) 946 else: # proto 0 -- can't use EMPTY_LIST 947 self.write(MARK + LIST) 948 949 self.memoize(obj) 950 self._batch_appends(obj) 951 952 dispatch[list] = save_list 953 954 _BATCHSIZE = 1000 955 956 def _batch_appends(self, items): 957 # Helper to batch up APPENDS sequences 958 save = self.save 959 write = self.write 960 961 if not self.bin: 962 for x in items: 963 save(x) 964 write(APPEND) 965 return 966 967 it = iter(items) 968 while True: 969 tmp = list(islice(it, self._BATCHSIZE)) 970 n = len(tmp) 971 if n > 1: 972 write(MARK) 973 for x in tmp: 974 save(x) 975 write(APPENDS) 976 elif n: 977 save(tmp[0]) 978 write(APPEND) 979 # else tmp is empty, and we're done 980 if n < self._BATCHSIZE: 981 return 982 983 def save_dict(self, obj): 984 if self.bin: 985 self.write(EMPTY_DICT) 986 else: # proto 0 -- can't use EMPTY_DICT 987 self.write(MARK + DICT) 988 989 self.memoize(obj) 990 self._batch_setitems(obj.items()) 991 992 dispatch[dict] = save_dict 993 994 def _batch_setitems(self, items): 995 # Helper to batch up SETITEMS sequences; proto >= 1 only 996 save = self.save 997 write = self.write 998 999 if not self.bin: 1000 for k, v in items: 1001 save(k) 1002 save(v) 1003 write(SETITEM) 1004 return 1005 1006 it = iter(items) 1007 while True: 1008 tmp = list(islice(it, self._BATCHSIZE)) 1009 n = len(tmp) 1010 if n > 1: 1011 write(MARK) 1012 for k, v in tmp: 1013 save(k) 1014 save(v) 1015 write(SETITEMS) 1016 elif n: 1017 k, v = tmp[0] 1018 save(k) 1019 save(v) 1020 write(SETITEM) 1021 # else tmp is empty, and we're done 1022 if n < self._BATCHSIZE: 1023 return 1024 1025 def save_set(self, obj): 1026 save = self.save 1027 write = self.write 1028 1029 if self.proto < 4: 1030 self.save_reduce(set, (list(obj),), obj=obj) 1031 return 1032 1033 write(EMPTY_SET) 1034 self.memoize(obj) 1035 1036 it = iter(obj) 1037 while True: 
            batch = list(islice(it, self._BATCHSIZE))
            n = len(batch)
            if n > 0:
                write(MARK)
                for item in batch:
                    save(item)
                write(ADDITEMS)
            if n < self._BATCHSIZE:
                return
    dispatch[set] = save_set

    def save_frozenset(self, obj):
        save = self.save
        write = self.write

        if self.proto < 4:
            # FROZENSET opcode exists only in protocol 4+.
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset

    def save_global(self, obj, name=None):
        # Pickle an object by reference: record its module plus (qualified)
        # name, verifying that the name actually resolves back to the same
        # object so unpickling cannot silently return something else.
        write = self.write
        memo = self.memo

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            # Prefer the compact EXT1/EXT2/EXT4 opcodes when the global is
            # registered in the copyreg extension registry.
            code = _extension_registry.get((module_name, name), _NoValue)
            if code is not _NoValue:
                if code <= 0xff:
                    data = pack("<B", code)
                    if data == b'\0':
                        # Should never happen in normal circumstances,
                        # since the type and the value of the code are
                        # checked in copyreg.add_extension().
                        raise RuntimeError("extension code 0 is out of range")
                    write(EXT1 + data)
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            # The object hangs directly off the module: drop the dotted
            # qualname prefix.
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif '.' in name:
            # In protocol < 4, objects with multi-part __qualname__
            # are represented as
            # getattr(getattr(..., attrname1), attrname2).
            dotted_path = name.split('.')
            name = dotted_path.pop(0)
            save = self.save
            for attrname in dotted_path:
                save(getattr)
                if self.proto < 2:
                    write(MARK)
            self._save_toplevel_by_name(module_name, name)
            for attrname in dotted_path:
                save(attrname)
                if self.proto < 2:
                    write(TUPLE)
                else:
                    write(TUPLE2)
                write(REDUCE)
        else:
            self._save_toplevel_by_name(module_name, name)

        self.memoize(obj)

    def _save_toplevel_by_name(self, module_name, name):
        if self.proto >= 3:
            # Non-ASCII identifiers are supported only with protocols >= 3.
1147 self.write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + 1148 bytes(name, "utf-8") + b'\n') 1149 else: 1150 if self.fix_imports: 1151 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING 1152 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING 1153 if (module_name, name) in r_name_mapping: 1154 module_name, name = r_name_mapping[(module_name, name)] 1155 elif module_name in r_import_mapping: 1156 module_name = r_import_mapping[module_name] 1157 try: 1158 self.write(GLOBAL + bytes(module_name, "ascii") + b'\n' + 1159 bytes(name, "ascii") + b'\n') 1160 except UnicodeEncodeError: 1161 raise PicklingError( 1162 "can't pickle global identifier '%s.%s' using " 1163 "pickle protocol %i" % (module_name, name, self.proto)) from None 1164 1165 def save_type(self, obj): 1166 if obj is type(None): 1167 return self.save_reduce(type, (None,), obj=obj) 1168 elif obj is type(NotImplemented): 1169 return self.save_reduce(type, (NotImplemented,), obj=obj) 1170 elif obj is type(...): 1171 return self.save_reduce(type, (...,), obj=obj) 1172 return self.save_global(obj) 1173 1174 dispatch[FunctionType] = save_global 1175 dispatch[type] = save_type 1176 1177 1178# Unpickling machinery 1179 1180class _Unpickler: 1181 1182 def __init__(self, file, *, fix_imports=True, 1183 encoding="ASCII", errors="strict", buffers=None): 1184 """This takes a binary file for reading a pickle data stream. 1185 1186 The protocol version of the pickle is detected automatically, so 1187 no proto argument is needed. 1188 1189 The argument *file* must have two methods, a read() method that 1190 takes an integer argument, and a readline() method that requires 1191 no arguments. Both methods should return bytes. Thus *file* 1192 can be a binary file object opened for reading, an io.BytesIO 1193 object, or any other custom object that meets this interface. 

        The file-like object must have two methods, a read() method
        that takes an integer argument, and a readline() method that
        requires no arguments. Both methods should return bytes.
        Thus file-like object can be a binary file object opened for
        reading, a BytesIO object, or any other custom object that
        meets this interface.

        If *buffers* is not None, it should be an iterable of buffer-enabled
        objects that is consumed each time the pickle stream references
        an out-of-band buffer view. Such buffers have been given in order
        to the *buffer_callback* of a Pickler object.

        If *buffers* is None (the default), then the buffers are taken
        from the pickle stream, assuming they are serialized there.
        It is an error for *buffers* to be None if the pickle stream
        was produced with a non-None *buffer_callback*.

        Other optional arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2. If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3. The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._buffers = iter(buffers) if buffers is not None else None
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        self.proto = 0
        self.fix_imports = fix_imports

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.load().
        # (NOTE(review): the original comment said ".dump()", but the C
        # counterpart being mimicked here is the load() entry point.)
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readinto = self._unframer.readinto
        self.readline = self._unframer.readline
        self.metastack = []  # stack of saved stacks, pushed by MARK
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Opcode loop: runs until the STOP handler raises _Stop with
            # the final value, or the stream ends prematurely (EOFError).
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    # Return a list of items pushed in the stack after last MARK instruction.
    def pop_mark(self):
        items = self.stack
        self.stack = self.metastack.pop()
        self.append = self.stack.append
        return items

    def persistent_load(self, pid):
        # Subclasses override this to resolve persistent IDs.
        raise UnpicklingError("unsupported persistent id encountered")

    dispatch = {}

    def load_proto(self):
        proto = self.read(1)[0]
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto

    def load_frame(self):
        # FRAME: 8-byte little-endian size of the upcoming frame.
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame

    def load_persid(self):
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid

    def load_binpersid(self):
        pid =
              self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE[0]] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true

    def load_int(self):
        # Protocol 0 INT also encodes booleans, as the special argument
        # lines "00" (False) and "01" (True).
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            val = int(data, 0)
        self.append(val)
    dispatch[INT[0]] = load_int

    def load_binint(self):
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint

    def load_binint1(self):
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1

    def load_binint2(self):
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2

    def load_long(self):
        val = self.readline()[:-1]
        if val and val[-1] == b'L'[0]:
            # Strip the trailing 'L' that Python 2 long pickles carry.
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long

    def load_long1(self):
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1

    def load_long4(self):
        n, = unpack('<i', self.read(4))
        if n < 0:
            # Corrupt or hostile pickle -- we never write one like this
            raise UnpicklingError("LONG pickle has negative byte count")
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4

    def load_float(self):
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float

    def load_binfloat(self):
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat

    def _decode_string(self, value):
        # Used to allow strings from Python 2 to be decoded either as
        # bytes or Unicode strings. This should be used only with the
        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
        if self.encoding == "bytes":
            return value
        else:
            return value.decode(self.encoding, self.errors)

    def load_string(self):
        data = self.readline()[:-1]
        # Strip outermost quotes
        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
            data = data[1:-1]
        else:
            raise UnpicklingError("the STRING opcode argument must be quoted")
        self.append(self._decode_string(codecs.escape_decode(data)[0]))
    dispatch[STRING[0]] = load_string

    def load_binstring(self):
        # Deprecated BINSTRING uses signed 32-bit length
        len, = unpack('<i', self.read(4))
        if len < 0:
            raise UnpicklingError("BINSTRING pickle has negative byte count")
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[BINSTRING[0]] = load_binstring

    def load_binbytes(self):
        # Unsigned 32-bit length; reject anything the platform can't hold.
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINBYTES exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES[0]] = load_binbytes

    def load_unicode(self):
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode

    def load_binunicode(self):
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        # 'surrogatepass' mirrors the 'surrogatepass' encoding done by the
        # pickler so lone surrogates round-trip.
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE[0]] = load_binunicode

    def load_binunicode8(self):
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE8[0]] = load_binunicode8

    def
load_binbytes8(self):
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES8[0]] = load_binbytes8

    def load_bytearray8(self):
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        # Pre-size the bytearray and fill it in place via readinto().
        b = bytearray(len)
        self.readinto(b)
        self.append(b)
    dispatch[BYTEARRAY8[0]] = load_bytearray8

    def load_next_buffer(self):
        # NEXT_BUFFER consumes one entry from the *buffers* iterable given
        # to __init__ (out-of-band data, PEP 574).
        if self._buffers is None:
            raise UnpicklingError("pickle stream refers to out-of-band data "
                                  "but no *buffers* argument was given")
        try:
            buf = next(self._buffers)
        except StopIteration:
            raise UnpicklingError("not enough out-of-band buffers")
        self.append(buf)
    dispatch[NEXT_BUFFER[0]] = load_next_buffer

    def load_readonly_buffer(self):
        buf = self.stack[-1]
        with memoryview(buf) as m:
            if not m.readonly:
                self.stack[-1] = m.toreadonly()
    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer

    def load_short_binstring(self):
        len = self.read(1)[0]
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[SHORT_BINSTRING[0]] = load_short_binstring

    def load_short_binbytes(self):
        len = self.read(1)[0]
        self.append(self.read(len))
    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes

    def load_short_binunicode(self):
        len = self.read(1)[0]
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode

    def load_tuple(self):
        items = self.pop_mark()
        self.append(tuple(items))
    dispatch[TUPLE[0]] = load_tuple

    def load_empty_tuple(self):
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple

    def load_tuple1(self):
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3

    def load_empty_list(self):
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list

    def load_empty_dictionary(self):
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary

    def load_empty_set(self):
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set

    def load_frozenset(self):
        items = self.pop_mark()
        self.append(frozenset(items))
    dispatch[FROZENSET[0]] = load_frozenset

    def load_list(self):
        items = self.pop_mark()
        self.append(items)
    dispatch[LIST[0]] = load_list

    def load_dict(self):
        # The items since the last MARK alternate key, value.
        items = self.pop_mark()
        d = {items[i]: items[i+1]
             for i in range(0, len(items), 2)}
        self.append(d)
    dispatch[DICT[0]] = load_dict

    # INST and OBJ differ only in how they get a class object. It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, args):
        # Call klass(*args) when there are args, when klass is not a real
        # type, or when it opts in via __getinitargs__; otherwise bypass
        # __init__ with __new__ (legacy INST/OBJ semantics).
        if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
            try:
                value = klass(*args)
            except TypeError as err:
                # NOTE(review): err.__traceback__ is passed as a *second
                # argument* to TypeError rather than chained with
                # "from err" -- looks intentional-but-odd; confirm before
                # changing.
                raise TypeError("in constructor for %s: %s" %
                                (klass.__name__, str(err)), err.__traceback__)
        else:
            value = klass.__new__(klass)
        self.append(value)

    def load_inst(self):
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.pop_mark())
    dispatch[INST[0]] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj

    def load_newobj(self):
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args)
        self.append(obj)
    dispatch[NEWOBJ[0]] = load_newobj

    def load_newobj_ex(self):
        kwargs = self.stack.pop()
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args, **kwargs)
        self.append(obj)
    dispatch[NEWOBJ_EX[0]] = load_newobj_ex

    def load_global(self):
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global

    def load_stack_global(self):
        # Protocol 4 form of GLOBAL: module and name come from the stack.
        name = self.stack.pop()
        module = self.stack.pop()
        if type(name) is not str or type(module) is not str:
            raise UnpicklingError("STACK_GLOBAL requires str")
        self.append(self.find_class(module, name))
    dispatch[STACK_GLOBAL[0]] = load_stack_global

    def load_ext1(self):
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1

    def load_ext2(self):
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2

    def load_ext4(self):
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4

    def get_extension(self, code):
        # Resolve a copyreg extension code to an object, caching hits in
        # _extension_cache.
        obj = _extension_cache.get(code, _NoValue)
        if obj is not _NoValue:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            if code <= 0:  # note that 0 is forbidden
                # Corrupt or hostile pickle.
                raise UnpicklingError("EXT specifies code <= 0")
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this.
        sys.audit('pickle.find_class', module, name)
        if self.proto < 3 and self.fix_imports:
            # Map Python 2 names forward to their Python 3 spellings.
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            # Protocol 4+ allows dotted attribute paths within a module.
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)

    def load_reduce(self):
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        stack[-1] = func(*args)
    dispatch[REDUCE[0]] = load_reduce

    def load_pop(self):
        # POP discards the top of stack; an empty stack means the top is
        # actually a mark, so discard that instead.
        if self.stack:
            del self.stack[-1]
        else:
            self.pop_mark()
    dispatch[POP[0]] = load_pop

    def load_pop_mark(self):
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup

    def load_get(self):
        i = int(self.readline()[:-1])
        try:
            self.append(self.memo[i])
        except KeyError:
            msg = f'Memo value not found at index {i}'
            raise UnpicklingError(msg) from None
dispatch[GET[0]] = load_get 1658 1659 def load_binget(self): 1660 i = self.read(1)[0] 1661 try: 1662 self.append(self.memo[i]) 1663 except KeyError as exc: 1664 msg = f'Memo value not found at index {i}' 1665 raise UnpicklingError(msg) from None 1666 dispatch[BINGET[0]] = load_binget 1667 1668 def load_long_binget(self): 1669 i, = unpack('<I', self.read(4)) 1670 try: 1671 self.append(self.memo[i]) 1672 except KeyError as exc: 1673 msg = f'Memo value not found at index {i}' 1674 raise UnpicklingError(msg) from None 1675 dispatch[LONG_BINGET[0]] = load_long_binget 1676 1677 def load_put(self): 1678 i = int(self.readline()[:-1]) 1679 if i < 0: 1680 raise ValueError("negative PUT argument") 1681 self.memo[i] = self.stack[-1] 1682 dispatch[PUT[0]] = load_put 1683 1684 def load_binput(self): 1685 i = self.read(1)[0] 1686 if i < 0: 1687 raise ValueError("negative BINPUT argument") 1688 self.memo[i] = self.stack[-1] 1689 dispatch[BINPUT[0]] = load_binput 1690 1691 def load_long_binput(self): 1692 i, = unpack('<I', self.read(4)) 1693 if i > maxsize: 1694 raise ValueError("negative LONG_BINPUT argument") 1695 self.memo[i] = self.stack[-1] 1696 dispatch[LONG_BINPUT[0]] = load_long_binput 1697 1698 def load_memoize(self): 1699 memo = self.memo 1700 memo[len(memo)] = self.stack[-1] 1701 dispatch[MEMOIZE[0]] = load_memoize 1702 1703 def load_append(self): 1704 stack = self.stack 1705 value = stack.pop() 1706 list = stack[-1] 1707 list.append(value) 1708 dispatch[APPEND[0]] = load_append 1709 1710 def load_appends(self): 1711 items = self.pop_mark() 1712 list_obj = self.stack[-1] 1713 try: 1714 extend = list_obj.extend 1715 except AttributeError: 1716 pass 1717 else: 1718 extend(items) 1719 return 1720 # Even if the PEP 307 requires extend() and append() methods, 1721 # fall back on append() if the object has no extend() method 1722 # for backward compatibility. 
1723 append = list_obj.append 1724 for item in items: 1725 append(item) 1726 dispatch[APPENDS[0]] = load_appends 1727 1728 def load_setitem(self): 1729 stack = self.stack 1730 value = stack.pop() 1731 key = stack.pop() 1732 dict = stack[-1] 1733 dict[key] = value 1734 dispatch[SETITEM[0]] = load_setitem 1735 1736 def load_setitems(self): 1737 items = self.pop_mark() 1738 dict = self.stack[-1] 1739 for i in range(0, len(items), 2): 1740 dict[items[i]] = items[i + 1] 1741 dispatch[SETITEMS[0]] = load_setitems 1742 1743 def load_additems(self): 1744 items = self.pop_mark() 1745 set_obj = self.stack[-1] 1746 if isinstance(set_obj, set): 1747 set_obj.update(items) 1748 else: 1749 add = set_obj.add 1750 for item in items: 1751 add(item) 1752 dispatch[ADDITEMS[0]] = load_additems 1753 1754 def load_build(self): 1755 stack = self.stack 1756 state = stack.pop() 1757 inst = stack[-1] 1758 setstate = getattr(inst, "__setstate__", _NoValue) 1759 if setstate is not _NoValue: 1760 setstate(state) 1761 return 1762 slotstate = None 1763 if isinstance(state, tuple) and len(state) == 2: 1764 state, slotstate = state 1765 if state: 1766 inst_dict = inst.__dict__ 1767 intern = sys.intern 1768 for k, v in state.items(): 1769 if type(k) is str: 1770 inst_dict[intern(k)] = v 1771 else: 1772 inst_dict[k] = v 1773 if slotstate: 1774 for k, v in slotstate.items(): 1775 setattr(inst, k, v) 1776 dispatch[BUILD[0]] = load_build 1777 1778 def load_mark(self): 1779 self.metastack.append(self.stack) 1780 self.stack = [] 1781 self.append = self.stack.append 1782 dispatch[MARK[0]] = load_mark 1783 1784 def load_stop(self): 1785 value = self.stack.pop() 1786 raise _Stop(value) 1787 dispatch[STOP[0]] = load_stop 1788 1789 1790# Shorthands 1791 1792def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None): 1793 _Pickler(file, protocol, fix_imports=fix_imports, 1794 buffer_callback=buffer_callback).dump(obj) 1795 1796def _dumps(obj, protocol=None, *, fix_imports=True, 
           buffer_callback=None):
    # Pickle *obj* to a bytes object via an in-memory buffer.
    f = io.BytesIO()
    _Pickler(f, protocol, fix_imports=fix_imports,
             buffer_callback=buffer_callback).dump(obj)
    res = f.getvalue()
    assert isinstance(res, bytes_types)
    return res

def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    # Read one pickled object from the open binary *file* object.
    return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                      encoding=encoding, errors=errors).load()

def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    # Unpickle one object from the bytes-like object *s*.
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    file = io.BytesIO(s)
    return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                      encoding=encoding, errors=errors).load()

# Use the faster _pickle if possible
try:
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C accelerator available: expose the pure-Python implementations
    # defined above under the public names.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads

# Doctest
def _test():
    import doctest
    return doctest.testmod()

if __name__ == "__main__":
    # Command-line utility: pretty-print the contents of pickle files
    # ('-' reads from stdin), or run the doctest suite with -t.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file',
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    else:
        if not args.pickle_file:
            parser.print_help()
        else:
            import pprint
            for fn in args.pickle_file:
                if fn == '-':
                    obj = load(sys.stdin.buffer)
                else:
                    with open(fn, 'rb') as f:
                        obj = load(f)
                pprint.pprint(obj)