"""Create portable serialized representations of Python objects.

See module copyreg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(bytes) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""

from types import FunctionType
from copyreg import dispatch_table
from copyreg import _extension_registry, _inverted_registry, _extension_cache
from itertools import islice
from functools import partial
import sys
from sys import maxsize
from struct import pack, unpack
import re
import io
import codecs
import _compat_pickle

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# PickleBuffer (out-of-band buffer support, protocol 5) lives in the C
# accelerator module; record whether it is available so the pure-Python
# pickler can register a handler for it conditionally.
try:
    from _pickle import PickleBuffer
    __all__.append("PickleBuffer")
    _HAVE_PICKLE_BUFFER = True
except ImportError:
    _HAVE_PICKLE_BUFFER = False


# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4

class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass

class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass

class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass

# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        # The fully unpickled object carried up to Unpickler.load().
        self.value = value

# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Index i gives the opcode that builds a tuple of exactly i elements.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "  ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Re-export every all-caps opcode name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


class _Framer:
    """Writer-side framing helper (protocol 4+).

    Buffers outgoing opcode bytes and flushes them to the underlying file
    as length-prefixed FRAME records once the buffer reaches the target
    size (or is forced out).
    """

    _FRAME_SIZE_MIN = 4          # frames smaller than this are written bare
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        """Begin buffering output into frames."""
        self.current_frame = io.BytesIO()

    def end_framing(self):
        """Flush any pending frame and stop framing."""
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        """Flush the buffered frame to the file if it is big enough (or
        *force* is true), then start a fresh buffer."""
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame. The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                # Issue a separate call to write to append the frame
                # contents without concatenation to the above to avoid a
                # memory copy.
                write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        """Write *data* into the current frame, or straight through to the
        file when framing is not active (protocols < 4)."""
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        """Write a large binary object directly to the file, outside any
        frame, as two separate write() calls (header, then payload)."""
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object. Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)


class _Unframer:
    """Reader-side counterpart of _Framer.

    Serves reads from the current in-memory frame when one is active,
    falling back to the underlying file otherwise, and refuses reads that
    would straddle a frame boundary.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        # NOTE(review): file_tell is accepted for interface compatibility
        # but not used here.
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def readinto(self, buf):
        """Fill *buf* from the current frame (or the file); returns the
        number of bytes read."""
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame exhausted exactly at a boundary: drop it and read
                # the requested bytes from the underlying file instead.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        """Read exactly *n* bytes from the current frame or the file."""
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        """Read one newline-terminated line from the frame or the file."""
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                # A line may not span a frame boundary.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        """Read the next *frame_size* bytes from the file and make them the
        current frame; the previous frame must be fully consumed."""
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))


# Tools used for pickling.
def _getattribute(obj, name):
    # Resolve the dotted path *name* starting from *obj*.
    # Returns (attribute, parent) where parent is the object the final
    # attribute was looked up on.  A '<locals>' path component is rejected:
    # such objects cannot be re-imported by name at unpickling time.
    for subpath in name.split('.'):
        if subpath == '<locals>':
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, obj))
        try:
            parent = obj
            obj = getattr(obj, subpath)
        except AttributeError:
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, obj)) from None
    return obj, parent

def whichmodule(obj, name):
    """Find the module an object belong to."""
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Protect the iteration by using a list copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr.
    for module_name, module in sys.modules.copy().items():
        if (module_name == '__main__'
            or module_name == '__mp_main__'  # bpo-42406
            or module is None):
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'

def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    # Negative values may carry one redundant sign byte; strip it when the
    # preceding byte already has the sign bit set.
    if x < 0 and nbytes > 1:
        if result[-1] == 0xff and (result[-2] & 0x80) != 0:
            result = result[:-1]
    return result

def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)


# Pickling machinery

class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 4. It was introduced in Python 3.4, and
        is incompatible with previous versions.

        Specifying a negative protocol version selects the highest
        protocol version supported. The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.

        If *buffer_callback* is not None, then it can be called any number
        of times with a buffer view. If the callback returns a false value
        (such as None), the given buffer is out-of-band; otherwise the
        buffer is serialized in-band, i.e. inside the pickle stream.

        It is an error if *buffer_callback* is not None and *protocol*
        is None or smaller than 5.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}                    # id(obj) -> (memo key, obj)
        self.proto = int(protocol)
        self.bin = protocol >= 1          # binary opcodes available?
        self.fast = 0                     # nonzero disables memoization
        self.fix_imports = fix_imports and protocol < 3

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value. This method is
        useful when re-using picklers.
        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo. The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention. The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        idx = len(self.memo)
        self.write(self.put(idx))
        self.memo[id(obj)] = idx, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, idx):
        if self.proto >= 4:
            return MEMOIZE
        elif self.bin:
            if idx < 256:
                return BINPUT + pack("<B", idx)
            else:
                return LONG_BINPUT + pack("<I", idx)
        else:
            return PUT + repr(idx).encode("ascii") + b'\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i):
        # Opcode bytes that fetch memo entry *i* back onto the stack.
        if self.bin:
            if i < 256:
                return BINGET + pack("<B", i)
            else:
                return LONG_BINGET + pack("<I", i)

        return GET + repr(i).encode("ascii") + b'\n'

    def save(self, obj, save_persistent_id=True):
        """Pickle *obj*: the central dispatch point of the pickler.

        Tries, in order: persistent id, the memo, reducer_override(),
        the type dispatch table, dispatch_table reducers, and finally
        the object's own __reduce_ex__/__reduce__ protocol.
        """
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # This exists so a subclass can override it
        return None

    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            self.save(pid, save_persistent_id=False)
            self.write(BINPERSID)
        else:
            try:
                self.write(PERSID + str(pid).encode("ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "persistent IDs in protocol 0 must be ASCII strings")

    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, obj=None):
        # This API is called by some subclasses
        # Emit the opcodes for a reduce tuple: construct the object
        # (NEWOBJ_EX / NEWOBJ / REDUCE), memoize it, then append list items,
        # dict items, and state.

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2-3 have no NEWOBJ_EX; fall back to pickling a
                # partial() that performs the same keyword-aware __new__ call.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally. For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class. The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below). If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own. This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj)  # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)

    # Methods below this point are dispatched through the dispatch table

    # Maps a concrete type to the bound save_* method that pickles it.
    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[type(None)] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(NEWTRUE if obj else NEWFALSE)
        else:
            self.write(TRUE if obj else FALSE)
    dispatch[bool] = save_bool

    def save_long(self, obj):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + pack("<B", obj))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + pack("<H", obj))
                    return
            # Next check for 4-byte signed ints:
            if -0x80000000 <= obj <= 0x7fffffff:
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + pack("<B", n) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(INT + repr(obj).encode("ascii") + b'\n')
        else:
            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    dispatch[int] = save_long

    def save_float(self, obj):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
    dispatch[float] = save_float

    def save_bytes(self, obj):
        if self.proto < 3:
            # Protocols 0-2 have no bytes opcodes; reduce to a latin-1
            # round-trip that Python 2 can also read.
            if not obj:  # bytes object is empty
                self.save_reduce(bytes, (), obj=obj)
            else:
                self.save_reduce(codecs.encode,
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
            return
        n = len(obj)
        if n <= 0xff:
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
        else:
            self.write(BINBYTES + pack("<I", n) + obj)
        self.memoize(obj)
    dispatch[bytes] = save_bytes

    def save_bytearray(self, obj):
        if self.proto < 5:
            # No BYTEARRAY8 before protocol 5; reduce via bytes instead.
            if not obj:  # bytearray is empty
                self.save_reduce(bytearray, (), obj=obj)
            else:
                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
            return
        n = len(obj)
        if n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
        else:
            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
    dispatch[bytearray] = save_bytearray

    if _HAVE_PICKLE_BUFFER:
        def save_picklebuffer(self, obj):
            # Protocol 5 out-of-band buffer support (PEP 574).
            if self.proto < 5:
                raise PicklingError("PickleBuffer can only pickled with "
                                    "protocol >= 5")
            with obj.raw() as m:
                if not m.contiguous:
                    raise PicklingError("PickleBuffer can not be pickled when "
                                        "pointing to a non-contiguous buffer")
                in_band = True
                if self._buffer_callback is not None:
                    in_band = bool(self._buffer_callback(obj))
                if in_band:
                    # Write data in-band
                    # XXX The C implementation avoids a copy here
                    if m.readonly:
                        self.save_bytes(m.tobytes())
                    else:
                        self.save_bytearray(m.tobytes())
                else:
                    # Write data out-of-band
                    self.write(NEXT_BUFFER)
                    if m.readonly:
                        self.write(READONLY_BUFFER)

        dispatch[PickleBuffer] = save_picklebuffer

    def save_str(self, obj):
        if self.bin:
            encoded = obj.encode('utf-8', 'surrogatepass')
            n = len(encoded)
            if n <= 0xff and self.proto >= 4:
                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
            elif n > 0xffffffff and self.proto >= 4:
                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
            elif n >= self.framer._FRAME_SIZE_TARGET:
                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
            else:
                self.write(BINUNICODE + pack("<I", n) + encoded)
        else:
            # Protocol 0: escape characters that would break the
            # newline-terminated raw-unicode-escape encoding.
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\0", "\\u0000")
            obj = obj.replace("\n", "\\u000a")
            obj = obj.replace("\r", "\\u000d")
            obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
            self.write(UNICODE + obj.encode('raw-unicode-escape') +
                       b'\n')
        self.memoize(obj)
    dispatch[str] = save_str

    def save_tuple(self, obj):
        if not obj:  # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            for element in obj:
                save(element)
            # Subtle. Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle. d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself: the tuple is recursive. The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed). This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:  # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple

    def save_list(self, obj):
        if self.bin:
            self.write(EMPTY_LIST)
        else:  # proto 0 -- can't use EMPTY_LIST
            self.write(MARK + LIST)

        # Memoize before saving items so recursive lists resolve via GET.
        self.memoize(obj)
        self._batch_appends(obj)

    dispatch[list] = save_list

    # Upper bound on items emitted per APPENDS/SETITEMS/ADDITEMS batch.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return

    def save_dict(self, obj):
        if self.bin:
            self.write(EMPTY_DICT)
        else:  # proto 0 -- can't use EMPTY_DICT
            self.write(MARK + DICT)

        # Memoize before saving items so recursive dicts resolve via GET.
        self.memoize(obj)
        self._batch_setitems(obj.items())

    dispatch[dict] = save_dict
    if PyStringMap is not None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return

    def save_set(self, obj):
        save = self.save
        write = self.write

        if self.proto < 4:
            # No set opcodes before protocol 4.
            self.save_reduce(set, (list(obj),), obj=obj)
            return

        write(EMPTY_SET)
        self.memoize(obj)

        it = iter(obj)
        while True:
            batch = list(islice(it, self._BATCHSIZE))
            n = len(batch)
            if n > 0:
                write(MARK)
                for item in batch:
                    save(item)
                write(ADDITEMS)
            if n < self._BATCHSIZE:
                return
    dispatch[set] = save_set

    def save_frozenset(self, obj):
        save = self.save
        write = self.write

        if self.proto < 4:
            # No frozenset opcode before protocol 4.
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset

    def save_global(self, obj, name=None):
        # Pickle *obj* by reference: as a module-qualified name that the
        # unpickler re-imports.  Verifies the name really resolves back to
        # the very same object.
        write = self.write
        memo = self.memo

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            # Prefer the compact extension-registry encoding when the
            # (module, name) pair has a registered code.
            code = _extension_registry.get((module_name, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + pack("<B", code))
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif parent is not module:
            # Nested attribute (e.g. a method): pickle as getattr(parent, ...).
            self.save_reduce(getattr, (parent, lastname))
        elif self.proto >= 3:
            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
                  bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
                if (module_name, name) in r_name_mapping:
                    module_name, name = r_name_mapping[(module_name, name)]
                elif module_name in r_import_mapping:
                    module_name = r_import_mapping[module_name]
            try:
                # NOTE(review): the error message below interpolates the
                # module object itself (`module`), not `module_name`.
                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                      bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" % (module, name, self.proto)) from None

        self.memoize(obj)

    def save_type(self, obj):
        # The types of the singletons have no importable names; pickle them
        # as type(None) / type(NotImplemented) / type(...) calls instead.
        if obj is type(None):
            return self.save_reduce(type, (None,), obj=obj)
        elif obj is type(NotImplemented):
            return self.save_reduce(type, (NotImplemented,), obj=obj)
        elif obj is type(...):
            return self.save_reduce(type, (...,), obj=obj)
        return self.save_global(obj)

    dispatch[FunctionType] = save_global
    dispatch[type] = save_type


# Unpickling machinery

class _Unpickler:

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
Both methods should return bytes. Thus *file* 1148 can be a binary file object opened for reading, an io.BytesIO 1149 object, or any other custom object that meets this interface. 1150 1151 The file-like object must have two methods, a read() method 1152 that takes an integer argument, and a readline() method that 1153 requires no arguments. Both methods should return bytes. 1154 Thus file-like object can be a binary file object opened for 1155 reading, a BytesIO object, or any other custom object that 1156 meets this interface. 1157 1158 If *buffers* is not None, it should be an iterable of buffer-enabled 1159 objects that is consumed each time the pickle stream references 1160 an out-of-band buffer view. Such buffers have been given in order 1161 to the *buffer_callback* of a Pickler object. 1162 1163 If *buffers* is None (the default), then the buffers are taken 1164 from the pickle stream, assuming they are serialized there. 1165 It is an error for *buffers* to be None if the pickle stream 1166 was produced with a non-None *buffer_callback*. 1167 1168 Other optional arguments are *fix_imports*, *encoding* and 1169 *errors*, which are used to control compatibility support for 1170 pickle stream generated by Python 2. If *fix_imports* is True, 1171 pickle will try to map the old Python 2 names to the new names 1172 used in Python 3. The *encoding* and *errors* tell pickle how 1173 to decode 8-bit string instances pickled by Python 2; these 1174 default to 'ASCII' and 'strict', respectively. *encoding* can be 1175 'bytes' to read theses 8-bit string instances as bytes objects. 1176 """ 1177 self._buffers = iter(buffers) if buffers is not None else None 1178 self._file_readline = file.readline 1179 self._file_read = file.read 1180 self.memo = {} 1181 self.encoding = encoding 1182 self.errors = errors 1183 self.proto = 0 1184 self.fix_imports = fix_imports 1185 1186 def load(self): 1187 """Read a pickled object representation from the open file. 
1188 1189 Return the reconstituted object hierarchy specified in the file. 1190 """ 1191 # Check whether Unpickler was initialized correctly. This is 1192 # only needed to mimic the behavior of _pickle.Unpickler.dump(). 1193 if not hasattr(self, "_file_read"): 1194 raise UnpicklingError("Unpickler.__init__() was not called by " 1195 "%s.__init__()" % (self.__class__.__name__,)) 1196 self._unframer = _Unframer(self._file_read, self._file_readline) 1197 self.read = self._unframer.read 1198 self.readinto = self._unframer.readinto 1199 self.readline = self._unframer.readline 1200 self.metastack = [] 1201 self.stack = [] 1202 self.append = self.stack.append 1203 self.proto = 0 1204 read = self.read 1205 dispatch = self.dispatch 1206 try: 1207 while True: 1208 key = read(1) 1209 if not key: 1210 raise EOFError 1211 assert isinstance(key, bytes_types) 1212 dispatch[key[0]](self) 1213 except _Stop as stopinst: 1214 return stopinst.value 1215 1216 # Return a list of items pushed in the stack after last MARK instruction. 
1217 def pop_mark(self): 1218 items = self.stack 1219 self.stack = self.metastack.pop() 1220 self.append = self.stack.append 1221 return items 1222 1223 def persistent_load(self, pid): 1224 raise UnpicklingError("unsupported persistent id encountered") 1225 1226 dispatch = {} 1227 1228 def load_proto(self): 1229 proto = self.read(1)[0] 1230 if not 0 <= proto <= HIGHEST_PROTOCOL: 1231 raise ValueError("unsupported pickle protocol: %d" % proto) 1232 self.proto = proto 1233 dispatch[PROTO[0]] = load_proto 1234 1235 def load_frame(self): 1236 frame_size, = unpack('<Q', self.read(8)) 1237 if frame_size > sys.maxsize: 1238 raise ValueError("frame size > sys.maxsize: %d" % frame_size) 1239 self._unframer.load_frame(frame_size) 1240 dispatch[FRAME[0]] = load_frame 1241 1242 def load_persid(self): 1243 try: 1244 pid = self.readline()[:-1].decode("ascii") 1245 except UnicodeDecodeError: 1246 raise UnpicklingError( 1247 "persistent IDs in protocol 0 must be ASCII strings") 1248 self.append(self.persistent_load(pid)) 1249 dispatch[PERSID[0]] = load_persid 1250 1251 def load_binpersid(self): 1252 pid = self.stack.pop() 1253 self.append(self.persistent_load(pid)) 1254 dispatch[BINPERSID[0]] = load_binpersid 1255 1256 def load_none(self): 1257 self.append(None) 1258 dispatch[NONE[0]] = load_none 1259 1260 def load_false(self): 1261 self.append(False) 1262 dispatch[NEWFALSE[0]] = load_false 1263 1264 def load_true(self): 1265 self.append(True) 1266 dispatch[NEWTRUE[0]] = load_true 1267 1268 def load_int(self): 1269 data = self.readline() 1270 if data == FALSE[1:]: 1271 val = False 1272 elif data == TRUE[1:]: 1273 val = True 1274 else: 1275 val = int(data, 0) 1276 self.append(val) 1277 dispatch[INT[0]] = load_int 1278 1279 def load_binint(self): 1280 self.append(unpack('<i', self.read(4))[0]) 1281 dispatch[BININT[0]] = load_binint 1282 1283 def load_binint1(self): 1284 self.append(self.read(1)[0]) 1285 dispatch[BININT1[0]] = load_binint1 1286 1287 def load_binint2(self): 1288 
self.append(unpack('<H', self.read(2))[0]) 1289 dispatch[BININT2[0]] = load_binint2 1290 1291 def load_long(self): 1292 val = self.readline()[:-1] 1293 if val and val[-1] == b'L'[0]: 1294 val = val[:-1] 1295 self.append(int(val, 0)) 1296 dispatch[LONG[0]] = load_long 1297 1298 def load_long1(self): 1299 n = self.read(1)[0] 1300 data = self.read(n) 1301 self.append(decode_long(data)) 1302 dispatch[LONG1[0]] = load_long1 1303 1304 def load_long4(self): 1305 n, = unpack('<i', self.read(4)) 1306 if n < 0: 1307 # Corrupt or hostile pickle -- we never write one like this 1308 raise UnpicklingError("LONG pickle has negative byte count") 1309 data = self.read(n) 1310 self.append(decode_long(data)) 1311 dispatch[LONG4[0]] = load_long4 1312 1313 def load_float(self): 1314 self.append(float(self.readline()[:-1])) 1315 dispatch[FLOAT[0]] = load_float 1316 1317 def load_binfloat(self): 1318 self.append(unpack('>d', self.read(8))[0]) 1319 dispatch[BINFLOAT[0]] = load_binfloat 1320 1321 def _decode_string(self, value): 1322 # Used to allow strings from Python 2 to be decoded either as 1323 # bytes or Unicode strings. This should be used only with the 1324 # STRING, BINSTRING and SHORT_BINSTRING opcodes. 
1325 if self.encoding == "bytes": 1326 return value 1327 else: 1328 return value.decode(self.encoding, self.errors) 1329 1330 def load_string(self): 1331 data = self.readline()[:-1] 1332 # Strip outermost quotes 1333 if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'': 1334 data = data[1:-1] 1335 else: 1336 raise UnpicklingError("the STRING opcode argument must be quoted") 1337 self.append(self._decode_string(codecs.escape_decode(data)[0])) 1338 dispatch[STRING[0]] = load_string 1339 1340 def load_binstring(self): 1341 # Deprecated BINSTRING uses signed 32-bit length 1342 len, = unpack('<i', self.read(4)) 1343 if len < 0: 1344 raise UnpicklingError("BINSTRING pickle has negative byte count") 1345 data = self.read(len) 1346 self.append(self._decode_string(data)) 1347 dispatch[BINSTRING[0]] = load_binstring 1348 1349 def load_binbytes(self): 1350 len, = unpack('<I', self.read(4)) 1351 if len > maxsize: 1352 raise UnpicklingError("BINBYTES exceeds system's maximum size " 1353 "of %d bytes" % maxsize) 1354 self.append(self.read(len)) 1355 dispatch[BINBYTES[0]] = load_binbytes 1356 1357 def load_unicode(self): 1358 self.append(str(self.readline()[:-1], 'raw-unicode-escape')) 1359 dispatch[UNICODE[0]] = load_unicode 1360 1361 def load_binunicode(self): 1362 len, = unpack('<I', self.read(4)) 1363 if len > maxsize: 1364 raise UnpicklingError("BINUNICODE exceeds system's maximum size " 1365 "of %d bytes" % maxsize) 1366 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1367 dispatch[BINUNICODE[0]] = load_binunicode 1368 1369 def load_binunicode8(self): 1370 len, = unpack('<Q', self.read(8)) 1371 if len > maxsize: 1372 raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " 1373 "of %d bytes" % maxsize) 1374 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1375 dispatch[BINUNICODE8[0]] = load_binunicode8 1376 1377 def load_binbytes8(self): 1378 len, = unpack('<Q', self.read(8)) 1379 if len > maxsize: 1380 raise 
UnpicklingError("BINBYTES8 exceeds system's maximum size " 1381 "of %d bytes" % maxsize) 1382 self.append(self.read(len)) 1383 dispatch[BINBYTES8[0]] = load_binbytes8 1384 1385 def load_bytearray8(self): 1386 len, = unpack('<Q', self.read(8)) 1387 if len > maxsize: 1388 raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size " 1389 "of %d bytes" % maxsize) 1390 b = bytearray(len) 1391 self.readinto(b) 1392 self.append(b) 1393 dispatch[BYTEARRAY8[0]] = load_bytearray8 1394 1395 def load_next_buffer(self): 1396 if self._buffers is None: 1397 raise UnpicklingError("pickle stream refers to out-of-band data " 1398 "but no *buffers* argument was given") 1399 try: 1400 buf = next(self._buffers) 1401 except StopIteration: 1402 raise UnpicklingError("not enough out-of-band buffers") 1403 self.append(buf) 1404 dispatch[NEXT_BUFFER[0]] = load_next_buffer 1405 1406 def load_readonly_buffer(self): 1407 buf = self.stack[-1] 1408 with memoryview(buf) as m: 1409 if not m.readonly: 1410 self.stack[-1] = m.toreadonly() 1411 dispatch[READONLY_BUFFER[0]] = load_readonly_buffer 1412 1413 def load_short_binstring(self): 1414 len = self.read(1)[0] 1415 data = self.read(len) 1416 self.append(self._decode_string(data)) 1417 dispatch[SHORT_BINSTRING[0]] = load_short_binstring 1418 1419 def load_short_binbytes(self): 1420 len = self.read(1)[0] 1421 self.append(self.read(len)) 1422 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes 1423 1424 def load_short_binunicode(self): 1425 len = self.read(1)[0] 1426 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1427 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode 1428 1429 def load_tuple(self): 1430 items = self.pop_mark() 1431 self.append(tuple(items)) 1432 dispatch[TUPLE[0]] = load_tuple 1433 1434 def load_empty_tuple(self): 1435 self.append(()) 1436 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple 1437 1438 def load_tuple1(self): 1439 self.stack[-1] = (self.stack[-1],) 1440 dispatch[TUPLE1[0]] = load_tuple1 1441 1442 def 
load_tuple2(self): 1443 self.stack[-2:] = [(self.stack[-2], self.stack[-1])] 1444 dispatch[TUPLE2[0]] = load_tuple2 1445 1446 def load_tuple3(self): 1447 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] 1448 dispatch[TUPLE3[0]] = load_tuple3 1449 1450 def load_empty_list(self): 1451 self.append([]) 1452 dispatch[EMPTY_LIST[0]] = load_empty_list 1453 1454 def load_empty_dictionary(self): 1455 self.append({}) 1456 dispatch[EMPTY_DICT[0]] = load_empty_dictionary 1457 1458 def load_empty_set(self): 1459 self.append(set()) 1460 dispatch[EMPTY_SET[0]] = load_empty_set 1461 1462 def load_frozenset(self): 1463 items = self.pop_mark() 1464 self.append(frozenset(items)) 1465 dispatch[FROZENSET[0]] = load_frozenset 1466 1467 def load_list(self): 1468 items = self.pop_mark() 1469 self.append(items) 1470 dispatch[LIST[0]] = load_list 1471 1472 def load_dict(self): 1473 items = self.pop_mark() 1474 d = {items[i]: items[i+1] 1475 for i in range(0, len(items), 2)} 1476 self.append(d) 1477 dispatch[DICT[0]] = load_dict 1478 1479 # INST and OBJ differ only in how they get a class object. It's not 1480 # only sensible to do the rest in a common routine, the two routines 1481 # previously diverged and grew different bugs. 1482 # klass is the class to instantiate, and k points to the topmost mark 1483 # object, following which are the arguments for klass.__init__. 
1484 def _instantiate(self, klass, args): 1485 if (args or not isinstance(klass, type) or 1486 hasattr(klass, "__getinitargs__")): 1487 try: 1488 value = klass(*args) 1489 except TypeError as err: 1490 raise TypeError("in constructor for %s: %s" % 1491 (klass.__name__, str(err)), sys.exc_info()[2]) 1492 else: 1493 value = klass.__new__(klass) 1494 self.append(value) 1495 1496 def load_inst(self): 1497 module = self.readline()[:-1].decode("ascii") 1498 name = self.readline()[:-1].decode("ascii") 1499 klass = self.find_class(module, name) 1500 self._instantiate(klass, self.pop_mark()) 1501 dispatch[INST[0]] = load_inst 1502 1503 def load_obj(self): 1504 # Stack is ... markobject classobject arg1 arg2 ... 1505 args = self.pop_mark() 1506 cls = args.pop(0) 1507 self._instantiate(cls, args) 1508 dispatch[OBJ[0]] = load_obj 1509 1510 def load_newobj(self): 1511 args = self.stack.pop() 1512 cls = self.stack.pop() 1513 obj = cls.__new__(cls, *args) 1514 self.append(obj) 1515 dispatch[NEWOBJ[0]] = load_newobj 1516 1517 def load_newobj_ex(self): 1518 kwargs = self.stack.pop() 1519 args = self.stack.pop() 1520 cls = self.stack.pop() 1521 obj = cls.__new__(cls, *args, **kwargs) 1522 self.append(obj) 1523 dispatch[NEWOBJ_EX[0]] = load_newobj_ex 1524 1525 def load_global(self): 1526 module = self.readline()[:-1].decode("utf-8") 1527 name = self.readline()[:-1].decode("utf-8") 1528 klass = self.find_class(module, name) 1529 self.append(klass) 1530 dispatch[GLOBAL[0]] = load_global 1531 1532 def load_stack_global(self): 1533 name = self.stack.pop() 1534 module = self.stack.pop() 1535 if type(name) is not str or type(module) is not str: 1536 raise UnpicklingError("STACK_GLOBAL requires str") 1537 self.append(self.find_class(module, name)) 1538 dispatch[STACK_GLOBAL[0]] = load_stack_global 1539 1540 def load_ext1(self): 1541 code = self.read(1)[0] 1542 self.get_extension(code) 1543 dispatch[EXT1[0]] = load_ext1 1544 1545 def load_ext2(self): 1546 code, = unpack('<H', self.read(2)) 
1547 self.get_extension(code) 1548 dispatch[EXT2[0]] = load_ext2 1549 1550 def load_ext4(self): 1551 code, = unpack('<i', self.read(4)) 1552 self.get_extension(code) 1553 dispatch[EXT4[0]] = load_ext4 1554 1555 def get_extension(self, code): 1556 nil = [] 1557 obj = _extension_cache.get(code, nil) 1558 if obj is not nil: 1559 self.append(obj) 1560 return 1561 key = _inverted_registry.get(code) 1562 if not key: 1563 if code <= 0: # note that 0 is forbidden 1564 # Corrupt or hostile pickle. 1565 raise UnpicklingError("EXT specifies code <= 0") 1566 raise ValueError("unregistered extension code %d" % code) 1567 obj = self.find_class(*key) 1568 _extension_cache[code] = obj 1569 self.append(obj) 1570 1571 def find_class(self, module, name): 1572 # Subclasses may override this. 1573 sys.audit('pickle.find_class', module, name) 1574 if self.proto < 3 and self.fix_imports: 1575 if (module, name) in _compat_pickle.NAME_MAPPING: 1576 module, name = _compat_pickle.NAME_MAPPING[(module, name)] 1577 elif module in _compat_pickle.IMPORT_MAPPING: 1578 module = _compat_pickle.IMPORT_MAPPING[module] 1579 __import__(module, level=0) 1580 if self.proto >= 4: 1581 return _getattribute(sys.modules[module], name)[0] 1582 else: 1583 return getattr(sys.modules[module], name) 1584 1585 def load_reduce(self): 1586 stack = self.stack 1587 args = stack.pop() 1588 func = stack[-1] 1589 stack[-1] = func(*args) 1590 dispatch[REDUCE[0]] = load_reduce 1591 1592 def load_pop(self): 1593 if self.stack: 1594 del self.stack[-1] 1595 else: 1596 self.pop_mark() 1597 dispatch[POP[0]] = load_pop 1598 1599 def load_pop_mark(self): 1600 self.pop_mark() 1601 dispatch[POP_MARK[0]] = load_pop_mark 1602 1603 def load_dup(self): 1604 self.append(self.stack[-1]) 1605 dispatch[DUP[0]] = load_dup 1606 1607 def load_get(self): 1608 i = int(self.readline()[:-1]) 1609 try: 1610 self.append(self.memo[i]) 1611 except KeyError: 1612 msg = f'Memo value not found at index {i}' 1613 raise UnpicklingError(msg) from None 1614 
dispatch[GET[0]] = load_get 1615 1616 def load_binget(self): 1617 i = self.read(1)[0] 1618 try: 1619 self.append(self.memo[i]) 1620 except KeyError as exc: 1621 msg = f'Memo value not found at index {i}' 1622 raise UnpicklingError(msg) from None 1623 dispatch[BINGET[0]] = load_binget 1624 1625 def load_long_binget(self): 1626 i, = unpack('<I', self.read(4)) 1627 try: 1628 self.append(self.memo[i]) 1629 except KeyError as exc: 1630 msg = f'Memo value not found at index {i}' 1631 raise UnpicklingError(msg) from None 1632 dispatch[LONG_BINGET[0]] = load_long_binget 1633 1634 def load_put(self): 1635 i = int(self.readline()[:-1]) 1636 if i < 0: 1637 raise ValueError("negative PUT argument") 1638 self.memo[i] = self.stack[-1] 1639 dispatch[PUT[0]] = load_put 1640 1641 def load_binput(self): 1642 i = self.read(1)[0] 1643 if i < 0: 1644 raise ValueError("negative BINPUT argument") 1645 self.memo[i] = self.stack[-1] 1646 dispatch[BINPUT[0]] = load_binput 1647 1648 def load_long_binput(self): 1649 i, = unpack('<I', self.read(4)) 1650 if i > maxsize: 1651 raise ValueError("negative LONG_BINPUT argument") 1652 self.memo[i] = self.stack[-1] 1653 dispatch[LONG_BINPUT[0]] = load_long_binput 1654 1655 def load_memoize(self): 1656 memo = self.memo 1657 memo[len(memo)] = self.stack[-1] 1658 dispatch[MEMOIZE[0]] = load_memoize 1659 1660 def load_append(self): 1661 stack = self.stack 1662 value = stack.pop() 1663 list = stack[-1] 1664 list.append(value) 1665 dispatch[APPEND[0]] = load_append 1666 1667 def load_appends(self): 1668 items = self.pop_mark() 1669 list_obj = self.stack[-1] 1670 try: 1671 extend = list_obj.extend 1672 except AttributeError: 1673 pass 1674 else: 1675 extend(items) 1676 return 1677 # Even if the PEP 307 requires extend() and append() methods, 1678 # fall back on append() if the object has no extend() method 1679 # for backward compatibility. 
1680 append = list_obj.append 1681 for item in items: 1682 append(item) 1683 dispatch[APPENDS[0]] = load_appends 1684 1685 def load_setitem(self): 1686 stack = self.stack 1687 value = stack.pop() 1688 key = stack.pop() 1689 dict = stack[-1] 1690 dict[key] = value 1691 dispatch[SETITEM[0]] = load_setitem 1692 1693 def load_setitems(self): 1694 items = self.pop_mark() 1695 dict = self.stack[-1] 1696 for i in range(0, len(items), 2): 1697 dict[items[i]] = items[i + 1] 1698 dispatch[SETITEMS[0]] = load_setitems 1699 1700 def load_additems(self): 1701 items = self.pop_mark() 1702 set_obj = self.stack[-1] 1703 if isinstance(set_obj, set): 1704 set_obj.update(items) 1705 else: 1706 add = set_obj.add 1707 for item in items: 1708 add(item) 1709 dispatch[ADDITEMS[0]] = load_additems 1710 1711 def load_build(self): 1712 stack = self.stack 1713 state = stack.pop() 1714 inst = stack[-1] 1715 setstate = getattr(inst, "__setstate__", None) 1716 if setstate is not None: 1717 setstate(state) 1718 return 1719 slotstate = None 1720 if isinstance(state, tuple) and len(state) == 2: 1721 state, slotstate = state 1722 if state: 1723 inst_dict = inst.__dict__ 1724 intern = sys.intern 1725 for k, v in state.items(): 1726 if type(k) is str: 1727 inst_dict[intern(k)] = v 1728 else: 1729 inst_dict[k] = v 1730 if slotstate: 1731 for k, v in slotstate.items(): 1732 setattr(inst, k, v) 1733 dispatch[BUILD[0]] = load_build 1734 1735 def load_mark(self): 1736 self.metastack.append(self.stack) 1737 self.stack = [] 1738 self.append = self.stack.append 1739 dispatch[MARK[0]] = load_mark 1740 1741 def load_stop(self): 1742 value = self.stack.pop() 1743 raise _Stop(value) 1744 dispatch[STOP[0]] = load_stop 1745 1746 1747# Shorthands 1748 1749def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None): 1750 _Pickler(file, protocol, fix_imports=fix_imports, 1751 buffer_callback=buffer_callback).dump(obj) 1752 1753def _dumps(obj, protocol=None, *, fix_imports=True, 
buffer_callback=None): 1754 f = io.BytesIO() 1755 _Pickler(f, protocol, fix_imports=fix_imports, 1756 buffer_callback=buffer_callback).dump(obj) 1757 res = f.getvalue() 1758 assert isinstance(res, bytes_types) 1759 return res 1760 1761def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict", 1762 buffers=None): 1763 return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, 1764 encoding=encoding, errors=errors).load() 1765 1766def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict", 1767 buffers=None): 1768 if isinstance(s, str): 1769 raise TypeError("Can't load pickle from unicode string") 1770 file = io.BytesIO(s) 1771 return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, 1772 encoding=encoding, errors=errors).load() 1773 1774# Use the faster _pickle if possible 1775try: 1776 from _pickle import ( 1777 PickleError, 1778 PicklingError, 1779 UnpicklingError, 1780 Pickler, 1781 Unpickler, 1782 dump, 1783 dumps, 1784 load, 1785 loads 1786 ) 1787except ImportError: 1788 Pickler, Unpickler = _Pickler, _Unpickler 1789 dump, dumps, load, loads = _dump, _dumps, _load, _loads 1790 1791# Doctest 1792def _test(): 1793 import doctest 1794 return doctest.testmod() 1795 1796if __name__ == "__main__": 1797 import argparse 1798 parser = argparse.ArgumentParser( 1799 description='display contents of the pickle files') 1800 parser.add_argument( 1801 'pickle_file', type=argparse.FileType('br'), 1802 nargs='*', help='the pickle file') 1803 parser.add_argument( 1804 '-t', '--test', action='store_true', 1805 help='run self-test suite') 1806 parser.add_argument( 1807 '-v', action='store_true', 1808 help='run verbosely; only affects self-test run') 1809 args = parser.parse_args() 1810 if args.test: 1811 _test() 1812 else: 1813 if not args.pickle_file: 1814 parser.print_help() 1815 else: 1816 import pprint 1817 for f in args.pickle_file: 1818 obj = load(f) 1819 pprint.pprint(obj) 1820