• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import builtins
2import collections
3import copyreg
4import dbm
5import io
6import functools
7import os
8import math
9import pickle
10import pickletools
11import shutil
12import struct
13import sys
14import threading
15import types
16import unittest
17import weakref
18from textwrap import dedent
19from http.cookies import SimpleCookie
20
21try:
22    import _testbuffer
23except ImportError:
24    _testbuffer = None
25
26from test import support
27from test.support import os_helper
28from test.support import (
29    TestFailed, run_with_locales, no_tracing,
30    _2G, _4G, bigmemtest
31    )
32from test.support.import_helper import forget
33from test.support.os_helper import TESTFN
34from test.support import threading_helper
35from test.support.warnings_helper import save_restore_warnings_filters
36
37from pickle import bytes_types
38
39
40# bpo-41003: Save/restore warnings filters to leave them unchanged.
41# Ignore filters installed by numpy.
42try:
43    with save_restore_warnings_filters():
44        import numpy as np
45except ImportError:
46    np = None
47
48
49requires_32b = unittest.skipUnless(sys.maxsize < 2**32,
50                                   "test is only meaningful on 32-bit builds")
51
52# Tests that try a number of pickle protocols should have a
53#     for proto in protocols:
54# kind of outer loop.
55protocols = range(pickle.HIGHEST_PROTOCOL + 1)
56
57
58# Return True if opcode code appears in the pickle, else False.
def opcode_in_pickle(code, pickle):
    """Tell whether opcode *code* occurs anywhere in *pickle*.

    *code* is the opcode as a bytes object (it is decoded as latin-1 before
    being compared with the opcode codes yielded by pickletools.genops());
    *pickle* is the pickle data to scan.
    """
    return any(
        opcode.code == code.decode("latin-1")
        for opcode, _arg, _pos in pickletools.genops(pickle)
    )
64
65# Return the number of times opcode code appears in pickle.
def count_opcode(code, pickle):
    """Count the occurrences of opcode *code* in *pickle*.

    *code* is the opcode as a bytes object (decoded as latin-1 for the
    comparison); *pickle* is the pickle data walked with
    pickletools.genops().
    """
    return sum(
        1
        for opcode, _arg, _pos in pickletools.genops(pickle)
        if opcode.code == code.decode("latin-1")
    )
72
73
def identity(x):
    """Return *x* itself, unchanged."""
    return x
76
77
class UnseekableIO(io.BytesIO):
    """A BytesIO that reports itself as unseekable.

    Reading and writing are inherited unchanged; peek(), seek() and tell()
    always fail.
    """

    def seekable(self):
        """Always report that the stream cannot be repositioned."""
        return False

    def peek(self, *args):
        """Peeking is deliberately not implemented."""
        raise NotImplementedError

    def seek(self, *args):
        """Refuse to move the stream position."""
        raise io.UnsupportedOperation

    def tell(self):
        """Refuse to report the stream position."""
        raise io.UnsupportedOperation
90
91
class MinimalIO(object):
    """
    A file-like object that doesn't support readinto().

    Only getvalue(), read(), readline() and write() are exposed; each one is
    the bound method of a private io.BytesIO built from the constructor
    arguments.
    """

    # Names forwarded to the wrapped BytesIO, in the order they are bound.
    _DELEGATED = ("getvalue", "read", "readline", "write")

    def __init__(self, *args):
        self._bio = io.BytesIO(*args)
        for method_name in self._DELEGATED:
            setattr(self, method_name, getattr(self._bio, method_name))
102
103
104# We can't very well test the extension registry without putting known stuff
105# in it, but we have to be careful to restore its original state.  Code
106# should do this:
107#
108#     e = ExtensionSaver(extension_code)
109#     try:
110#         fiddle w/ the extension registry's stuff for extension_code
111#     finally:
112#         e.restore()
113
class ExtensionSaver:
    """Save and later restore the copyreg registration of one extension code.

    On construction, any (module, name) pair currently registered for *code*
    is remembered in ``self.pair`` and unregistered; ``self.pair`` is None
    when nothing was registered.  restore() puts that state back.
    """

    def __init__(self, code):
        self.code = code
        # Remember the current registration for code (if any) ...
        self.pair = copyreg._inverted_registry.get(code)
        if self.pair is not None:
            # ... and remove it.
            module, name = self.pair
            copyreg.remove_extension(module, name, code)

    def restore(self):
        """Restore the registration that was in effect at construction."""
        code = self.code
        # Drop whatever got registered for code in the meantime.
        current = copyreg._inverted_registry.get(code)
        if current is not None:
            copyreg.remove_extension(*current, code)
        # Re-register the saved pair, if there was one.
        if self.pair is not None:
            copyreg.add_extension(*self.pair, code)
134
class C:
    """Plain class whose instances compare equal when their __dict__s match."""

    def __eq__(self, other):
        mine, theirs = self.__dict__, other.__dict__
        return mine == theirs
138
class D(C):
    """Variant of C whose constructor requires one (ignored) argument."""

    def __init__(self, arg):
        """Accept *arg* and discard it."""
142
class E(C):
    """Variant of C that defines __getinitargs__() returning no arguments."""

    def __getinitargs__(self):
        return tuple()
146
import __main__

# Publish C, D and E as attributes of __main__ and relabel their __module__
# accordingly; the pickle fixtures in this file refer to these classes by
# the module name "__main__".
for _cls in (C, D, E):
    setattr(__main__, _cls.__name__, _cls)
    _cls.__module__ = "__main__"
del _cls  # don't leave the loop variable behind as a module global
154
155# Simple mutable object.
class Object:
    """Empty class; instances are bare attribute containers."""
158
# Hashable immutable key object containing unhashable mutable data.
class K:
    """Wrapper around a single value, pickled as ``K(value)``."""

    def __init__(self, value):
        self.value = value

    def __reduce__(self):
        # Shouldn't support the recursion itself
        constructor_args = (self.value,)
        return K, constructor_args
167
class myint(int):
    """int subclass that also keeps str() of its constructor argument."""

    def __init__(self, x):
        # Stored as an instance attribute named "str".
        text = str(x)
        self.str = text
171
class initarg(C):
    """C subclass built from two values that it reports via __getinitargs__()."""

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __getinitargs__(self):
        return (self.a, self.b)
180
class metaclass(type):
    """Trivial metaclass: a subclass of type that adds nothing."""
183
class use_metaclass(metaclass=metaclass):
    """Empty class whose type is the custom ``metaclass``."""
186
class pickling_metaclass(type):
    """Metaclass whose classes compare and pickle through ``reduce_args``.

    Classes of this metaclass are expected to carry a ``reduce_args``
    attribute (set by create_dynamic_class()).
    """

    def __eq__(self, other):
        # Classes are equal when they have the same type and were built
        # from equal reduce_args.
        if not (type(self) == type(other)):
            return False
        return self.reduce_args == other.reduce_args

    def __reduce__(self):
        # Pickle as a call to create_dynamic_class(*reduce_args).
        factory = create_dynamic_class
        return (factory, self.reduce_args)
194
def create_dynamic_class(name, bases):
    """Create a pickling_metaclass class called *name* deriving from *bases*.

    The new class has an empty namespace and records ``(name, bases)`` in
    its ``reduce_args`` attribute.
    """
    cls = pickling_metaclass(name, bases, {})
    cls.reduce_args = (name, bases)
    return cls
199
200
class ZeroCopyBytes(bytes):
    """bytes subclass that reduces to a PickleBuffer from protocol 5 on."""

    # Buffer characteristics exposed as plain class attributes.
    readonly = True
    c_contiguous = True
    f_contiguous = True
    zero_copy_reconstruct = True

    def __reduce_ex__(self, protocol):
        rebuild = type(self)._reconstruct
        if protocol < 5:
            # Older protocols: hand over a plain bytes copy.
            return rebuild, (bytes(self),)
        return rebuild, (pickle.PickleBuffer(self),), None

    def __repr__(self):
        return f"{self.__class__.__name__}({bytes(self)!r})"

    __str__ = __repr__

    @classmethod
    def _reconstruct(cls, obj):
        """Return the object behind buffer *obj*, converting it to *cls* if needed."""
        with memoryview(obj) as view:
            exporter = view.obj
            # Zero-copy when the exporter already is an instance of cls.
            return exporter if type(exporter) is cls else cls(exporter)
227
228
class ZeroCopyBytearray(bytearray):
    """bytearray subclass that reduces to a PickleBuffer from protocol 5 on."""

    # Buffer characteristics exposed as plain class attributes.
    readonly = False
    c_contiguous = True
    f_contiguous = True
    zero_copy_reconstruct = True

    def __reduce_ex__(self, protocol):
        rebuild = type(self)._reconstruct
        if protocol < 5:
            # Older protocols: hand over a plain bytes copy.
            return rebuild, (bytes(self),)
        return rebuild, (pickle.PickleBuffer(self),), None

    def __repr__(self):
        return f"{self.__class__.__name__}({bytes(self)!r})"

    __str__ = __repr__

    @classmethod
    def _reconstruct(cls, obj):
        """Return the object behind buffer *obj*, converting it to *cls* if needed."""
        with memoryview(obj) as view:
            exporter = view.obj
            # Zero-copy when the exporter already is an instance of cls.
            return exporter if type(exporter) is cls else cls(exporter)
255
256
if _testbuffer is not None:

    class PicklableNDArray:
        # A not-really-zero-copy picklable ndarray, as the ndarray()
        # constructor doesn't allow for it
        #
        # The wrapped _testbuffer.ndarray lives in ``self.array``; this class
        # is only defined when the _testbuffer module could be imported.

        zero_copy_reconstruct = False

        def __init__(self, *args, **kwargs):
            # All arguments are forwarded to _testbuffer.ndarray().
            self.array = _testbuffer.ndarray(*args, **kwargs)

        def __getitem__(self, idx):
            """Return a new wrapper around ``self.array[idx]``."""
            cls = type(self)
            # Bypass __init__: the indexed array already exists, it does
            # not have to be built from constructor arguments.
            new = cls.__new__(cls)
            new.array = self.array[idx]
            return new

        @property
        def readonly(self):
            return self.array.readonly

        @property
        def c_contiguous(self):
            return self.array.c_contiguous

        @property
        def f_contiguous(self):
            return self.array.f_contiguous

        def __eq__(self, other):
            """Compare format, shape, strides, readonly flag and raw bytes."""
            if not isinstance(other, PicklableNDArray):
                return NotImplemented
            return (other.array.format == self.array.format and
                    other.array.shape == self.array.shape and
                    other.array.strides == self.array.strides and
                    other.array.readonly == self.array.readonly and
                    other.array.tobytes() == self.array.tobytes())

        def __ne__(self, other):
            if not isinstance(other, PicklableNDArray):
                return NotImplemented
            return not (self == other)

        def __repr__(self):
            return (f"{type(self)}(shape={self.array.shape},"
                    f"strides={self.array.strides}, "
                    f"bytes={self.array.tobytes()})")

        def __reduce_ex__(self, protocol):
            """Reduce to ``_reconstruct(buffer, ndarray_kwargs)``.

            The buffer is a PickleBuffer for protocol >= 5 and a bytes copy
            otherwise.  Raises NotImplementedError for a non-contiguous
            array.
            """
            if not self.array.contiguous:
                raise NotImplementedError("Reconstructing a non-contiguous "
                                          "ndarray does not seem possible")
            # Keyword arguments handed back to the constructor on
            # reconstruction.
            ndarray_kwargs = {"shape": self.array.shape,
                              "strides": self.array.strides,
                              "format": self.array.format,
                              "flags": (0 if self.readonly
                                        else _testbuffer.ND_WRITABLE)}
            pb = pickle.PickleBuffer(self.array)
            if protocol >= 5:
                return (type(self)._reconstruct,
                        (pb, ndarray_kwargs))
            else:
                # Need to serialize the bytes in physical order
                with pb.raw() as m:
                    return (type(self)._reconstruct,
                            (m.tobytes(), ndarray_kwargs))

        @classmethod
        def _reconstruct(cls, obj, kwargs):
            """Rebuild an instance from buffer *obj* and the saved *kwargs*."""
            with memoryview(obj) as m:
                # For some reason, ndarray() wants a list of integers...
                # XXX This only works if format == 'B'
                items = list(m.tobytes())
            return cls(items, **kwargs)
331
332
333# DATA0 .. DATA4 are the pickles we expect under the various protocols, for
334# the object returned by create_data().
335
336DATA0 = (
337    b'(lp0\nL0L\naL1L\naF2.0\n'
338    b'ac__builtin__\ncomple'
339    b'x\np1\n(F3.0\nF0.0\ntp2\n'
340    b'Rp3\naL1L\naL-1L\naL255'
341    b'L\naL-255L\naL-256L\naL'
342    b'65535L\naL-65535L\naL-'
343    b'65536L\naL2147483647L'
344    b'\naL-2147483647L\naL-2'
345    b'147483648L\na(Vabc\np4'
346    b'\ng4\nccopy_reg\n_recon'
347    b'structor\np5\n(c__main'
348    b'__\nC\np6\nc__builtin__'
349    b'\nobject\np7\nNtp8\nRp9\n'
350    b'(dp10\nVfoo\np11\nL1L\ns'
351    b'Vbar\np12\nL2L\nsbg9\ntp'
352    b'13\nag13\naL5L\na.'
353)
354
355# Disassembly of DATA0
356DATA0_DIS = """\
357    0: (    MARK
358    1: l        LIST       (MARK at 0)
359    2: p    PUT        0
360    5: L    LONG       0
361    9: a    APPEND
362   10: L    LONG       1
363   14: a    APPEND
364   15: F    FLOAT      2.0
365   20: a    APPEND
366   21: c    GLOBAL     '__builtin__ complex'
367   42: p    PUT        1
368   45: (    MARK
369   46: F        FLOAT      3.0
370   51: F        FLOAT      0.0
371   56: t        TUPLE      (MARK at 45)
372   57: p    PUT        2
373   60: R    REDUCE
374   61: p    PUT        3
375   64: a    APPEND
376   65: L    LONG       1
377   69: a    APPEND
378   70: L    LONG       -1
379   75: a    APPEND
380   76: L    LONG       255
381   82: a    APPEND
382   83: L    LONG       -255
383   90: a    APPEND
384   91: L    LONG       -256
385   98: a    APPEND
386   99: L    LONG       65535
387  107: a    APPEND
388  108: L    LONG       -65535
389  117: a    APPEND
390  118: L    LONG       -65536
391  127: a    APPEND
392  128: L    LONG       2147483647
393  141: a    APPEND
394  142: L    LONG       -2147483647
395  156: a    APPEND
396  157: L    LONG       -2147483648
397  171: a    APPEND
398  172: (    MARK
399  173: V        UNICODE    'abc'
400  178: p        PUT        4
401  181: g        GET        4
402  184: c        GLOBAL     'copy_reg _reconstructor'
403  209: p        PUT        5
404  212: (        MARK
405  213: c            GLOBAL     '__main__ C'
406  225: p            PUT        6
407  228: c            GLOBAL     '__builtin__ object'
408  248: p            PUT        7
409  251: N            NONE
410  252: t            TUPLE      (MARK at 212)
411  253: p        PUT        8
412  256: R        REDUCE
413  257: p        PUT        9
414  260: (        MARK
415  261: d            DICT       (MARK at 260)
416  262: p        PUT        10
417  266: V        UNICODE    'foo'
418  271: p        PUT        11
419  275: L        LONG       1
420  279: s        SETITEM
421  280: V        UNICODE    'bar'
422  285: p        PUT        12
423  289: L        LONG       2
424  293: s        SETITEM
425  294: b        BUILD
426  295: g        GET        9
427  298: t        TUPLE      (MARK at 172)
428  299: p    PUT        13
429  303: a    APPEND
430  304: g    GET        13
431  308: a    APPEND
432  309: L    LONG       5
433  313: a    APPEND
434  314: .    STOP
435highest protocol among opcodes = 0
436"""
437
438DATA1 = (
439    b']q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c__'
440    b'builtin__\ncomplex\nq\x01'
441    b'(G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00t'
442    b'q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ'
443    b'\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff'
444    b'\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00ab'
445    b'cq\x04h\x04ccopy_reg\n_reco'
446    b'nstructor\nq\x05(c__main'
447    b'__\nC\nq\x06c__builtin__\n'
448    b'object\nq\x07Ntq\x08Rq\t}q\n('
449    b'X\x03\x00\x00\x00fooq\x0bK\x01X\x03\x00\x00\x00bar'
450    b'q\x0cK\x02ubh\ttq\rh\rK\x05e.'
451)
452
453# Disassembly of DATA1
454DATA1_DIS = """\
455    0: ]    EMPTY_LIST
456    1: q    BINPUT     0
457    3: (    MARK
458    4: K        BININT1    0
459    6: K        BININT1    1
460    8: G        BINFLOAT   2.0
461   17: c        GLOBAL     '__builtin__ complex'
462   38: q        BINPUT     1
463   40: (        MARK
464   41: G            BINFLOAT   3.0
465   50: G            BINFLOAT   0.0
466   59: t            TUPLE      (MARK at 40)
467   60: q        BINPUT     2
468   62: R        REDUCE
469   63: q        BINPUT     3
470   65: K        BININT1    1
471   67: J        BININT     -1
472   72: K        BININT1    255
473   74: J        BININT     -255
474   79: J        BININT     -256
475   84: M        BININT2    65535
476   87: J        BININT     -65535
477   92: J        BININT     -65536
478   97: J        BININT     2147483647
479  102: J        BININT     -2147483647
480  107: J        BININT     -2147483648
481  112: (        MARK
482  113: X            BINUNICODE 'abc'
483  121: q            BINPUT     4
484  123: h            BINGET     4
485  125: c            GLOBAL     'copy_reg _reconstructor'
486  150: q            BINPUT     5
487  152: (            MARK
488  153: c                GLOBAL     '__main__ C'
489  165: q                BINPUT     6
490  167: c                GLOBAL     '__builtin__ object'
491  187: q                BINPUT     7
492  189: N                NONE
493  190: t                TUPLE      (MARK at 152)
494  191: q            BINPUT     8
495  193: R            REDUCE
496  194: q            BINPUT     9
497  196: }            EMPTY_DICT
498  197: q            BINPUT     10
499  199: (            MARK
500  200: X                BINUNICODE 'foo'
501  208: q                BINPUT     11
502  210: K                BININT1    1
503  212: X                BINUNICODE 'bar'
504  220: q                BINPUT     12
505  222: K                BININT1    2
506  224: u                SETITEMS   (MARK at 199)
507  225: b            BUILD
508  226: h            BINGET     9
509  228: t            TUPLE      (MARK at 112)
510  229: q        BINPUT     13
511  231: h        BINGET     13
512  233: K        BININT1    5
513  235: e        APPENDS    (MARK at 3)
514  236: .    STOP
515highest protocol among opcodes = 1
516"""
517
518DATA2 = (
519    b'\x80\x02]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
520    b'__builtin__\ncomplex\n'
521    b'q\x01G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00'
522    b'\x86q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xff'
523    b'J\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff'
524    b'\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00a'
525    b'bcq\x04h\x04c__main__\nC\nq\x05'
526    b')\x81q\x06}q\x07(X\x03\x00\x00\x00fooq\x08K\x01'
527    b'X\x03\x00\x00\x00barq\tK\x02ubh\x06tq\nh'
528    b'\nK\x05e.'
529)
530
531# Disassembly of DATA2
532DATA2_DIS = """\
533    0: \x80 PROTO      2
534    2: ]    EMPTY_LIST
535    3: q    BINPUT     0
536    5: (    MARK
537    6: K        BININT1    0
538    8: K        BININT1    1
539   10: G        BINFLOAT   2.0
540   19: c        GLOBAL     '__builtin__ complex'
541   40: q        BINPUT     1
542   42: G        BINFLOAT   3.0
543   51: G        BINFLOAT   0.0
544   60: \x86     TUPLE2
545   61: q        BINPUT     2
546   63: R        REDUCE
547   64: q        BINPUT     3
548   66: K        BININT1    1
549   68: J        BININT     -1
550   73: K        BININT1    255
551   75: J        BININT     -255
552   80: J        BININT     -256
553   85: M        BININT2    65535
554   88: J        BININT     -65535
555   93: J        BININT     -65536
556   98: J        BININT     2147483647
557  103: J        BININT     -2147483647
558  108: J        BININT     -2147483648
559  113: (        MARK
560  114: X            BINUNICODE 'abc'
561  122: q            BINPUT     4
562  124: h            BINGET     4
563  126: c            GLOBAL     '__main__ C'
564  138: q            BINPUT     5
565  140: )            EMPTY_TUPLE
566  141: \x81         NEWOBJ
567  142: q            BINPUT     6
568  144: }            EMPTY_DICT
569  145: q            BINPUT     7
570  147: (            MARK
571  148: X                BINUNICODE 'foo'
572  156: q                BINPUT     8
573  158: K                BININT1    1
574  160: X                BINUNICODE 'bar'
575  168: q                BINPUT     9
576  170: K                BININT1    2
577  172: u                SETITEMS   (MARK at 147)
578  173: b            BUILD
579  174: h            BINGET     6
580  176: t            TUPLE      (MARK at 113)
581  177: q        BINPUT     10
582  179: h        BINGET     10
583  181: K        BININT1    5
584  183: e        APPENDS    (MARK at 5)
585  184: .    STOP
586highest protocol among opcodes = 2
587"""
588
589DATA3 = (
590    b'\x80\x03]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c'
591    b'builtins\ncomplex\nq\x01G'
592    b'@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00\x86q\x02'
593    b'Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff'
594    b'\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7f'
595    b'J\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00abcq'
596    b'\x04h\x04c__main__\nC\nq\x05)\x81q'
597    b'\x06}q\x07(X\x03\x00\x00\x00barq\x08K\x02X\x03\x00'
598    b'\x00\x00fooq\tK\x01ubh\x06tq\nh\nK\x05'
599    b'e.'
600)
601
602# Disassembly of DATA3
603DATA3_DIS = """\
604    0: \x80 PROTO      3
605    2: ]    EMPTY_LIST
606    3: q    BINPUT     0
607    5: (    MARK
608    6: K        BININT1    0
609    8: K        BININT1    1
610   10: G        BINFLOAT   2.0
611   19: c        GLOBAL     'builtins complex'
612   37: q        BINPUT     1
613   39: G        BINFLOAT   3.0
614   48: G        BINFLOAT   0.0
615   57: \x86     TUPLE2
616   58: q        BINPUT     2
617   60: R        REDUCE
618   61: q        BINPUT     3
619   63: K        BININT1    1
620   65: J        BININT     -1
621   70: K        BININT1    255
622   72: J        BININT     -255
623   77: J        BININT     -256
624   82: M        BININT2    65535
625   85: J        BININT     -65535
626   90: J        BININT     -65536
627   95: J        BININT     2147483647
628  100: J        BININT     -2147483647
629  105: J        BININT     -2147483648
630  110: (        MARK
631  111: X            BINUNICODE 'abc'
632  119: q            BINPUT     4
633  121: h            BINGET     4
634  123: c            GLOBAL     '__main__ C'
635  135: q            BINPUT     5
636  137: )            EMPTY_TUPLE
637  138: \x81         NEWOBJ
638  139: q            BINPUT     6
639  141: }            EMPTY_DICT
640  142: q            BINPUT     7
641  144: (            MARK
642  145: X                BINUNICODE 'bar'
643  153: q                BINPUT     8
644  155: K                BININT1    2
645  157: X                BINUNICODE 'foo'
646  165: q                BINPUT     9
647  167: K                BININT1    1
648  169: u                SETITEMS   (MARK at 144)
649  170: b            BUILD
650  171: h            BINGET     6
651  173: t            TUPLE      (MARK at 110)
652  174: q        BINPUT     10
653  176: h        BINGET     10
654  178: K        BININT1    5
655  180: e        APPENDS    (MARK at 5)
656  181: .    STOP
657highest protocol among opcodes = 2
658"""
659
660DATA4 = (
661    b'\x80\x04\x95\xa8\x00\x00\x00\x00\x00\x00\x00]\x94(K\x00K\x01G@'
662    b'\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x94\x8c\x07'
663    b'complex\x94\x93\x94G@\x08\x00\x00\x00\x00\x00\x00G'
664    b'\x00\x00\x00\x00\x00\x00\x00\x00\x86\x94R\x94K\x01J\xff\xff\xff\xffK'
665    b'\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ'
666    b'\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80('
667    b'\x8c\x03abc\x94h\x06\x8c\x08__main__\x94\x8c'
668    b'\x01C\x94\x93\x94)\x81\x94}\x94(\x8c\x03bar\x94K\x02\x8c'
669    b'\x03foo\x94K\x01ubh\nt\x94h\x0eK\x05e.'
670)
671
672# Disassembly of DATA4
673DATA4_DIS = """\
674    0: \x80 PROTO      4
675    2: \x95 FRAME      168
676   11: ]    EMPTY_LIST
677   12: \x94 MEMOIZE
678   13: (    MARK
679   14: K        BININT1    0
680   16: K        BININT1    1
681   18: G        BINFLOAT   2.0
682   27: \x8c     SHORT_BINUNICODE 'builtins'
683   37: \x94     MEMOIZE
684   38: \x8c     SHORT_BINUNICODE 'complex'
685   47: \x94     MEMOIZE
686   48: \x93     STACK_GLOBAL
687   49: \x94     MEMOIZE
688   50: G        BINFLOAT   3.0
689   59: G        BINFLOAT   0.0
690   68: \x86     TUPLE2
691   69: \x94     MEMOIZE
692   70: R        REDUCE
693   71: \x94     MEMOIZE
694   72: K        BININT1    1
695   74: J        BININT     -1
696   79: K        BININT1    255
697   81: J        BININT     -255
698   86: J        BININT     -256
699   91: M        BININT2    65535
700   94: J        BININT     -65535
701   99: J        BININT     -65536
702  104: J        BININT     2147483647
703  109: J        BININT     -2147483647
704  114: J        BININT     -2147483648
705  119: (        MARK
706  120: \x8c         SHORT_BINUNICODE 'abc'
707  125: \x94         MEMOIZE
708  126: h            BINGET     6
709  128: \x8c         SHORT_BINUNICODE '__main__'
710  138: \x94         MEMOIZE
711  139: \x8c         SHORT_BINUNICODE 'C'
712  142: \x94         MEMOIZE
713  143: \x93         STACK_GLOBAL
714  144: \x94         MEMOIZE
715  145: )            EMPTY_TUPLE
716  146: \x81         NEWOBJ
717  147: \x94         MEMOIZE
718  148: }            EMPTY_DICT
719  149: \x94         MEMOIZE
720  150: (            MARK
721  151: \x8c             SHORT_BINUNICODE 'bar'
722  156: \x94             MEMOIZE
723  157: K                BININT1    2
724  159: \x8c             SHORT_BINUNICODE 'foo'
725  164: \x94             MEMOIZE
726  165: K                BININT1    1
727  167: u                SETITEMS   (MARK at 150)
728  168: b            BUILD
729  169: h            BINGET     10
730  171: t            TUPLE      (MARK at 119)
731  172: \x94     MEMOIZE
732  173: h        BINGET     14
733  175: K        BININT1    5
734  177: e        APPENDS    (MARK at 13)
735  178: .    STOP
736highest protocol among opcodes = 4
737"""
738
739# set([1,2]) pickled from 2.x with protocol 2
740DATA_SET = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.'
741
742# xrange(5) pickled from 2.x with protocol 2
743DATA_XRANGE = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.'
744
745# a SimpleCookie() object pickled from 2.x with protocol 2
746DATA_COOKIE = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key'
747               b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U'
748               b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07'
749               b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U'
750               b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b'
751               b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.')
752
753# set([3]) pickled from 2.x with protocol 2
754DATA_SET2 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.'
755
756python2_exceptions_without_args = (
757    ArithmeticError,
758    AssertionError,
759    AttributeError,
760    BaseException,
761    BufferError,
762    BytesWarning,
763    DeprecationWarning,
764    EOFError,
765    EnvironmentError,
766    Exception,
767    FloatingPointError,
768    FutureWarning,
769    GeneratorExit,
770    IOError,
771    ImportError,
772    ImportWarning,
773    IndentationError,
774    IndexError,
775    KeyError,
776    KeyboardInterrupt,
777    LookupError,
778    MemoryError,
779    NameError,
780    NotImplementedError,
781    OSError,
782    OverflowError,
783    PendingDeprecationWarning,
784    ReferenceError,
785    RuntimeError,
786    RuntimeWarning,
787    # StandardError is gone in Python 3, we map it to Exception
788    StopIteration,
789    SyntaxError,
790    SyntaxWarning,
791    SystemError,
792    SystemExit,
793    TabError,
794    TypeError,
795    UnboundLocalError,
796    UnicodeError,
797    UnicodeWarning,
798    UserWarning,
799    ValueError,
800    Warning,
801    ZeroDivisionError,
802)
803
804exception_pickle = b'\x80\x02cexceptions\n?\nq\x00)Rq\x01.'
805
806# UnicodeEncodeError object pickled from 2.x with protocol 2
807DATA_UEERR = (b'\x80\x02cexceptions\nUnicodeEncodeError\n'
808              b'q\x00(U\x05asciiq\x01X\x03\x00\x00\x00fooq\x02K\x00K\x01'
809              b'U\x03badq\x03tq\x04Rq\x05.')
810
811
def create_data():
    """Build the reference object that the DATA0 .. DATA4 pickles encode.

    Returns a list holding a few numbers, integers around size cutoffs, the
    same tuple twice (so the pickle contains shared references) and a
    final 5.
    """
    instance = C()
    instance.foo = 1
    instance.bar = 2

    data = [0, 1, 2.0, 3.0+0j]

    # Append some integer test cases at cPickle.c's internal size
    # cutoffs.
    data.extend([1, -1])
    for limit in (0xff, 0xffff, 0x7fffffff):
        data.extend([limit, -limit, -limit - 1])

    # The very same tuple object is appended twice, and it repeats both the
    # string and the instance.
    shared = ('abc', 'abc', instance, instance)
    data.append(shared)
    data.append(shared)
    data.append(5)
    return data
831
832
833class AbstractUnpickleTests:
834    # Subclass must define self.loads.
835
836    _testdata = create_data()
837
838    def assert_is_copy(self, obj, objcopy, msg=None):
839        """Utility method to verify if two objects are copies of each others.
840        """
841        if msg is None:
842            msg = "{!r} is not a copy of {!r}".format(obj, objcopy)
843        self.assertEqual(obj, objcopy, msg=msg)
844        self.assertIs(type(obj), type(objcopy), msg=msg)
845        if hasattr(obj, '__dict__'):
846            self.assertDictEqual(obj.__dict__, objcopy.__dict__, msg=msg)
847            self.assertIsNot(obj.__dict__, objcopy.__dict__, msg=msg)
848        if hasattr(obj, '__slots__'):
849            self.assertListEqual(obj.__slots__, objcopy.__slots__, msg=msg)
850            for slot in obj.__slots__:
851                self.assertEqual(
852                    hasattr(obj, slot), hasattr(objcopy, slot), msg=msg)
853                self.assertEqual(getattr(obj, slot, None),
854                                 getattr(objcopy, slot, None), msg=msg)
855
856    def check_unpickling_error(self, errors, data):
857        with self.subTest(data=data), \
858             self.assertRaises(errors):
859            try:
860                self.loads(data)
861            except BaseException as exc:
862                if support.verbose > 1:
863                    print('%-32r - %s: %s' %
864                          (data, exc.__class__.__name__, exc))
865                raise
866
867    def test_load_from_data0(self):
868        self.assert_is_copy(self._testdata, self.loads(DATA0))
869
870    def test_load_from_data1(self):
871        self.assert_is_copy(self._testdata, self.loads(DATA1))
872
873    def test_load_from_data2(self):
874        self.assert_is_copy(self._testdata, self.loads(DATA2))
875
876    def test_load_from_data3(self):
877        self.assert_is_copy(self._testdata, self.loads(DATA3))
878
879    def test_load_from_data4(self):
880        self.assert_is_copy(self._testdata, self.loads(DATA4))
881
882    def test_load_classic_instance(self):
883        # See issue5180.  Test loading 2.x pickles that
884        # contain an instance of old style class.
885        for X, args in [(C, ()), (D, ('x',)), (E, ())]:
886            xname = X.__name__.encode('ascii')
887            # Protocol 0 (text mode pickle):
888            """
889             0: (    MARK
890             1: i        INST       '__main__ X' (MARK at 0)
891            13: p    PUT        0
892            16: (    MARK
893            17: d        DICT       (MARK at 16)
894            18: p    PUT        1
895            21: b    BUILD
896            22: .    STOP
897            """
898            pickle0 = (b"(i__main__\n"
899                       b"X\n"
900                       b"p0\n"
901                       b"(dp1\nb.").replace(b'X', xname)
902            self.assert_is_copy(X(*args), self.loads(pickle0))
903
904            # Protocol 1 (binary mode pickle)
905            """
906             0: (    MARK
907             1: c        GLOBAL     '__main__ X'
908            13: q        BINPUT     0
909            15: o        OBJ        (MARK at 0)
910            16: q    BINPUT     1
911            18: }    EMPTY_DICT
912            19: q    BINPUT     2
913            21: b    BUILD
914            22: .    STOP
915            """
916            pickle1 = (b'(c__main__\n'
917                       b'X\n'
918                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
919            self.assert_is_copy(X(*args), self.loads(pickle1))
920
921            # Protocol 2 (pickle2 = b'\x80\x02' + pickle1)
922            """
923             0: \x80 PROTO      2
924             2: (    MARK
925             3: c        GLOBAL     '__main__ X'
926            15: q        BINPUT     0
927            17: o        OBJ        (MARK at 2)
928            18: q    BINPUT     1
929            20: }    EMPTY_DICT
930            21: q    BINPUT     2
931            23: b    BUILD
932            24: .    STOP
933            """
934            pickle2 = (b'\x80\x02(c__main__\n'
935                       b'X\n'
936                       b'q\x00oq\x01}q\x02b.').replace(b'X', xname)
937            self.assert_is_copy(X(*args), self.loads(pickle2))
938
939    def test_maxint64(self):
940        maxint64 = (1 << 63) - 1
941        data = b'I' + str(maxint64).encode("ascii") + b'\n.'
942        got = self.loads(data)
943        self.assert_is_copy(maxint64, got)
944
945        # Try too with a bogus literal.
946        data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.'
947        self.check_unpickling_error(ValueError, data)
948
949    def test_unpickle_from_2x(self):
950        # Unpickle non-trivial data from Python 2.x.
951        loaded = self.loads(DATA_SET)
952        self.assertEqual(loaded, set([1, 2]))
953        loaded = self.loads(DATA_XRANGE)
954        self.assertEqual(type(loaded), type(range(0)))
955        self.assertEqual(list(loaded), list(range(5)))
956        loaded = self.loads(DATA_COOKIE)
957        self.assertEqual(type(loaded), SimpleCookie)
958        self.assertEqual(list(loaded.keys()), ["key"])
959        self.assertEqual(loaded["key"].value, "value")
960
961        # Exception objects without arguments pickled from 2.x with protocol 2
962        for exc in python2_exceptions_without_args:
963            data = exception_pickle.replace(b'?', exc.__name__.encode("ascii"))
964            loaded = self.loads(data)
965            self.assertIs(type(loaded), exc)
966
967        # StandardError is mapped to Exception, test that separately
968        loaded = self.loads(exception_pickle.replace(b'?', b'StandardError'))
969        self.assertIs(type(loaded), Exception)
970
971        loaded = self.loads(DATA_UEERR)
972        self.assertIs(type(loaded), UnicodeEncodeError)
973        self.assertEqual(loaded.object, "foo")
974        self.assertEqual(loaded.encoding, "ascii")
975        self.assertEqual(loaded.start, 0)
976        self.assertEqual(loaded.end, 1)
977        self.assertEqual(loaded.reason, "bad")
978
979    def test_load_python2_str_as_bytes(self):
980        # From Python 2: pickle.dumps('a\x00\xa0', protocol=0)
981        self.assertEqual(self.loads(b"S'a\\x00\\xa0'\n.",
982                                    encoding="bytes"), b'a\x00\xa0')
983        # From Python 2: pickle.dumps('a\x00\xa0', protocol=1)
984        self.assertEqual(self.loads(b'U\x03a\x00\xa0.',
985                                    encoding="bytes"), b'a\x00\xa0')
986        # From Python 2: pickle.dumps('a\x00\xa0', protocol=2)
987        self.assertEqual(self.loads(b'\x80\x02U\x03a\x00\xa0.',
988                                    encoding="bytes"), b'a\x00\xa0')
989
990    def test_load_python2_unicode_as_str(self):
991        # From Python 2: pickle.dumps(u'π', protocol=0)
992        self.assertEqual(self.loads(b'V\\u03c0\n.',
993                                    encoding='bytes'), 'π')
994        # From Python 2: pickle.dumps(u'π', protocol=1)
995        self.assertEqual(self.loads(b'X\x02\x00\x00\x00\xcf\x80.',
996                                    encoding="bytes"), 'π')
997        # From Python 2: pickle.dumps(u'π', protocol=2)
998        self.assertEqual(self.loads(b'\x80\x02X\x02\x00\x00\x00\xcf\x80.',
999                                    encoding="bytes"), 'π')
1000
1001    def test_load_long_python2_str_as_bytes(self):
1002        # From Python 2: pickle.dumps('x' * 300, protocol=1)
1003        self.assertEqual(self.loads(pickle.BINSTRING +
1004                                    struct.pack("<I", 300) +
1005                                    b'x' * 300 + pickle.STOP,
1006                                    encoding='bytes'), b'x' * 300)
1007
1008    def test_constants(self):
1009        self.assertIsNone(self.loads(b'N.'))
1010        self.assertIs(self.loads(b'\x88.'), True)
1011        self.assertIs(self.loads(b'\x89.'), False)
1012        self.assertIs(self.loads(b'I01\n.'), True)
1013        self.assertIs(self.loads(b'I00\n.'), False)
1014
1015    def test_empty_bytestring(self):
1016        # issue 11286
1017        empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r')
1018        self.assertEqual(empty, '')
1019
1020    def test_short_binbytes(self):
1021        dumped = b'\x80\x03C\x04\xe2\x82\xac\x00.'
1022        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1023
1024    def test_binbytes(self):
1025        dumped = b'\x80\x03B\x04\x00\x00\x00\xe2\x82\xac\x00.'
1026        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1027
1028    @requires_32b
1029    def test_negative_32b_binbytes(self):
1030        # On 32-bit builds, a BINBYTES of 2**31 or more is refused
1031        dumped = b'\x80\x03B\xff\xff\xff\xffxyzq\x00.'
1032        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1033                                    dumped)
1034
1035    @requires_32b
1036    def test_negative_32b_binunicode(self):
1037        # On 32-bit builds, a BINUNICODE of 2**31 or more is refused
1038        dumped = b'\x80\x03X\xff\xff\xff\xffxyzq\x00.'
1039        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1040                                    dumped)
1041
1042    def test_short_binunicode(self):
1043        dumped = b'\x80\x04\x8c\x04\xe2\x82\xac\x00.'
1044        self.assertEqual(self.loads(dumped), '\u20ac\x00')
1045
1046    def test_misc_get(self):
1047        self.check_unpickling_error(pickle.UnpicklingError, b'g0\np0')
1048        self.check_unpickling_error(pickle.UnpicklingError, b'jens:')
1049        self.check_unpickling_error(pickle.UnpicklingError, b'hens:')
1050        self.assert_is_copy([(100,), (100,)],
1051                            self.loads(b'((Kdtp0\nh\x00l.))'))
1052
1053    def test_binbytes8(self):
1054        dumped = b'\x80\x04\x8e\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
1055        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
1056
1057    def test_binunicode8(self):
1058        dumped = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
1059        self.assertEqual(self.loads(dumped), '\u20ac\x00')
1060
1061    def test_bytearray8(self):
1062        dumped = b'\x80\x05\x96\x03\x00\x00\x00\x00\x00\x00\x00xxx.'
1063        self.assertEqual(self.loads(dumped), bytearray(b'xxx'))
1064
1065    @requires_32b
1066    def test_large_32b_binbytes8(self):
1067        dumped = b'\x80\x04\x8e\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1068        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1069                                    dumped)
1070
1071    @requires_32b
1072    def test_large_32b_bytearray8(self):
1073        dumped = b'\x80\x05\x96\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1074        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1075                                    dumped)
1076
1077    @requires_32b
1078    def test_large_32b_binunicode8(self):
1079        dumped = b'\x80\x04\x8d\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
1080        self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
1081                                    dumped)
1082
1083    def test_get(self):
1084        pickled = b'((lp100000\ng100000\nt.'
1085        unpickled = self.loads(pickled)
1086        self.assertEqual(unpickled, ([],)*2)
1087        self.assertIs(unpickled[0], unpickled[1])
1088
1089    def test_binget(self):
1090        pickled = b'(]q\xffh\xfft.'
1091        unpickled = self.loads(pickled)
1092        self.assertEqual(unpickled, ([],)*2)
1093        self.assertIs(unpickled[0], unpickled[1])
1094
1095    def test_long_binget(self):
1096        pickled = b'(]r\x00\x00\x01\x00j\x00\x00\x01\x00t.'
1097        unpickled = self.loads(pickled)
1098        self.assertEqual(unpickled, ([],)*2)
1099        self.assertIs(unpickled[0], unpickled[1])
1100
1101    def test_dup(self):
1102        pickled = b'((l2t.'
1103        unpickled = self.loads(pickled)
1104        self.assertEqual(unpickled, ([],)*2)
1105        self.assertIs(unpickled[0], unpickled[1])
1106
1107    def test_negative_put(self):
1108        # Issue #12847
1109        dumped = b'Va\np-1\n.'
1110        self.check_unpickling_error(ValueError, dumped)
1111
1112    @requires_32b
1113    def test_negative_32b_binput(self):
1114        # Issue #12847
1115        dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
1116        self.check_unpickling_error(ValueError, dumped)
1117
1118    def test_badly_escaped_string(self):
1119        self.check_unpickling_error(ValueError, b"S'\\'\n.")
1120
1121    def test_badly_quoted_string(self):
1122        # Issue #17710
1123        badpickles = [b"S'\n.",
1124                      b'S"\n.',
1125                      b'S\' \n.',
1126                      b'S" \n.',
1127                      b'S\'"\n.',
1128                      b'S"\'\n.',
1129                      b"S' ' \n.",
1130                      b'S" " \n.',
1131                      b"S ''\n.",
1132                      b'S ""\n.',
1133                      b'S \n.',
1134                      b'S\n.',
1135                      b'S.']
1136        for p in badpickles:
1137            self.check_unpickling_error(pickle.UnpicklingError, p)
1138
1139    def test_correctly_quoted_string(self):
1140        goodpickles = [(b"S''\n.", ''),
1141                       (b'S""\n.', ''),
1142                       (b'S"\\n"\n.', '\n'),
1143                       (b"S'\\n'\n.", '\n')]
1144        for p, expected in goodpickles:
1145            self.assertEqual(self.loads(p), expected)
1146
1147    def test_frame_readline(self):
1148        pickled = b'\x80\x04\x95\x05\x00\x00\x00\x00\x00\x00\x00I42\n.'
1149        #    0: \x80 PROTO      4
1150        #    2: \x95 FRAME      5
1151        #   11: I    INT        42
1152        #   15: .    STOP
1153        self.assertEqual(self.loads(pickled), 42)
1154
1155    def test_compat_unpickle(self):
1156        # xrange(1, 7)
1157        pickled = b'\x80\x02c__builtin__\nxrange\nK\x01K\x07K\x01\x87R.'
1158        unpickled = self.loads(pickled)
1159        self.assertIs(type(unpickled), range)
1160        self.assertEqual(unpickled, range(1, 7))
1161        self.assertEqual(list(unpickled), [1, 2, 3, 4, 5, 6])
1162        # reduce
1163        pickled = b'\x80\x02c__builtin__\nreduce\n.'
1164        self.assertIs(self.loads(pickled), functools.reduce)
1165        # whichdb.whichdb
1166        pickled = b'\x80\x02cwhichdb\nwhichdb\n.'
1167        self.assertIs(self.loads(pickled), dbm.whichdb)
1168        # Exception(), StandardError()
1169        for name in (b'Exception', b'StandardError'):
1170            pickled = (b'\x80\x02cexceptions\n' + name + b'\nU\x03ugh\x85R.')
1171            unpickled = self.loads(pickled)
1172            self.assertIs(type(unpickled), Exception)
1173            self.assertEqual(str(unpickled), 'ugh')
1174        # UserDict.UserDict({1: 2}), UserDict.IterableUserDict({1: 2})
1175        for name in (b'UserDict', b'IterableUserDict'):
1176            pickled = (b'\x80\x02(cUserDict\n' + name +
1177                       b'\no}U\x04data}K\x01K\x02ssb.')
1178            unpickled = self.loads(pickled)
1179            self.assertIs(type(unpickled), collections.UserDict)
1180            self.assertEqual(unpickled, collections.UserDict({1: 2}))
1181
1182    def test_load_global(self):
1183        self.assertIs(self.loads(b'cbuiltins\nstr\n.'), str)
1184        self.assertIs(self.loads(b'cmath\nlog\n.'), math.log)
1185        self.assertIs(self.loads(b'cos.path\njoin\n.'), os.path.join)
1186        self.assertIs(self.loads(b'\x80\x04cbuiltins\nstr.upper\n.'), str.upper)
1187        with support.swap_item(sys.modules, 'mödule', types.SimpleNamespace(glöbal=42)):
1188            self.assertEqual(self.loads(b'\x80\x04cm\xc3\xb6dule\ngl\xc3\xb6bal\n.'), 42)
1189
1190        self.assertRaises(UnicodeDecodeError, self.loads, b'c\xff\nlog\n.')
1191        self.assertRaises(UnicodeDecodeError, self.loads, b'cmath\n\xff\n.')
1192        self.assertRaises(self.truncated_errors, self.loads, b'c\nlog\n.')
1193        self.assertRaises(self.truncated_errors, self.loads, b'cmath\n\n.')
1194        self.assertRaises(self.truncated_errors, self.loads, b'\x80\x04cmath\n\n.')
1195
1196    def test_load_stack_global(self):
1197        self.assertIs(self.loads(b'\x8c\x08builtins\x8c\x03str\x93.'), str)
1198        self.assertIs(self.loads(b'\x8c\x04math\x8c\x03log\x93.'), math.log)
1199        self.assertIs(self.loads(b'\x8c\x07os.path\x8c\x04join\x93.'),
1200                      os.path.join)
1201        self.assertIs(self.loads(b'\x80\x04\x8c\x08builtins\x8c\x09str.upper\x93.'),
1202                      str.upper)
1203        with support.swap_item(sys.modules, 'mödule', types.SimpleNamespace(glöbal=42)):
1204            self.assertEqual(self.loads(b'\x80\x04\x8c\x07m\xc3\xb6dule\x8c\x07gl\xc3\xb6bal\x93.'), 42)
1205
1206        self.assertRaises(UnicodeDecodeError, self.loads, b'\x8c\x01\xff\x8c\x03log\x93.')
1207        self.assertRaises(UnicodeDecodeError, self.loads, b'\x8c\x04math\x8c\x01\xff\x93.')
1208        self.assertRaises(ValueError, self.loads, b'\x8c\x00\x8c\x03log\x93.')
1209        self.assertRaises(AttributeError, self.loads, b'\x8c\x04math\x8c\x00\x93.')
1210        self.assertRaises(AttributeError, self.loads, b'\x80\x04\x8c\x04math\x8c\x00\x93.')
1211
1212        self.assertRaises(pickle.UnpicklingError, self.loads, b'N\x8c\x03log\x93.')
1213        self.assertRaises(pickle.UnpicklingError, self.loads, b'\x8c\x04mathN\x93.')
1214        self.assertRaises(pickle.UnpicklingError, self.loads, b'\x80\x04\x8c\x04mathN\x93.')
1215
1216    def test_find_class(self):
1217        unpickler = self.unpickler(io.BytesIO())
1218        unpickler_nofix = self.unpickler(io.BytesIO(), fix_imports=False)
1219        unpickler4 = self.unpickler(io.BytesIO(b'\x80\x04N.'))
1220        unpickler4.load()
1221
1222        self.assertIs(unpickler.find_class('__builtin__', 'str'), str)
1223        self.assertRaises(ModuleNotFoundError,
1224                          unpickler_nofix.find_class, '__builtin__', 'str')
1225        self.assertIs(unpickler.find_class('builtins', 'str'), str)
1226        self.assertIs(unpickler_nofix.find_class('builtins', 'str'), str)
1227        self.assertIs(unpickler.find_class('math', 'log'), math.log)
1228        self.assertIs(unpickler.find_class('os.path', 'join'), os.path.join)
1229        self.assertIs(unpickler.find_class('os.path', 'join'), os.path.join)
1230
1231        self.assertIs(unpickler4.find_class('builtins', 'str.upper'), str.upper)
1232        with self.assertRaises(AttributeError):
1233            unpickler.find_class('builtins', 'str.upper')
1234
1235        with self.assertRaises(AttributeError):
1236            unpickler.find_class('math', 'spam')
1237        with self.assertRaises(AttributeError):
1238            unpickler4.find_class('math', 'spam')
1239        with self.assertRaises(AttributeError):
1240            unpickler.find_class('math', 'log.spam')
1241        with self.assertRaises(AttributeError):
1242            unpickler4.find_class('math', 'log.spam')
1243        with self.assertRaises(AttributeError):
1244            unpickler.find_class('math', 'log.<locals>.spam')
1245        with self.assertRaises(AttributeError):
1246            unpickler4.find_class('math', 'log.<locals>.spam')
1247        with self.assertRaises(AttributeError):
1248            unpickler.find_class('math', '')
1249        with self.assertRaises(AttributeError):
1250            unpickler4.find_class('math', '')
1251        self.assertRaises(ModuleNotFoundError, unpickler.find_class, 'spam', 'log')
1252        self.assertRaises(ValueError, unpickler.find_class, '', 'log')
1253
1254        self.assertRaises(TypeError, unpickler.find_class, None, 'log')
1255        self.assertRaises(TypeError, unpickler.find_class, 'math', None)
1256        self.assertRaises((TypeError, AttributeError), unpickler4.find_class, 'math', None)
1257
1258    def test_custom_find_class(self):
1259        def loads(data):
1260            class Unpickler(self.unpickler):
1261                def find_class(self, module_name, global_name):
1262                    return (module_name, global_name)
1263            return Unpickler(io.BytesIO(data)).load()
1264
1265        self.assertEqual(loads(b'cmath\nlog\n.'), ('math', 'log'))
1266        self.assertEqual(loads(b'\x8c\x04math\x8c\x03log\x93.'), ('math', 'log'))
1267
1268        def loads(data):
1269            class Unpickler(self.unpickler):
1270                @staticmethod
1271                def find_class(module_name, global_name):
1272                    return (module_name, global_name)
1273            return Unpickler(io.BytesIO(data)).load()
1274
1275        self.assertEqual(loads(b'cmath\nlog\n.'), ('math', 'log'))
1276        self.assertEqual(loads(b'\x8c\x04math\x8c\x03log\x93.'), ('math', 'log'))
1277
1278        def loads(data):
1279            class Unpickler(self.unpickler):
1280                @classmethod
1281                def find_class(cls, module_name, global_name):
1282                    return (module_name, global_name)
1283            return Unpickler(io.BytesIO(data)).load()
1284
1285        self.assertEqual(loads(b'cmath\nlog\n.'), ('math', 'log'))
1286        self.assertEqual(loads(b'\x8c\x04math\x8c\x03log\x93.'), ('math', 'log'))
1287
1288        def loads(data):
1289            class Unpickler(self.unpickler):
1290                pass
1291            def find_class(module_name, global_name):
1292                return (module_name, global_name)
1293            unpickler = Unpickler(io.BytesIO(data))
1294            unpickler.find_class = find_class
1295            return unpickler.load()
1296
1297        self.assertEqual(loads(b'cmath\nlog\n.'), ('math', 'log'))
1298        self.assertEqual(loads(b'\x8c\x04math\x8c\x03log\x93.'), ('math', 'log'))
1299
1300    def test_bad_ext_code(self):
1301        # unregistered extension code
1302        self.check_unpickling_error(ValueError, b'\x82\x01.')
1303        self.check_unpickling_error(ValueError, b'\x82\xff.')
1304        self.check_unpickling_error(ValueError, b'\x83\x01\x00.')
1305        self.check_unpickling_error(ValueError, b'\x83\xff\xff.')
1306        self.check_unpickling_error(ValueError, b'\x84\x01\x00\x00\x00.')
1307        self.check_unpickling_error(ValueError, b'\x84\xff\xff\xff\x7f.')
1308        # EXT specifies code <= 0
1309        self.check_unpickling_error(pickle.UnpicklingError, b'\x82\x00.')
1310        self.check_unpickling_error(pickle.UnpicklingError, b'\x83\x00\x00.')
1311        self.check_unpickling_error(pickle.UnpicklingError, b'\x84\x00\x00\x00\x00.')
1312        self.check_unpickling_error(pickle.UnpicklingError, b'\x84\x00\x00\x00\x80.')
1313        self.check_unpickling_error(pickle.UnpicklingError, b'\x84\xff\xff\xff\xff.')
1314
1315    @support.cpython_only
1316    def test_bad_ext_inverted_registry(self):
1317        code = 1
1318        def check(key, exc):
1319            with support.swap_item(copyreg._inverted_registry, code, key):
1320                with self.assertRaises(exc):
1321                    self.loads(b'\x82\x01.')
1322        check(None, ValueError)
1323        check((), ValueError)
1324        check((__name__,), (TypeError, ValueError))
1325        check((__name__, "MyList", "x"), (TypeError, ValueError))
1326        check((__name__, None), (TypeError, ValueError))
1327        check((None, "MyList"), (TypeError, ValueError))
1328
1329    def test_bad_reduce(self):
1330        self.assertEqual(self.loads(b'cbuiltins\nint\n)R.'), 0)
1331        self.check_unpickling_error(TypeError, b'N)R.')
1332        self.check_unpickling_error(TypeError, b'cbuiltins\nint\nNR.')
1333
1334    def test_bad_newobj(self):
1335        error = (pickle.UnpicklingError, TypeError)
1336        self.assertEqual(self.loads(b'cbuiltins\nint\n)\x81.'), 0)
1337        self.check_unpickling_error(error, b'cbuiltins\nlen\n)\x81.')
1338        self.check_unpickling_error(error, b'cbuiltins\nint\nN\x81.')
1339
1340    def test_bad_newobj_ex(self):
1341        error = (pickle.UnpicklingError, TypeError)
1342        self.assertEqual(self.loads(b'cbuiltins\nint\n)}\x92.'), 0)
1343        self.check_unpickling_error(error, b'cbuiltins\nlen\n)}\x92.')
1344        self.check_unpickling_error(error, b'cbuiltins\nint\nN}\x92.')
1345        self.check_unpickling_error(error, b'cbuiltins\nint\n)N\x92.')
1346
1347    def test_bad_state(self):
1348        c = C()
1349        c.x = None
1350        base = b'c__main__\nC\n)\x81'
1351        self.assertEqual(self.loads(base + b'}X\x01\x00\x00\x00xNsb.'), c)
1352        self.assertEqual(self.loads(base + b'N}X\x01\x00\x00\x00xNs\x86b.'), c)
1353        # non-hashable dict key
1354        self.check_unpickling_error(TypeError, base + b'}]Nsb.')
1355        # state = list
1356        error = (pickle.UnpicklingError, AttributeError)
1357        self.check_unpickling_error(error, base + b'](}}eb.')
1358        # state = 1-tuple
1359        self.check_unpickling_error(error, base + b'}\x85b.')
1360        # state = 3-tuple
1361        self.check_unpickling_error(error, base + b'}}}\x87b.')
1362        # non-hashable slot name
1363        self.check_unpickling_error(TypeError, base + b'}}]Ns\x86b.')
1364        # non-string slot name
1365        self.check_unpickling_error(TypeError, base + b'}}NNs\x86b.')
1366        # dict = True
1367        self.check_unpickling_error(error, base + b'\x88}\x86b.')
1368        # slots dict = True
1369        self.check_unpickling_error(error, base + b'}\x88\x86b.')
1370
1371        class BadKey1:
1372            count = 1
1373            def __hash__(self):
1374                if not self.count:
1375                    raise CustomError
1376                self.count -= 1
1377                return 42
1378        __main__.BadKey1 = BadKey1
1379        # bad hashable dict key
1380        self.check_unpickling_error(CustomError, base + b'}c__main__\nBadKey1\n)\x81Nsb.')
1381
1382    def test_bad_stack(self):
1383        badpickles = [
1384            b'.',                       # STOP
1385            b'0',                       # POP
1386            b'1',                       # POP_MARK
1387            b'2',                       # DUP
1388            b'(2',
1389            b'R',                       # REDUCE
1390            b')R',
1391            b'a',                       # APPEND
1392            b'Na',
1393            b'b',                       # BUILD
1394            b'Nb',
1395            b'd',                       # DICT
1396            b'e',                       # APPENDS
1397            b'(e',
1398            b'ibuiltins\nlist\n',       # INST
1399            b'l',                       # LIST
1400            b'o',                       # OBJ
1401            b'(o',
1402            b'p1\n',                    # PUT
1403            b'q\x00',                   # BINPUT
1404            b'r\x00\x00\x00\x00',       # LONG_BINPUT
1405            b's',                       # SETITEM
1406            b'Ns',
1407            b'NNs',
1408            b't',                       # TUPLE
1409            b'u',                       # SETITEMS
1410            b'(u',
1411            b'}(Nu',
1412            b'\x81',                    # NEWOBJ
1413            b')\x81',
1414            b'\x85',                    # TUPLE1
1415            b'\x86',                    # TUPLE2
1416            b'N\x86',
1417            b'\x87',                    # TUPLE3
1418            b'N\x87',
1419            b'NN\x87',
1420            b'\x90',                    # ADDITEMS
1421            b'(\x90',
1422            b'\x91',                    # FROZENSET
1423            b'\x92',                    # NEWOBJ_EX
1424            b')}\x92',
1425            b'\x93',                    # STACK_GLOBAL
1426            b'Vlist\n\x93',
1427            b'\x94',                    # MEMOIZE
1428        ]
1429        for p in badpickles:
1430            self.check_unpickling_error(self.bad_stack_errors, p)
1431
1432    def test_bad_mark(self):
1433        badpickles = [
1434            b'N(.',                     # STOP
1435            b'N(2',                     # DUP
1436            b'cbuiltins\nlist\n)(R',    # REDUCE
1437            b'cbuiltins\nlist\n()R',
1438            b']N(a',                    # APPEND
1439                                        # BUILD
1440            b'cbuiltins\nValueError\n)R}(b',
1441            b'cbuiltins\nValueError\n)R(}b',
1442            b'(Nd',                     # DICT
1443            b'N(p1\n',                  # PUT
1444            b'N(q\x00',                 # BINPUT
1445            b'N(r\x00\x00\x00\x00',     # LONG_BINPUT
1446            b'}NN(s',                   # SETITEM
1447            b'}N(Ns',
1448            b'}(NNs',
1449            b'}((u',                    # SETITEMS
1450            b'cbuiltins\nlist\n)(\x81', # NEWOBJ
1451            b'cbuiltins\nlist\n()\x81',
1452            b'N(\x85',                  # TUPLE1
1453            b'NN(\x86',                 # TUPLE2
1454            b'N(N\x86',
1455            b'NNN(\x87',                # TUPLE3
1456            b'NN(N\x87',
1457            b'N(NN\x87',
1458            b']((\x90',                 # ADDITEMS
1459                                        # NEWOBJ_EX
1460            b'cbuiltins\nlist\n)}(\x92',
1461            b'cbuiltins\nlist\n)(}\x92',
1462            b'cbuiltins\nlist\n()}\x92',
1463                                        # STACK_GLOBAL
1464            b'Vbuiltins\n(Vlist\n\x93',
1465            b'Vbuiltins\nVlist\n(\x93',
1466            b'N(\x94',                  # MEMOIZE
1467        ]
1468        for p in badpickles:
1469            self.check_unpickling_error(self.bad_stack_errors, p)
1470
1471    def test_truncated_data(self):
1472        self.check_unpickling_error(EOFError, b'')
1473        self.check_unpickling_error(EOFError, b'N')
1474        badpickles = [
1475            b'B',                       # BINBYTES
1476            b'B\x03\x00\x00',
1477            b'B\x03\x00\x00\x00',
1478            b'B\x03\x00\x00\x00ab',
1479            b'C',                       # SHORT_BINBYTES
1480            b'C\x03',
1481            b'C\x03ab',
1482            b'F',                       # FLOAT
1483            b'F0.0',
1484            b'F0.00',
1485            b'G',                       # BINFLOAT
1486            b'G\x00\x00\x00\x00\x00\x00\x00',
1487            b'I',                       # INT
1488            b'I0',
1489            b'J',                       # BININT
1490            b'J\x00\x00\x00',
1491            b'K',                       # BININT1
1492            b'L',                       # LONG
1493            b'L0',
1494            b'L10',
1495            b'L0L',
1496            b'L10L',
1497            b'M',                       # BININT2
1498            b'M\x00',
1499            # b'P',                       # PERSID
1500            # b'Pabc',
1501            b'S',                       # STRING
1502            b"S'abc'",
1503            b'T',                       # BINSTRING
1504            b'T\x03\x00\x00',
1505            b'T\x03\x00\x00\x00',
1506            b'T\x03\x00\x00\x00ab',
1507            b'U',                       # SHORT_BINSTRING
1508            b'U\x03',
1509            b'U\x03ab',
1510            b'V',                       # UNICODE
1511            b'Vabc',
1512            b'X',                       # BINUNICODE
1513            b'X\x03\x00\x00',
1514            b'X\x03\x00\x00\x00',
1515            b'X\x03\x00\x00\x00ab',
1516            b'(c',                      # GLOBAL
1517            b'(cbuiltins',
1518            b'(cbuiltins\n',
1519            b'(cbuiltins\nlist',
1520            b'Ng',                      # GET
1521            b'Ng0',
1522            b'(i',                      # INST
1523            b'(ibuiltins',
1524            b'(ibuiltins\n',
1525            b'(ibuiltins\nlist',
1526            b'Nh',                      # BINGET
1527            b'Nj',                      # LONG_BINGET
1528            b'Nj\x00\x00\x00',
1529            b'Np',                      # PUT
1530            b'Np0',
1531            b'Nq',                      # BINPUT
1532            b'Nr',                      # LONG_BINPUT
1533            b'Nr\x00\x00\x00',
1534            b'\x80',                    # PROTO
1535            b'\x82',                    # EXT1
1536            b'\x83',                    # EXT2
1537            b'\x84\x01',
1538            b'\x84',                    # EXT4
1539            b'\x84\x01\x00\x00',
1540            b'\x8a',                    # LONG1
1541            b'\x8b',                    # LONG4
1542            b'\x8b\x00\x00\x00',
1543            b'\x8c',                    # SHORT_BINUNICODE
1544            b'\x8c\x03',
1545            b'\x8c\x03ab',
1546            b'\x8d',                    # BINUNICODE8
1547            b'\x8d\x03\x00\x00\x00\x00\x00\x00',
1548            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00',
1549            b'\x8d\x03\x00\x00\x00\x00\x00\x00\x00ab',
1550            b'\x8e',                    # BINBYTES8
1551            b'\x8e\x03\x00\x00\x00\x00\x00\x00',
1552            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00',
1553            b'\x8e\x03\x00\x00\x00\x00\x00\x00\x00ab',
1554            b'\x96',                    # BYTEARRAY8
1555            b'\x96\x03\x00\x00\x00\x00\x00\x00',
1556            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00',
1557            b'\x96\x03\x00\x00\x00\x00\x00\x00\x00ab',
1558            b'\x95',                    # FRAME
1559            b'\x95\x02\x00\x00\x00\x00\x00\x00',
1560            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00',
1561            b'\x95\x02\x00\x00\x00\x00\x00\x00\x00N',
1562        ]
1563        for p in badpickles:
1564            self.check_unpickling_error(self.truncated_errors, p)
1565
    @threading_helper.reap_threads
    @threading_helper.requires_working_threading()
    def test_unpickle_module_race(self):
        """Unpickle the same not-yet-imported class from two threads at once.

        Two temporary modules are written into a scratch directory placed at
        the front of sys.path.  Importing ``locking_import`` blocks on a
        barrier owned by ``locker``, which keeps that import in progress
        while a second thread unpickles a class from the same module.  Both
        threads must end up with instances of the class.
        """
        # https://bugs.python.org/issue34572
        locker_module = dedent("""
        import threading
        barrier = threading.Barrier(2)
        """)
        locking_import_module = dedent("""
        import locker
        locker.barrier.wait()
        class ToBeUnpickled(object):
            pass
        """)

        # Create the scratch package directory and register every cleanup
        # right after the step it undoes.
        os.mkdir(TESTFN)
        self.addCleanup(shutil.rmtree, TESTFN)
        sys.path.insert(0, TESTFN)
        self.addCleanup(sys.path.remove, TESTFN)
        with open(os.path.join(TESTFN, "locker.py"), "wb") as f:
            f.write(locker_module.encode('utf-8'))
        with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f:
            f.write(locking_import_module.encode('utf-8'))
        self.addCleanup(forget, "locker")
        self.addCleanup(forget, "locking_import")

        # Imported here so this thread can release locker.barrier below.
        import locker

        pickle_bytes = (
            b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.')

        # Then try to unpickle two of these simultaneously
        # One of them will cause the module import, and we want it to block
        # until the other one either:
        #   - fails (before the patch for this issue)
        #   - blocks on the import lock for the module, as it should
        results = []
        # Three parties: the two worker threads plus this thread.
        barrier = threading.Barrier(3)
        def t():
            # This ensures the threads have all started
            # presumably barrier release is faster than thread startup
            barrier.wait()
            results.append(pickle.loads(pickle_bytes))

        t1 = threading.Thread(target=t)
        t2 = threading.Thread(target=t)
        t1.start()
        t2.start()

        barrier.wait()
        # could have delay here
        # Second party of the 2-party barrier that locking_import waits on;
        # passing it lets the blocked import finish.
        locker.barrier.wait()

        t1.join()
        t2.join()

        from locking_import import ToBeUnpickled
        self.assertEqual(
            [type(x) for x in results],
            [ToBeUnpickled] * 2)
1626
1627
class AbstractPicklingErrorTests:
    # Subclass must define self.dumps, self.pickler.
    #
    # Most tests hand self.dumps() an object that is expected to fail to
    # pickle and check the type of the exception that comes out.  The
    # fixtures used here (REX, REX_state, REX_six, REX_seven, NoNew,
    # UnpickleableCallable, UNPICKLEABLE, CustomError, BadGetattr, MyList,
    # ...) are defined elsewhere in this file.

    def _check_dumps_raises(self, obj, exc, protos=None, **params):
        # Assert that self.dumps(obj, proto) raises *exc* for each protocol.
        #
        # exc:    exception class, or tuple of classes, as accepted by
        #         assertRaises().
        # protos: iterable of protocol numbers; defaults to the module-level
        #         ``protocols``.
        # params: extra keyword arguments reported by subTest() next to
        #         ``proto``.
        if protos is None:
            protos = protocols
        for proto in protos:
            with self.subTest(proto=proto, **params):
                with self.assertRaises(exc):
                    self.dumps(obj, proto)

    def test_bad_reduce_result(self):
        # A list instead of a tuple.
        self._check_dumps_raises(REX([print, ()]), pickle.PicklingError)
        # A one-item tuple.
        self._check_dumps_raises(REX((print,)), pickle.PicklingError)
        # A seven-item tuple.
        obj = REX((print, (), None, None, None, None, None))
        self._check_dumps_raises(obj, pickle.PicklingError)

    def test_bad_reconstructor(self):
        # First item is the integer 42.
        self._check_dumps_raises(REX((42, ())), pickle.PicklingError)

    def test_unpickleable_reconstructor(self):
        obj = REX((UnpickleableCallable(), ()))
        self._check_dumps_raises(obj, CustomError)

    def test_bad_reconstructor_args(self):
        # Arguments given as a list instead of a tuple.
        self._check_dumps_raises(REX((print, [])), pickle.PicklingError)

    def test_unpickleable_reconstructor_args(self):
        obj = REX((print, (1, 2, UNPICKLEABLE)))
        self._check_dumps_raises(obj, CustomError)

    def test_bad_newobj_args(self):
        errors = (IndexError, pickle.PicklingError)
        # Empty argument tuple.
        obj = REX((copyreg.__newobj__, ()))
        self._check_dumps_raises(obj, errors, protocols[2:])
        # Arguments given as a list instead of a tuple.
        obj = REX((copyreg.__newobj__, [REX]))
        self._check_dumps_raises(obj, errors, protocols[2:])

    def test_bad_newobj_class(self):
        obj = REX((copyreg.__newobj__, (NoNew(),)))
        self._check_dumps_raises(obj, pickle.PicklingError, protocols[2:])

    def test_wrong_newobj_class(self):
        # The class argument (str) is not the class of the pickled object.
        obj = REX((copyreg.__newobj__, (str,)))
        self._check_dumps_raises(obj, pickle.PicklingError, protocols[2:])

    def test_unpickleable_newobj_class(self):
        class LocalREX(REX): pass
        obj = LocalREX((copyreg.__newobj__, (LocalREX,)))
        self._check_dumps_raises(obj, (pickle.PicklingError, AttributeError))

    def test_unpickleable_newobj_args(self):
        obj = REX((copyreg.__newobj__, (REX, 1, 2, UNPICKLEABLE)))
        self._check_dumps_raises(obj, CustomError)

    def test_bad_newobj_ex_args(self):
        # Empty argument tuple.
        obj = REX((copyreg.__newobj_ex__, ()))
        self._check_dumps_raises(obj, (ValueError, pickle.PicklingError),
                                 protocols[2:])

        # Arguments are not a tuple at all.
        obj = REX((copyreg.__newobj_ex__, 42))
        self._check_dumps_raises(obj, pickle.PicklingError, protocols[2:])

        # The remaining cases are limited to protocols 2 and 3 when the
        # Python pickler is under test.
        is_py = self.pickler is pickle._Pickler
        late_protos = protocols[2:4] if is_py else protocols[2:]
        errors = (TypeError, pickle.PicklingError)

        # Positional arguments are an int instead of a tuple.
        obj = REX((copyreg.__newobj_ex__, (REX, 42, {})))
        self._check_dumps_raises(obj, errors, late_protos)

        # Keyword arguments are a list instead of a dict.
        obj = REX((copyreg.__newobj_ex__, (REX, (), [])))
        self._check_dumps_raises(obj, errors, late_protos)

    def test_bad_newobj_ex__class(self):
        obj = REX((copyreg.__newobj_ex__, (NoNew(), (), {})))
        self._check_dumps_raises(obj, pickle.PicklingError, protocols[2:])

    def test_wrong_newobj_ex_class(self):
        if self.pickler is not pickle._Pickler:
            self.skipTest('only verified in the Python implementation')
        obj = REX((copyreg.__newobj_ex__, (str, (), {})))
        self._check_dumps_raises(obj, pickle.PicklingError, protocols[2:])

    def test_unpickleable_newobj_ex_class(self):
        class LocalREX(REX): pass
        obj = LocalREX((copyreg.__newobj_ex__, (LocalREX, (), {})))
        self._check_dumps_raises(obj, (pickle.PicklingError, AttributeError))

    def test_unpickleable_newobj_ex_args(self):
        obj = REX((copyreg.__newobj_ex__, (REX, (1, 2, UNPICKLEABLE), {})))
        self._check_dumps_raises(obj, CustomError)

    def test_unpickleable_newobj_ex_kwargs(self):
        obj = REX((copyreg.__newobj_ex__, (REX, (), {'a': UNPICKLEABLE})))
        self._check_dumps_raises(obj, CustomError)

    def test_unpickleable_state(self):
        self._check_dumps_raises(REX_state(UNPICKLEABLE), CustomError)

    def test_bad_state_setter(self):
        if self.pickler is pickle._Pickler:
            self.skipTest('only verified in the C implementation')
        # Sixth item (the state setter) is the integer 42.
        obj = REX((print, (), 'state', None, None, 42))
        self._check_dumps_raises(obj, pickle.PicklingError)

    def test_unpickleable_state_setter(self):
        obj = REX((print, (), 'state', None, None, UnpickleableCallable()))
        self._check_dumps_raises(obj, CustomError)

    def test_unpickleable_state_with_state_setter(self):
        obj = REX((print, (), UNPICKLEABLE, None, None, print))
        self._check_dumps_raises(obj, CustomError)

    def test_bad_object_list_items(self):
        # Issue4176: crash when 4th and 5th items of __reduce__()
        # are not iterators
        errors = (TypeError, pickle.PicklingError)
        self._check_dumps_raises(REX((list, (), None, 42)), errors)

        if self.pickler is not pickle._Pickler:
            # Python implementation is less strict and also accepts iterables.
            self._check_dumps_raises(REX((list, (), None, [])), errors)

    def test_unpickleable_object_list_items(self):
        self._check_dumps_raises(REX_six([1, 2, UNPICKLEABLE]), CustomError)

    def test_bad_object_dict_items(self):
        # Issue4176: crash when 4th and 5th items of __reduce__()
        # are not iterators
        errors = (TypeError, pickle.PicklingError)
        self._check_dumps_raises(REX((dict, (), None, None, 42)), errors)

        for proto in protocols:
            # An iterator can be consumed only once, so a fresh object is
            # built for every protocol.
            obj = REX((dict, (), None, None, iter([('a',)])))
            self._check_dumps_raises(obj, (ValueError, TypeError), [proto])

        if self.pickler is not pickle._Pickler:
            # Python implementation is less strict and also accepts iterables.
            self._check_dumps_raises(REX((dict, (), None, None, [])), errors)

    def test_unpickleable_object_dict_items(self):
        self._check_dumps_raises(REX_seven({'a': UNPICKLEABLE}), CustomError)

    def test_unpickleable_list_items(self):
        self._check_dumps_raises([1, [2, 3, UNPICKLEABLE]], CustomError)
        for n in [0, 1, 1000, 1005]:
            obj = [*range(n), UNPICKLEABLE]
            # Report n as well, so a failure identifies the list size.
            self._check_dumps_raises(obj, CustomError, n=n)

    def test_unpickleable_tuple_items(self):
        self._check_dumps_raises((1, (2, 3, UNPICKLEABLE)), CustomError)
        self._check_dumps_raises((*range(10), UNPICKLEABLE), CustomError)

    def test_unpickleable_dict_items(self):
        self._check_dumps_raises({'a': {'b': UNPICKLEABLE}}, CustomError)
        for n in [0, 1, 1000, 1005]:
            obj = dict.fromkeys(range(n))
            obj['a'] = UNPICKLEABLE
            self._check_dumps_raises(obj, CustomError, n=n)

    def test_unpickleable_set_items(self):
        self._check_dumps_raises({UNPICKLEABLE}, CustomError)

    def test_unpickleable_frozenset_items(self):
        obj = frozenset({frozenset({UNPICKLEABLE})})
        self._check_dumps_raises(obj, CustomError)

    def test_global_lookup_error(self):
        # Global name does not exist
        obj = REX('spam')
        obj.__module__ = __name__
        self._check_dumps_raises(obj, pickle.PicklingError)

        obj.__module__ = 'nonexisting'
        self._check_dumps_raises(obj, pickle.PicklingError)

        obj.__module__ = ''
        self._check_dumps_raises(obj, (ValueError, pickle.PicklingError))

        obj.__module__ = None
        self._check_dumps_raises(obj, pickle.PicklingError)

    def test_nonencodable_global_name_error(self):
        errors = (UnicodeEncodeError, pickle.PicklingError)
        for proto in protocols[:4]:
            with self.subTest(proto=proto):
                # The offending name differs between protocols < 3 and 3.
                name = 'nonascii\xff' if proto < 3 else 'nonencodable\udbff'
                obj = REX(name)
                obj.__module__ = __name__
                # Temporarily bind the object under that name in this module.
                with support.swap_item(globals(), name, obj):
                    with self.assertRaises(errors):
                        self.dumps(obj, proto)

    def test_nonencodable_module_name_error(self):
        errors = (UnicodeEncodeError, pickle.PicklingError)
        for proto in protocols[:4]:
            with self.subTest(proto=proto):
                name = 'nonascii\xff' if proto < 3 else 'nonencodable\udbff'
                obj = REX('test')
                obj.__module__ = name
                # Temporarily register a stand-in module under that name.
                mod = types.SimpleNamespace(test=obj)
                with support.swap_item(sys.modules, name, mod):
                    with self.assertRaises(errors):
                        self.dumps(obj, proto)

    def test_nested_lookup_error(self):
        # Nested name does not exist
        obj = REX('AbstractPickleTests.spam')
        obj.__module__ = __name__
        self._check_dumps_raises(obj, pickle.PicklingError)

        obj.__module__ = None
        self._check_dumps_raises(obj, pickle.PicklingError)

    def test_wrong_object_lookup_error(self):
        # Name is bound to different object
        obj = REX('AbstractPickleTests')
        obj.__module__ = __name__
        # NOTE(review): this attribute is not read anywhere in this test and
        # is never removed again -- confirm whether it is still needed.
        AbstractPickleTests.ham = []
        self._check_dumps_raises(obj, pickle.PicklingError)

        obj.__module__ = None
        self._check_dumps_raises(obj, pickle.PicklingError)

    def test_local_lookup_error(self):
        # Test that whichmodule() errors out cleanly when looking up
        # an assumed globally-reachable object fails.
        def f():
            pass
        errors = (AttributeError, pickle.PicklingError)
        # Since the function is local, lookup will fail
        self._check_dumps_raises(f, errors)
        # Same without a __module__ attribute (exercises a different path
        # in _pickle.c).
        del f.__module__
        self._check_dumps_raises(f, errors)
        # Yet a different path.
        f.__name__ = f.__qualname__
        self._check_dumps_raises(f, errors)

    def test_reduce_ex_None(self):
        # Uses the default protocol of self.dumps().
        c = REX_None()
        with self.assertRaises(TypeError):
            self.dumps(c)

    def test_reduce_None(self):
        # Uses the default protocol of self.dumps().
        c = R_None()
        with self.assertRaises(TypeError):
            self.dumps(c)

    @no_tracing
    def test_bad_getattr(self):
        # Issue #3514: crash when there is an infinite loop in __getattr__
        x = BadGetattr()
        for proto in range(2):
            with support.infinite_recursion(25):
                self.assertRaises(RuntimeError, self.dumps, x, proto)
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            # Only checks that pickling succeeds; the result is not inspected.
            self.dumps(x, proto)

    def test_picklebuffer_error(self):
        # PickleBuffer forbidden with protocol < 5
        pb = pickle.PickleBuffer(b"foobar")
        for proto in range(0, 5):
            with self.subTest(proto=proto):
                with self.assertRaises(pickle.PickleError) as cm:
                    self.dumps(pb, proto)
                self.assertEqual(str(cm.exception),
                    'PickleBuffer can only be pickled with protocol >= 5')

    def test_non_continuous_buffer(self):
        for proto in protocols[5:]:
            with self.subTest(proto=proto):
                # Every second byte of the buffer: a strided memoryview slice.
                pb = pickle.PickleBuffer(memoryview(b"foobar")[::2])
                with self.assertRaises((pickle.PicklingError, BufferError)):
                    self.dumps(pb, proto)

    def test_buffer_callback_error(self):
        # An exception raised by the buffer callback must propagate.
        def buffer_callback(buffers):
            raise CustomError
        pb = pickle.PickleBuffer(b"foobar")
        with self.assertRaises(CustomError):
            self.dumps(pb, 5, buffer_callback=buffer_callback)

    def test_evil_pickler_mutating_collection(self):
        # https://github.com/python/cpython/issues/92930
        # Clearer is bound as a module global, presumably so that pickling
        # its instances can find the class by name.
        global Clearer
        class Clearer:
            pass

        def check(collection, proto):
            # Pickle *collection* with a pickler that empties it as soon as
            # a Clearer instance is reached.  Pickling may succeed; if it
            # raises RuntimeError, the message must mention the size change.
            class EvilPickler(self.pickler):
                def persistent_id(self, obj):
                    if isinstance(obj, Clearer):
                        collection.clear()
                    return None
            pickler = EvilPickler(io.BytesIO(), proto)
            try:
                pickler.dump(collection)
            except RuntimeError as e:
                expected = "changed size during iteration"
                self.assertIn(expected, str(e))

        for proto in protocols:
            with self.subTest(proto=proto):
                check([Clearer()], proto)
                check([Clearer(), Clearer()], proto)
                check({Clearer()}, proto)
                check({Clearer(), Clearer()}, proto)
                check({Clearer(): 1}, proto)
                check({Clearer(): 1, Clearer(): 2}, proto)
                check({1: Clearer(), 2: Clearer()}, proto)

    @support.cpython_only
    def test_bad_ext_code(self):
        # This should never happen in normal circumstances, because the type
        # and the value of the extension code is checked in copyreg.add_extension().
        key = (__name__, 'MyList')
        def check(code, exc):
            # Preconditions use unittest assertions so that they are still
            # checked when Python runs with -O.
            self.assertNotIn(key, copyreg._extension_registry)
            self.assertNotIn(code, copyreg._inverted_registry)
            with (support.swap_item(copyreg._extension_registry, key, code),
                  support.swap_item(copyreg._inverted_registry, code, key)):
                for proto in protocols[2:]:
                    with self.subTest(code=code, proto=proto):
                        with self.assertRaises(exc):
                            self.dumps(MyList, proto)

        check(object(), TypeError)
        check(None, TypeError)
        check(-1, (RuntimeError, struct.error))
        check(0, RuntimeError)
        check(2**31, (RuntimeError, OverflowError, struct.error))
        check(2**1000, (OverflowError, struct.error))
        check(-2**1000, (OverflowError, struct.error))
2117
2118
2119class AbstractPickleTests:
2120    # Subclass must define self.dumps, self.loads.
2121
2122    optimized = False
2123
2124    _testdata = AbstractUnpickleTests._testdata
2125
2126    def setUp(self):
2127        pass
2128
2129    assert_is_copy = AbstractUnpickleTests.assert_is_copy
2130
2131    def test_misc(self):
2132        # test various datatypes not tested by testdata
2133        for proto in protocols:
2134            x = myint(4)
2135            s = self.dumps(x, proto)
2136            y = self.loads(s)
2137            self.assert_is_copy(x, y)
2138
2139            x = (1, ())
2140            s = self.dumps(x, proto)
2141            y = self.loads(s)
2142            self.assert_is_copy(x, y)
2143
2144            x = initarg(1, x)
2145            s = self.dumps(x, proto)
2146            y = self.loads(s)
2147            self.assert_is_copy(x, y)
2148
2149        # XXX test __reduce__ protocol?
2150
2151    def test_roundtrip_equality(self):
2152        expected = self._testdata
2153        for proto in protocols:
2154            s = self.dumps(expected, proto)
2155            got = self.loads(s)
2156            self.assert_is_copy(expected, got)
2157
2158    # There are gratuitous differences between pickles produced by
2159    # pickle and cPickle, largely because cPickle starts PUT indices at
2160    # 1 and pickle starts them at 0.  See XXX comment in cPickle's put2() --
2161    # there's a comment with an exclamation point there whose meaning
2162    # is a mystery.  cPickle also suppresses PUT for objects with a refcount
2163    # of 1.
2164    def dont_test_disassembly(self):
2165        from io import StringIO
2166        from pickletools import dis
2167
2168        for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
2169            s = self.dumps(self._testdata, proto)
2170            filelike = StringIO()
2171            dis(s, out=filelike)
2172            got = filelike.getvalue()
2173            self.assertEqual(expected, got)
2174
2175    def _test_recursive_list(self, cls, aslist=identity, minprotocol=0):
2176        # List containing itself.
2177        l = cls()
2178        l.append(l)
2179        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2180            s = self.dumps(l, proto)
2181            x = self.loads(s)
2182            self.assertIsInstance(x, cls)
2183            y = aslist(x)
2184            self.assertEqual(len(y), 1)
2185            self.assertIs(y[0], x)
2186
2187    def test_recursive_list(self):
2188        self._test_recursive_list(list)
2189
2190    def test_recursive_list_subclass(self):
2191        self._test_recursive_list(MyList, minprotocol=2)
2192
2193    def test_recursive_list_like(self):
2194        self._test_recursive_list(REX_six, aslist=lambda x: x.items)
2195
2196    def _test_recursive_tuple_and_list(self, cls, aslist=identity, minprotocol=0):
2197        # Tuple containing a list containing the original tuple.
2198        t = (cls(),)
2199        t[0].append(t)
2200        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2201            s = self.dumps(t, proto)
2202            x = self.loads(s)
2203            self.assertIsInstance(x, tuple)
2204            self.assertEqual(len(x), 1)
2205            self.assertIsInstance(x[0], cls)
2206            y = aslist(x[0])
2207            self.assertEqual(len(y), 1)
2208            self.assertIs(y[0], x)
2209
2210        # List containing a tuple containing the original list.
2211        t, = t
2212        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2213            s = self.dumps(t, proto)
2214            x = self.loads(s)
2215            self.assertIsInstance(x, cls)
2216            y = aslist(x)
2217            self.assertEqual(len(y), 1)
2218            self.assertIsInstance(y[0], tuple)
2219            self.assertEqual(len(y[0]), 1)
2220            self.assertIs(y[0][0], x)
2221
2222    def test_recursive_tuple_and_list(self):
2223        self._test_recursive_tuple_and_list(list)
2224
2225    def test_recursive_tuple_and_list_subclass(self):
2226        self._test_recursive_tuple_and_list(MyList, minprotocol=2)
2227
2228    def test_recursive_tuple_and_list_like(self):
2229        self._test_recursive_tuple_and_list(REX_six, aslist=lambda x: x.items)
2230
2231    def _test_recursive_dict(self, cls, asdict=identity, minprotocol=0):
2232        # Dict containing itself.
2233        d = cls()
2234        d[1] = d
2235        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2236            s = self.dumps(d, proto)
2237            x = self.loads(s)
2238            self.assertIsInstance(x, cls)
2239            y = asdict(x)
2240            self.assertEqual(list(y.keys()), [1])
2241            self.assertIs(y[1], x)
2242
2243    def test_recursive_dict(self):
2244        self._test_recursive_dict(dict)
2245
2246    def test_recursive_dict_subclass(self):
2247        self._test_recursive_dict(MyDict, minprotocol=2)
2248
2249    def test_recursive_dict_like(self):
2250        self._test_recursive_dict(REX_seven, asdict=lambda x: x.table)
2251
2252    def _test_recursive_tuple_and_dict(self, cls, asdict=identity, minprotocol=0):
2253        # Tuple containing a dict containing the original tuple.
2254        t = (cls(),)
2255        t[0][1] = t
2256        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2257            s = self.dumps(t, proto)
2258            x = self.loads(s)
2259            self.assertIsInstance(x, tuple)
2260            self.assertEqual(len(x), 1)
2261            self.assertIsInstance(x[0], cls)
2262            y = asdict(x[0])
2263            self.assertEqual(list(y), [1])
2264            self.assertIs(y[1], x)
2265
2266        # Dict containing a tuple containing the original dict.
2267        t, = t
2268        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2269            s = self.dumps(t, proto)
2270            x = self.loads(s)
2271            self.assertIsInstance(x, cls)
2272            y = asdict(x)
2273            self.assertEqual(list(y), [1])
2274            self.assertIsInstance(y[1], tuple)
2275            self.assertEqual(len(y[1]), 1)
2276            self.assertIs(y[1][0], x)
2277
2278    def test_recursive_tuple_and_dict(self):
2279        self._test_recursive_tuple_and_dict(dict)
2280
2281    def test_recursive_tuple_and_dict_subclass(self):
2282        self._test_recursive_tuple_and_dict(MyDict, minprotocol=2)
2283
2284    def test_recursive_tuple_and_dict_like(self):
2285        self._test_recursive_tuple_and_dict(REX_seven, asdict=lambda x: x.table)
2286
2287    def _test_recursive_dict_key(self, cls, asdict=identity, minprotocol=0):
2288        # Dict containing an immutable object (as key) containing the original
2289        # dict.
2290        d = cls()
2291        d[K(d)] = 1
2292        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2293            s = self.dumps(d, proto)
2294            x = self.loads(s)
2295            self.assertIsInstance(x, cls)
2296            y = asdict(x)
2297            self.assertEqual(len(y.keys()), 1)
2298            self.assertIsInstance(list(y.keys())[0], K)
2299            self.assertIs(list(y.keys())[0].value, x)
2300
2301    def test_recursive_dict_key(self):
2302        self._test_recursive_dict_key(dict)
2303
2304    def test_recursive_dict_subclass_key(self):
2305        self._test_recursive_dict_key(MyDict, minprotocol=2)
2306
2307    def test_recursive_dict_like_key(self):
2308        self._test_recursive_dict_key(REX_seven, asdict=lambda x: x.table)
2309
2310    def _test_recursive_tuple_and_dict_key(self, cls, asdict=identity, minprotocol=0):
2311        # Tuple containing a dict containing an immutable object (as key)
2312        # containing the original tuple.
2313        t = (cls(),)
2314        t[0][K(t)] = 1
2315        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2316            s = self.dumps(t, proto)
2317            x = self.loads(s)
2318            self.assertIsInstance(x, tuple)
2319            self.assertEqual(len(x), 1)
2320            self.assertIsInstance(x[0], cls)
2321            y = asdict(x[0])
2322            self.assertEqual(len(y), 1)
2323            self.assertIsInstance(list(y.keys())[0], K)
2324            self.assertIs(list(y.keys())[0].value, x)
2325
2326        # Dict containing an immutable object (as key) containing a tuple
2327        # containing the original dict.
2328        t, = t
2329        for proto in range(minprotocol, pickle.HIGHEST_PROTOCOL + 1):
2330            s = self.dumps(t, proto)
2331            x = self.loads(s)
2332            self.assertIsInstance(x, cls)
2333            y = asdict(x)
2334            self.assertEqual(len(y), 1)
2335            self.assertIsInstance(list(y.keys())[0], K)
2336            self.assertIs(list(y.keys())[0].value[0], x)
2337
2338    def test_recursive_tuple_and_dict_key(self):
2339        self._test_recursive_tuple_and_dict_key(dict)
2340
2341    def test_recursive_tuple_and_dict_subclass_key(self):
2342        self._test_recursive_tuple_and_dict_key(MyDict, minprotocol=2)
2343
2344    def test_recursive_tuple_and_dict_like_key(self):
2345        self._test_recursive_tuple_and_dict_key(REX_seven, asdict=lambda x: x.table)
2346
2347    def test_recursive_set(self):
2348        # Set containing an immutable object containing the original set.
2349        y = set()
2350        y.add(K(y))
2351        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2352            s = self.dumps(y, proto)
2353            x = self.loads(s)
2354            self.assertIsInstance(x, set)
2355            self.assertEqual(len(x), 1)
2356            self.assertIsInstance(list(x)[0], K)
2357            self.assertIs(list(x)[0].value, x)
2358
2359        # Immutable object containing a set containing the original object.
2360        y, = y
2361        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2362            s = self.dumps(y, proto)
2363            x = self.loads(s)
2364            self.assertIsInstance(x, K)
2365            self.assertIsInstance(x.value, set)
2366            self.assertEqual(len(x.value), 1)
2367            self.assertIs(list(x.value)[0], x)
2368
2369    def test_recursive_inst(self):
2370        # Mutable object containing itself.
2371        i = Object()
2372        i.attr = i
2373        for proto in protocols:
2374            s = self.dumps(i, proto)
2375            x = self.loads(s)
2376            self.assertIsInstance(x, Object)
2377            self.assertEqual(dir(x), dir(i))
2378            self.assertIs(x.attr, x)
2379
2380    def test_recursive_multi(self):
2381        l = []
2382        d = {1:l}
2383        i = Object()
2384        i.attr = d
2385        l.append(i)
2386        for proto in protocols:
2387            s = self.dumps(l, proto)
2388            x = self.loads(s)
2389            self.assertIsInstance(x, list)
2390            self.assertEqual(len(x), 1)
2391            self.assertEqual(dir(x[0]), dir(i))
2392            self.assertEqual(list(x[0].attr.keys()), [1])
2393            self.assertIs(x[0].attr[1], x)
2394
2395    def _test_recursive_collection_and_inst(self, factory):
2396        # Mutable object containing a collection containing the original
2397        # object.
2398        o = Object()
2399        o.attr = factory([o])
2400        t = type(o.attr)
2401        for proto in protocols:
2402            s = self.dumps(o, proto)
2403            x = self.loads(s)
2404            self.assertIsInstance(x.attr, t)
2405            self.assertEqual(len(x.attr), 1)
2406            self.assertIsInstance(list(x.attr)[0], Object)
2407            self.assertIs(list(x.attr)[0], x)
2408
2409        # Collection containing a mutable object containing the original
2410        # collection.
2411        o = o.attr
2412        for proto in protocols:
2413            s = self.dumps(o, proto)
2414            x = self.loads(s)
2415            self.assertIsInstance(x, t)
2416            self.assertEqual(len(x), 1)
2417            self.assertIsInstance(list(x)[0], Object)
2418            self.assertIs(list(x)[0].attr, x)
2419
2420    def test_recursive_list_and_inst(self):
2421        self._test_recursive_collection_and_inst(list)
2422
2423    def test_recursive_tuple_and_inst(self):
2424        self._test_recursive_collection_and_inst(tuple)
2425
2426    def test_recursive_dict_and_inst(self):
2427        self._test_recursive_collection_and_inst(dict.fromkeys)
2428
2429    def test_recursive_set_and_inst(self):
2430        self._test_recursive_collection_and_inst(set)
2431
2432    def test_recursive_frozenset_and_inst(self):
2433        self._test_recursive_collection_and_inst(frozenset)
2434
2435    def test_recursive_list_subclass_and_inst(self):
2436        self._test_recursive_collection_and_inst(MyList)
2437
2438    def test_recursive_tuple_subclass_and_inst(self):
2439        self._test_recursive_collection_and_inst(MyTuple)
2440
2441    def test_recursive_dict_subclass_and_inst(self):
2442        self._test_recursive_collection_and_inst(MyDict.fromkeys)
2443
2444    def test_recursive_set_subclass_and_inst(self):
2445        self._test_recursive_collection_and_inst(MySet)
2446
2447    def test_recursive_frozenset_subclass_and_inst(self):
2448        self._test_recursive_collection_and_inst(MyFrozenSet)
2449
2450    def test_recursive_inst_state(self):
2451        # Mutable object containing itself.
2452        y = REX_state()
2453        y.state = y
2454        for proto in protocols:
2455            s = self.dumps(y, proto)
2456            x = self.loads(s)
2457            self.assertIsInstance(x, REX_state)
2458            self.assertIs(x.state, x)
2459
2460    def test_recursive_tuple_and_inst_state(self):
2461        # Tuple containing a mutable object containing the original tuple.
2462        t = (REX_state(),)
2463        t[0].state = t
2464        for proto in protocols:
2465            s = self.dumps(t, proto)
2466            x = self.loads(s)
2467            self.assertIsInstance(x, tuple)
2468            self.assertEqual(len(x), 1)
2469            self.assertIsInstance(x[0], REX_state)
2470            self.assertIs(x[0].state, x)
2471
2472        # Mutable object containing a tuple containing the object.
2473        t, = t
2474        for proto in protocols:
2475            s = self.dumps(t, proto)
2476            x = self.loads(s)
2477            self.assertIsInstance(x, REX_state)
2478            self.assertIsInstance(x.state, tuple)
2479            self.assertEqual(len(x.state), 1)
2480            self.assertIs(x.state[0], x)
2481
2482    def test_unicode(self):
2483        endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
2484                    '<\\>', '<\\\U00012345>',
2485                    # surrogates
2486                    '<\udc80>']
2487        for proto in protocols:
2488            for u in endcases:
2489                p = self.dumps(u, proto)
2490                u2 = self.loads(p)
2491                self.assert_is_copy(u, u2)
2492
2493    def test_unicode_high_plane(self):
2494        t = '\U00012345'
2495        for proto in protocols:
2496            p = self.dumps(t, proto)
2497            t2 = self.loads(p)
2498            self.assert_is_copy(t, t2)
2499
2500    def test_unicode_memoization(self):
2501        # Repeated str is re-used (even when escapes added).
2502        for proto in protocols:
2503            for s in '', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r':
2504                p = self.dumps((s, s), proto)
2505                s1, s2 = self.loads(p)
2506                self.assertIs(s1, s2)
2507
2508    def test_bytes(self):
2509        for proto in protocols:
2510            for s in b'', b'xyz', b'xyz'*100:
2511                p = self.dumps(s, proto)
2512                self.assert_is_copy(s, self.loads(p))
2513            for s in [bytes([i]) for i in range(256)]:
2514                p = self.dumps(s, proto)
2515                self.assert_is_copy(s, self.loads(p))
2516            for s in [bytes([i, i]) for i in range(256)]:
2517                p = self.dumps(s, proto)
2518                self.assert_is_copy(s, self.loads(p))
2519
2520    def test_bytes_memoization(self):
2521        for proto in protocols:
2522            for array_type in [bytes, ZeroCopyBytes]:
2523                for s in b'', b'xyz', b'xyz'*100:
2524                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
2525                        b = array_type(s)
2526                        p = self.dumps((b, b), proto)
2527                        x, y = self.loads(p)
2528                        self.assertIs(x, y)
2529                        self.assert_is_copy((b, b), (x, y))
2530
2531                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
2532                        b1, b2 = array_type(s), array_type(s)
2533                        p = self.dumps((b1, b2), proto)
2534                        # Note that (b1, b2) = self.loads(p) might have identical
2535                        # components, i.e., b1 is b2, but this is not always the
2536                        # case if the content is large (equality still holds).
2537                        self.assert_is_copy((b1, b2), self.loads(p))
2538
2539    def test_bytearray(self):
2540        for proto in protocols:
2541            for s in b'', b'xyz', b'xyz'*100:
2542                b = bytearray(s)
2543                p = self.dumps(b, proto)
2544                bb = self.loads(p)
2545                self.assertIsNot(bb, b)
2546                self.assert_is_copy(b, bb)
2547                if proto <= 3:
2548                    # bytearray is serialized using a global reference
2549                    self.assertIn(b'bytearray', p)
2550                    self.assertTrue(opcode_in_pickle(pickle.GLOBAL, p))
2551                elif proto == 4:
2552                    self.assertIn(b'bytearray', p)
2553                    self.assertTrue(opcode_in_pickle(pickle.STACK_GLOBAL, p))
2554                elif proto == 5:
2555                    self.assertNotIn(b'bytearray', p)
2556                    self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
2557
2558    def test_bytearray_memoization(self):
2559        for proto in protocols:
2560            for array_type in [bytearray, ZeroCopyBytearray]:
2561                for s in b'', b'xyz', b'xyz'*100:
2562                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
2563                        b = array_type(s)
2564                        p = self.dumps((b, b), proto)
2565                        b1, b2 = self.loads(p)
2566                        self.assertIs(b1, b2)
2567
2568                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
2569                        b1a, b2a = array_type(s), array_type(s)
2570                        # Unlike bytes, equal but independent bytearray objects are
2571                        # never identical.
2572                        self.assertIsNot(b1a, b2a)
2573
2574                        p = self.dumps((b1a, b2a), proto)
2575                        b1b, b2b = self.loads(p)
2576                        self.assertIsNot(b1b, b2b)
2577
2578                        self.assertIsNot(b1a, b1b)
2579                        self.assert_is_copy(b1a, b1b)
2580
2581                        self.assertIsNot(b2a, b2b)
2582                        self.assert_is_copy(b2a, b2b)
2583
2584    def test_ints(self):
2585        for proto in protocols:
2586            n = sys.maxsize
2587            while n:
2588                for expected in (-n, n):
2589                    s = self.dumps(expected, proto)
2590                    n2 = self.loads(s)
2591                    self.assert_is_copy(expected, n2)
2592                n = n >> 1
2593
2594    def test_long(self):
2595        for proto in protocols:
2596            # 256 bytes is where LONG4 begins.
2597            for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
2598                nbase = 1 << nbits
2599                for npos in nbase-1, nbase, nbase+1:
2600                    for n in npos, -npos:
2601                        pickle = self.dumps(n, proto)
2602                        got = self.loads(pickle)
2603                        self.assert_is_copy(n, got)
2604        # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
2605        # bother with those.
2606        nbase = int("deadbeeffeedface", 16)
2607        nbase += nbase << 1000000
2608        for n in nbase, -nbase:
2609            p = self.dumps(n, 2)
2610            got = self.loads(p)
2611            # assert_is_copy is very expensive here as it precomputes
2612            # a failure message by computing the repr() of n and got,
2613            # we just do the check ourselves.
2614            self.assertIs(type(got), int)
2615            self.assertEqual(n, got)
2616
2617    def test_float(self):
2618        test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5,
2619                       3.14, 263.44582062374053, 6.022e23, 1e30]
2620        test_values = test_values + [-x for x in test_values]
2621        for proto in protocols:
2622            for value in test_values:
2623                pickle = self.dumps(value, proto)
2624                got = self.loads(pickle)
2625                self.assert_is_copy(value, got)
2626
2627    @run_with_locales('LC_ALL', 'de_DE', 'fr_FR', '')
2628    def test_float_format(self):
2629        # make sure that floats are formatted locale independent with proto 0
2630        self.assertEqual(self.dumps(1.2, 0)[0:3], b'F1.')
2631
2632    def test_reduce(self):
2633        for proto in protocols:
2634            inst = AAA()
2635            dumped = self.dumps(inst, proto)
2636            loaded = self.loads(dumped)
2637            self.assertEqual(loaded, REDUCE_A)
2638
2639    def test_getinitargs(self):
2640        for proto in protocols:
2641            inst = initarg(1, 2)
2642            dumped = self.dumps(inst, proto)
2643            loaded = self.loads(dumped)
2644            self.assert_is_copy(inst, loaded)
2645
2646    def test_metaclass(self):
2647        a = use_metaclass()
2648        for proto in protocols:
2649            s = self.dumps(a, proto)
2650            b = self.loads(s)
2651            self.assertEqual(a.__class__, b.__class__)
2652
2653    def test_dynamic_class(self):
2654        a = create_dynamic_class("my_dynamic_class", (object,))
2655        copyreg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
2656        for proto in protocols:
2657            s = self.dumps(a, proto)
2658            b = self.loads(s)
2659            self.assertEqual(a, b)
2660            self.assertIs(type(a), type(b))
2661
2662    def test_structseq(self):
2663        import time
2664        import os
2665
2666        t = time.localtime()
2667        for proto in protocols:
2668            s = self.dumps(t, proto)
2669            u = self.loads(s)
2670            self.assert_is_copy(t, u)
2671            t = os.stat(os.curdir)
2672            s = self.dumps(t, proto)
2673            u = self.loads(s)
2674            self.assert_is_copy(t, u)
2675            if hasattr(os, "statvfs"):
2676                t = os.statvfs(os.curdir)
2677                s = self.dumps(t, proto)
2678                u = self.loads(s)
2679                self.assert_is_copy(t, u)
2680
2681    def test_ellipsis(self):
2682        for proto in protocols:
2683            s = self.dumps(..., proto)
2684            u = self.loads(s)
2685            self.assertIs(..., u)
2686
2687    def test_notimplemented(self):
2688        for proto in protocols:
2689            s = self.dumps(NotImplemented, proto)
2690            u = self.loads(s)
2691            self.assertIs(NotImplemented, u)
2692
2693    def test_singleton_types(self):
2694        # Issue #6477: Test that types of built-in singletons can be pickled.
2695        singletons = [None, ..., NotImplemented]
2696        for singleton in singletons:
2697            for proto in protocols:
2698                s = self.dumps(type(singleton), proto)
2699                u = self.loads(s)
2700                self.assertIs(type(singleton), u)
2701
2702    def test_builtin_types(self):
2703        for t in builtins.__dict__.values():
2704            if isinstance(t, type) and not issubclass(t, BaseException):
2705                for proto in protocols:
2706                    s = self.dumps(t, proto)
2707                    self.assertIs(self.loads(s), t)
2708
2709    def test_builtin_exceptions(self):
2710        for t in builtins.__dict__.values():
2711            if isinstance(t, type) and issubclass(t, BaseException):
2712                for proto in protocols:
2713                    s = self.dumps(t, proto)
2714                    u = self.loads(s)
2715                    if proto <= 2 and issubclass(t, OSError) and t is not BlockingIOError:
2716                        self.assertIs(u, OSError)
2717                    elif proto <= 2 and issubclass(t, ImportError):
2718                        self.assertIs(u, ImportError)
2719                    else:
2720                        self.assertIs(u, t)
2721
2722    def test_builtin_functions(self):
2723        for t in builtins.__dict__.values():
2724            if isinstance(t, types.BuiltinFunctionType):
2725                for proto in protocols:
2726                    s = self.dumps(t, proto)
2727                    self.assertIs(self.loads(s), t)
2728
2729    # Tests for protocol 2
2730
2731    def test_proto(self):
2732        for proto in protocols:
2733            pickled = self.dumps(None, proto)
2734            if proto >= 2:
2735                proto_header = pickle.PROTO + bytes([proto])
2736                self.assertTrue(pickled.startswith(proto_header))
2737            else:
2738                self.assertEqual(count_opcode(pickle.PROTO, pickled), 0)
2739
2740        oob = protocols[-1] + 1     # a future protocol
2741        build_none = pickle.NONE + pickle.STOP
2742        badpickle = pickle.PROTO + bytes([oob]) + build_none
2743        try:
2744            self.loads(badpickle)
2745        except ValueError as err:
2746            self.assertIn("unsupported pickle protocol", str(err))
2747        else:
2748            self.fail("expected bad protocol number to raise ValueError")
2749
2750    def test_long1(self):
2751        x = 12345678910111213141516178920
2752        for proto in protocols:
2753            s = self.dumps(x, proto)
2754            y = self.loads(s)
2755            self.assert_is_copy(x, y)
2756            self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
2757
2758    def test_long4(self):
2759        x = 12345678910111213141516178920 << (256*8)
2760        for proto in protocols:
2761            s = self.dumps(x, proto)
2762            y = self.loads(s)
2763            self.assert_is_copy(x, y)
2764            self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
2765
2766    def test_short_tuples(self):
2767        # Map (proto, len(tuple)) to expected opcode.
2768        expected_opcode = {(0, 0): pickle.TUPLE,
2769                           (0, 1): pickle.TUPLE,
2770                           (0, 2): pickle.TUPLE,
2771                           (0, 3): pickle.TUPLE,
2772                           (0, 4): pickle.TUPLE,
2773
2774                           (1, 0): pickle.EMPTY_TUPLE,
2775                           (1, 1): pickle.TUPLE,
2776                           (1, 2): pickle.TUPLE,
2777                           (1, 3): pickle.TUPLE,
2778                           (1, 4): pickle.TUPLE,
2779
2780                           (2, 0): pickle.EMPTY_TUPLE,
2781                           (2, 1): pickle.TUPLE1,
2782                           (2, 2): pickle.TUPLE2,
2783                           (2, 3): pickle.TUPLE3,
2784                           (2, 4): pickle.TUPLE,
2785
2786                           (3, 0): pickle.EMPTY_TUPLE,
2787                           (3, 1): pickle.TUPLE1,
2788                           (3, 2): pickle.TUPLE2,
2789                           (3, 3): pickle.TUPLE3,
2790                           (3, 4): pickle.TUPLE,
2791                          }
2792        a = ()
2793        b = (1,)
2794        c = (1, 2)
2795        d = (1, 2, 3)
2796        e = (1, 2, 3, 4)
2797        for proto in protocols:
2798            for x in a, b, c, d, e:
2799                s = self.dumps(x, proto)
2800                y = self.loads(s)
2801                self.assert_is_copy(x, y)
2802                expected = expected_opcode[min(proto, 3), len(x)]
2803                self.assertTrue(opcode_in_pickle(expected, s))
2804
2805    def test_singletons(self):
2806        # Map (proto, singleton) to expected opcode.
2807        expected_opcode = {(0, None): pickle.NONE,
2808                           (1, None): pickle.NONE,
2809                           (2, None): pickle.NONE,
2810                           (3, None): pickle.NONE,
2811
2812                           (0, True): pickle.INT,
2813                           (1, True): pickle.INT,
2814                           (2, True): pickle.NEWTRUE,
2815                           (3, True): pickle.NEWTRUE,
2816
2817                           (0, False): pickle.INT,
2818                           (1, False): pickle.INT,
2819                           (2, False): pickle.NEWFALSE,
2820                           (3, False): pickle.NEWFALSE,
2821                          }
2822        for proto in protocols:
2823            for x in None, False, True:
2824                s = self.dumps(x, proto)
2825                y = self.loads(s)
2826                self.assertTrue(x is y, (proto, x, s, y))
2827                expected = expected_opcode[min(proto, 3), x]
2828                self.assertTrue(opcode_in_pickle(expected, s))
2829
2830    def test_newobj_tuple(self):
2831        x = MyTuple([1, 2, 3])
2832        x.foo = 42
2833        x.bar = "hello"
2834        for proto in protocols:
2835            s = self.dumps(x, proto)
2836            y = self.loads(s)
2837            self.assert_is_copy(x, y)
2838
2839    def test_newobj_list(self):
2840        x = MyList([1, 2, 3])
2841        x.foo = 42
2842        x.bar = "hello"
2843        for proto in protocols:
2844            s = self.dumps(x, proto)
2845            y = self.loads(s)
2846            self.assert_is_copy(x, y)
2847
2848    def test_newobj_generic(self):
2849        for proto in protocols:
2850            for C in myclasses:
2851                B = C.__base__
2852                x = C(C.sample)
2853                x.foo = 42
2854                s = self.dumps(x, proto)
2855                y = self.loads(s)
2856                detail = (proto, C, B, x, y, type(y))
2857                self.assert_is_copy(x, y) # XXX revisit
2858                self.assertEqual(B(x), B(y), detail)
2859                self.assertEqual(x.__dict__, y.__dict__, detail)
2860
2861    def test_newobj_proxies(self):
2862        # NEWOBJ should use the __class__ rather than the raw type
2863        classes = myclasses[:]
2864        # Cannot create weakproxies to these classes
2865        for c in (MyInt, MyTuple):
2866            classes.remove(c)
2867        for proto in protocols:
2868            for C in classes:
2869                B = C.__base__
2870                x = C(C.sample)
2871                x.foo = 42
2872                p = weakref.proxy(x)
2873                s = self.dumps(p, proto)
2874                y = self.loads(s)
2875                self.assertEqual(type(y), type(x))  # rather than type(p)
2876                detail = (proto, C, B, x, y, type(y))
2877                self.assertEqual(B(x), B(y), detail)
2878                self.assertEqual(x.__dict__, y.__dict__, detail)
2879
2880    def test_newobj_overridden_new(self):
2881        # Test that Python class with C implemented __new__ is pickleable
2882        for proto in protocols:
2883            x = MyIntWithNew2(1)
2884            x.foo = 42
2885            s = self.dumps(x, proto)
2886            y = self.loads(s)
2887            self.assertIs(type(y), MyIntWithNew2)
2888            self.assertEqual(int(y), 1)
2889            self.assertEqual(y.foo, 42)
2890
2891    def test_newobj_not_class(self):
2892        # Issue 24552
2893        global SimpleNewObj
2894        save = SimpleNewObj
2895        o = SimpleNewObj.__new__(SimpleNewObj)
2896        b = self.dumps(o, 4)
2897        try:
2898            SimpleNewObj = 42
2899            self.assertRaises((TypeError, pickle.UnpicklingError), self.loads, b)
2900        finally:
2901            SimpleNewObj = save
2902
2903    # Register a type with copyreg, with extension code extcode.  Pickle
2904    # an object of that type.  Check that the resulting pickle uses opcode
2905    # (EXT[124]) under proto 2, and not in proto 1.
2906
2907    def produce_global_ext(self, extcode, opcode):
2908        e = ExtensionSaver(extcode)
2909        try:
2910            copyreg.add_extension(__name__, "MyList", extcode)
2911            x = MyList([1, 2, 3])
2912            x.foo = 42
2913            x.bar = "hello"
2914
2915            # Dump using protocol 1 for comparison.
2916            s1 = self.dumps(x, 1)
2917            self.assertIn(__name__.encode("utf-8"), s1)
2918            self.assertIn(b"MyList", s1)
2919            self.assertFalse(opcode_in_pickle(opcode, s1))
2920
2921            y = self.loads(s1)
2922            self.assert_is_copy(x, y)
2923
2924            # Dump using protocol 2 for test.
2925            s2 = self.dumps(x, 2)
2926            self.assertNotIn(__name__.encode("utf-8"), s2)
2927            self.assertNotIn(b"MyList", s2)
2928            self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))
2929
2930            y = self.loads(s2)
2931            self.assert_is_copy(x, y)
2932        finally:
2933            e.restore()
2934
2935    def test_global_ext1(self):
2936        self.produce_global_ext(0x00000001, pickle.EXT1)  # smallest EXT1 code
2937        self.produce_global_ext(0x000000ff, pickle.EXT1)  # largest EXT1 code
2938
2939    def test_global_ext2(self):
2940        self.produce_global_ext(0x00000100, pickle.EXT2)  # smallest EXT2 code
2941        self.produce_global_ext(0x0000ffff, pickle.EXT2)  # largest EXT2 code
2942        self.produce_global_ext(0x0000abcd, pickle.EXT2)  # check endianness
2943
2944    def test_global_ext4(self):
2945        self.produce_global_ext(0x00010000, pickle.EXT4)  # smallest EXT4 code
2946        self.produce_global_ext(0x7fffffff, pickle.EXT4)  # largest EXT4 code
2947        self.produce_global_ext(0x12abcdef, pickle.EXT4)  # check endianness
2948
2949    def test_list_chunking(self):
2950        n = 10  # too small to chunk
2951        x = list(range(n))
2952        for proto in protocols:
2953            s = self.dumps(x, proto)
2954            y = self.loads(s)
2955            self.assert_is_copy(x, y)
2956            num_appends = count_opcode(pickle.APPENDS, s)
2957            self.assertEqual(num_appends, proto > 0)
2958
2959        n = 2500  # expect at least two chunks when proto > 0
2960        x = list(range(n))
2961        for proto in protocols:
2962            s = self.dumps(x, proto)
2963            y = self.loads(s)
2964            self.assert_is_copy(x, y)
2965            num_appends = count_opcode(pickle.APPENDS, s)
2966            if proto == 0:
2967                self.assertEqual(num_appends, 0)
2968            else:
2969                self.assertTrue(num_appends >= 2)
2970
2971    def test_dict_chunking(self):
2972        n = 10  # too small to chunk
2973        x = dict.fromkeys(range(n))
2974        for proto in protocols:
2975            s = self.dumps(x, proto)
2976            self.assertIsInstance(s, bytes_types)
2977            y = self.loads(s)
2978            self.assert_is_copy(x, y)
2979            num_setitems = count_opcode(pickle.SETITEMS, s)
2980            self.assertEqual(num_setitems, proto > 0)
2981
2982        n = 2500  # expect at least two chunks when proto > 0
2983        x = dict.fromkeys(range(n))
2984        for proto in protocols:
2985            s = self.dumps(x, proto)
2986            y = self.loads(s)
2987            self.assert_is_copy(x, y)
2988            num_setitems = count_opcode(pickle.SETITEMS, s)
2989            if proto == 0:
2990                self.assertEqual(num_setitems, 0)
2991            else:
2992                self.assertTrue(num_setitems >= 2)
2993
2994    def test_set_chunking(self):
2995        n = 10  # too small to chunk
2996        x = set(range(n))
2997        for proto in protocols:
2998            s = self.dumps(x, proto)
2999            y = self.loads(s)
3000            self.assert_is_copy(x, y)
3001            num_additems = count_opcode(pickle.ADDITEMS, s)
3002            if proto < 4:
3003                self.assertEqual(num_additems, 0)
3004            else:
3005                self.assertEqual(num_additems, 1)
3006
3007        n = 2500  # expect at least two chunks when proto >= 4
3008        x = set(range(n))
3009        for proto in protocols:
3010            s = self.dumps(x, proto)
3011            y = self.loads(s)
3012            self.assert_is_copy(x, y)
3013            num_additems = count_opcode(pickle.ADDITEMS, s)
3014            if proto < 4:
3015                self.assertEqual(num_additems, 0)
3016            else:
3017                self.assertGreaterEqual(num_additems, 2)
3018
3019    def test_simple_newobj(self):
3020        x = SimpleNewObj.__new__(SimpleNewObj, 0xface)  # avoid __init__
3021        x.abc = 666
3022        for proto in protocols:
3023            with self.subTest(proto=proto):
3024                s = self.dumps(x, proto)
3025                if proto < 1:
3026                    self.assertIn(b'\nI64206', s)  # INT
3027                else:
3028                    self.assertIn(b'M\xce\xfa', s)  # BININT2
3029                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
3030                                 2 <= proto)
3031                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
3032                y = self.loads(s)   # will raise TypeError if __init__ called
3033                self.assert_is_copy(x, y)
3034
3035    def test_complex_newobj(self):
3036        x = ComplexNewObj.__new__(ComplexNewObj, 0xface)  # avoid __init__
3037        x.abc = 666
3038        for proto in protocols:
3039            with self.subTest(proto=proto):
3040                s = self.dumps(x, proto)
3041                if proto < 1:
3042                    self.assertIn(b'\nI64206', s)  # INT
3043                elif proto < 2:
3044                    self.assertIn(b'M\xce\xfa', s)  # BININT2
3045                elif proto < 4:
3046                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
3047                else:
3048                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
3049                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
3050                                 2 <= proto)
3051                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ_EX, s))
3052                y = self.loads(s)   # will raise TypeError if __init__ called
3053                self.assert_is_copy(x, y)
3054
3055    def test_complex_newobj_ex(self):
3056        x = ComplexNewObjEx.__new__(ComplexNewObjEx, 0xface)  # avoid __init__
3057        x.abc = 666
3058        for proto in protocols:
3059            with self.subTest(proto=proto):
3060                s = self.dumps(x, proto)
3061                if proto < 1:
3062                    self.assertIn(b'\nI64206', s)  # INT
3063                elif proto < 2:
3064                    self.assertIn(b'M\xce\xfa', s)  # BININT2
3065                elif proto < 4:
3066                    self.assertIn(b'X\x04\x00\x00\x00FACE', s)  # BINUNICODE
3067                else:
3068                    self.assertIn(b'\x8c\x04FACE', s)  # SHORT_BINUNICODE
3069                self.assertFalse(opcode_in_pickle(pickle.NEWOBJ, s))
3070                self.assertEqual(opcode_in_pickle(pickle.NEWOBJ_EX, s),
3071                                 4 <= proto)
3072                y = self.loads(s)   # will raise TypeError if __init__ called
3073                self.assert_is_copy(x, y)
3074
3075    def test_newobj_list_slots(self):
3076        x = SlotList([1, 2, 3])
3077        x.foo = 42
3078        x.bar = "hello"
3079        s = self.dumps(x, 2)
3080        y = self.loads(s)
3081        self.assert_is_copy(x, y)
3082
3083    def test_reduce_overrides_default_reduce_ex(self):
3084        for proto in protocols:
3085            x = REX_one()
3086            self.assertEqual(x._reduce_called, 0)
3087            s = self.dumps(x, proto)
3088            self.assertEqual(x._reduce_called, 1)
3089            y = self.loads(s)
3090            self.assertEqual(y._reduce_called, 0)
3091
3092    def test_reduce_ex_called(self):
3093        for proto in protocols:
3094            x = REX_two()
3095            self.assertEqual(x._proto, None)
3096            s = self.dumps(x, proto)
3097            self.assertEqual(x._proto, proto)
3098            y = self.loads(s)
3099            self.assertEqual(y._proto, None)
3100
3101    def test_reduce_ex_overrides_reduce(self):
3102        for proto in protocols:
3103            x = REX_three()
3104            self.assertEqual(x._proto, None)
3105            s = self.dumps(x, proto)
3106            self.assertEqual(x._proto, proto)
3107            y = self.loads(s)
3108            self.assertEqual(y._proto, None)
3109
3110    def test_reduce_ex_calls_base(self):
3111        for proto in protocols:
3112            x = REX_four()
3113            self.assertEqual(x._proto, None)
3114            s = self.dumps(x, proto)
3115            self.assertEqual(x._proto, proto)
3116            y = self.loads(s)
3117            self.assertEqual(y._proto, proto)
3118
3119    def test_reduce_calls_base(self):
3120        for proto in protocols:
3121            x = REX_five()
3122            self.assertEqual(x._reduce_called, 0)
3123            s = self.dumps(x, proto)
3124            self.assertEqual(x._reduce_called, 1)
3125            y = self.loads(s)
3126            self.assertEqual(y._reduce_called, 1)
3127
3128    def test_pickle_setstate_None(self):
3129        c = C_None_setstate()
3130        p = self.dumps(c)
3131        with self.assertRaises(TypeError):
3132            self.loads(p)
3133
3134    def test_many_puts_and_gets(self):
3135        # Test that internal data structures correctly deal with lots of
3136        # puts/gets.
3137        keys = ("aaa" + str(i) for i in range(100))
3138        large_dict = dict((k, [4, 5, 6]) for k in keys)
3139        obj = [dict(large_dict), dict(large_dict), dict(large_dict)]
3140
3141        for proto in protocols:
3142            with self.subTest(proto=proto):
3143                dumped = self.dumps(obj, proto)
3144                loaded = self.loads(dumped)
3145                self.assert_is_copy(obj, loaded)
3146
3147    def test_attribute_name_interning(self):
3148        # Test that attribute names of pickled objects are interned when
3149        # unpickling.
3150        for proto in protocols:
3151            x = C()
3152            x.foo = 42
3153            x.bar = "hello"
3154            s = self.dumps(x, proto)
3155            y = self.loads(s)
3156            x_keys = sorted(x.__dict__)
3157            y_keys = sorted(y.__dict__)
3158            for x_key, y_key in zip(x_keys, y_keys):
3159                self.assertIs(x_key, y_key)
3160
3161    def test_pickle_to_2x(self):
3162        # Pickle non-trivial data with protocol 2, expecting that it yields
3163        # the same result as Python 2.x did.
3164        # NOTE: this test is a bit too strong since we can produce different
3165        # bytecode that 2.x will still understand.
3166        dumped = self.dumps(range(5), 2)
3167        self.assertEqual(dumped, DATA_XRANGE)
3168        dumped = self.dumps(set([3]), 2)
3169        self.assertEqual(dumped, DATA_SET2)
3170
3171    def test_large_pickles(self):
3172        # Test the correctness of internal buffering routines when handling
3173        # large data.
3174        for proto in protocols:
3175            data = (1, min, b'xy' * (30 * 1024), len)
3176            dumped = self.dumps(data, proto)
3177            loaded = self.loads(dumped)
3178            self.assertEqual(len(loaded), len(data))
3179            self.assertEqual(loaded, data)
3180
3181    def test_int_pickling_efficiency(self):
3182        # Test compacity of int representation (see issue #12744)
3183        for proto in protocols:
3184            with self.subTest(proto=proto):
3185                pickles = [self.dumps(2**n, proto) for n in range(70)]
3186                sizes = list(map(len, pickles))
3187                # the size function is monotonic
3188                self.assertEqual(sorted(sizes), sizes)
3189                if proto >= 2:
3190                    for p in pickles:
3191                        self.assertFalse(opcode_in_pickle(pickle.LONG, p))
3192
3193    def _check_pickling_with_opcode(self, obj, opcode, proto):
3194        pickled = self.dumps(obj, proto)
3195        self.assertTrue(opcode_in_pickle(opcode, pickled))
3196        unpickled = self.loads(pickled)
3197        self.assertEqual(obj, unpickled)
3198
3199    def test_appends_on_non_lists(self):
3200        # Issue #17720
3201        obj = REX_six([1, 2, 3])
3202        for proto in protocols:
3203            if proto == 0:
3204                self._check_pickling_with_opcode(obj, pickle.APPEND, proto)
3205            else:
3206                self._check_pickling_with_opcode(obj, pickle.APPENDS, proto)
3207
3208    def test_setitems_on_non_dicts(self):
3209        obj = REX_seven({1: -1, 2: -2, 3: -3})
3210        for proto in protocols:
3211            if proto == 0:
3212                self._check_pickling_with_opcode(obj, pickle.SETITEM, proto)
3213            else:
3214                self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto)
3215
    # Exercise framing (proto >= 4) for significant workloads

    # check_frame_opcodes() requires every FRAME argument to be at least
    # this large, and every run of unframed opcodes to be shorter than it.
    FRAME_SIZE_MIN = 4
    # check_frame_opcodes() requires bytes/str arguments found inside a
    # frame to be no longer than this; the framing tests also derive their
    # data sizes and expected frame sizes from it.
    FRAME_SIZE_TARGET = 64 * 1024
3220
    def check_frame_opcodes(self, pickled):
        """
        Check the arguments of FRAME opcodes in a protocol 4+ pickle.

        Note that binary objects that are larger than FRAME_SIZE_TARGET are not
        framed by default and are therefore considered a frame by themselves in
        the following consistency check.

        The opcodes of *pickled* are walked once with pickletools.genops()
        while two positions are tracked: ``frame_end`` (end offset of the
        frame being read, or None outside of a frame) and
        ``frameless_start`` (offset where the current run of unframed
        opcodes began, or None).  Any violation is reported through the
        usual assert* methods.
        """
        frame_end = frameless_start = None
        # Opcodes whose argument is a bytes, bytearray or str payload.
        frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8',
                             'BINUNICODE8', 'BYTEARRAY8'}
        for op, arg, pos in pickletools.genops(pickled):
            if frame_end is not None:
                # No opcode may straddle the end of the current frame.
                self.assertLessEqual(pos, frame_end)
                if pos == frame_end:
                    # This opcode starts right after the frame: leave it.
                    frame_end = None

            if frame_end is not None:  # framed
                # Frames are never nested.
                self.assertNotEqual(op.name, 'FRAME')
                if op.name in frameless_opcodes:
                    # Only short bytes and str objects should be written
                    # in a frame
                    self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)

            else:  # not framed
                if (op.name == 'FRAME' or
                    (op.name in frameless_opcodes and
                     len(arg) > self.FRAME_SIZE_TARGET)):
                    # Frame or large bytes or str object
                    if frameless_start is not None:
                        # Only short data should be written outside of a frame
                        self.assertLess(pos - frameless_start,
                                        self.FRAME_SIZE_MIN)
                        frameless_start = None
                elif frameless_start is None and op.name != 'PROTO':
                    # First unframed opcode of a new run (PROTO is exempt).
                    frameless_start = pos

            if op.name == 'FRAME':
                self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
                # The FRAME opcode takes 9 bytes (1 for the opcode, 8 for
                # its argument); the frame contents follow it.
                frame_end = pos + 9 + arg

        # End of the pickle: an open frame must end exactly here, and a
        # trailing run of unframed opcodes must be short.
        pos = len(pickled)
        if frame_end is not None:
            self.assertEqual(frame_end, pos)
        elif frameless_start is not None:
            self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
3267
3268    @support.skip_if_pgo_task
3269    @support.requires_resource('cpu')
3270    def test_framing_many_objects(self):
3271        obj = list(range(10**5))
3272        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
3273            with self.subTest(proto=proto):
3274                pickled = self.dumps(obj, proto)
3275                unpickled = self.loads(pickled)
3276                self.assertEqual(obj, unpickled)
3277                bytes_per_frame = (len(pickled) /
3278                                   count_opcode(pickle.FRAME, pickled))
3279                self.assertGreater(bytes_per_frame,
3280                                   self.FRAME_SIZE_TARGET / 2)
3281                self.assertLessEqual(bytes_per_frame,
3282                                     self.FRAME_SIZE_TARGET * 1)
3283                self.check_frame_opcodes(pickled)
3284
3285    def test_framing_large_objects(self):
3286        N = 1024 * 1024
3287        small_items = [[i] for i in range(10)]
3288        obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
3289        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
3290            for fast in [False, True]:
3291                with self.subTest(proto=proto, fast=fast):
3292                    if not fast:
3293                        # fast=False by default.
3294                        # This covers in-memory pickling with pickle.dumps().
3295                        pickled = self.dumps(obj, proto)
3296                    else:
3297                        # Pickler is required when fast=True.
3298                        if not hasattr(self, 'pickler'):
3299                            continue
3300                        buf = io.BytesIO()
3301                        pickler = self.pickler(buf, protocol=proto)
3302                        pickler.fast = fast
3303                        pickler.dump(obj)
3304                        pickled = buf.getvalue()
3305                    unpickled = self.loads(pickled)
3306                    # More informative error message in case of failure.
3307                    self.assertEqual([len(x) for x in obj],
3308                                     [len(x) for x in unpickled])
3309                    # Perform full equality check if the lengths match.
3310                    self.assertEqual(obj, unpickled)
3311                    n_frames = count_opcode(pickle.FRAME, pickled)
3312                    # A single frame for small objects between
3313                    # first two large objects.
3314                    self.assertEqual(n_frames, 1)
3315                    self.check_frame_opcodes(pickled)
3316
3317    def test_optional_frames(self):
3318        if pickle.HIGHEST_PROTOCOL < 4:
3319            return
3320
3321        def remove_frames(pickled, keep_frame=None):
3322            """Remove frame opcodes from the given pickle."""
3323            frame_starts = []
3324            # 1 byte for the opcode and 8 for the argument
3325            frame_opcode_size = 9
3326            for opcode, _, pos in pickletools.genops(pickled):
3327                if opcode.name == 'FRAME':
3328                    frame_starts.append(pos)
3329
3330            newpickle = bytearray()
3331            last_frame_end = 0
3332            for i, pos in enumerate(frame_starts):
3333                if keep_frame and keep_frame(i):
3334                    continue
3335                newpickle += pickled[last_frame_end:pos]
3336                last_frame_end = pos + frame_opcode_size
3337            newpickle += pickled[last_frame_end:]
3338            return newpickle
3339
3340        frame_size = self.FRAME_SIZE_TARGET
3341        num_frames = 20
3342        # Large byte objects (dict values) intermittent with small objects
3343        # (dict keys)
3344        for bytes_type in (bytes, bytearray):
3345            obj = {i: bytes_type([i]) * frame_size for i in range(num_frames)}
3346
3347            for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
3348                pickled = self.dumps(obj, proto)
3349
3350                frameless_pickle = remove_frames(pickled)
3351                self.assertEqual(count_opcode(pickle.FRAME, frameless_pickle), 0)
3352                self.assertEqual(obj, self.loads(frameless_pickle))
3353
3354                some_frames_pickle = remove_frames(pickled, lambda i: i % 2)
3355                self.assertLess(count_opcode(pickle.FRAME, some_frames_pickle),
3356                                count_opcode(pickle.FRAME, pickled))
3357                self.assertEqual(obj, self.loads(some_frames_pickle))
3358
3359    @support.skip_if_pgo_task
3360    def test_framed_write_sizes_with_delayed_writer(self):
3361        class ChunkAccumulator:
3362            """Accumulate pickler output in a list of raw chunks."""
3363            def __init__(self):
3364                self.chunks = []
3365            def write(self, chunk):
3366                self.chunks.append(chunk)
3367            def concatenate_chunks(self):
3368                return b"".join(self.chunks)
3369
3370        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
3371            objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
3372                       for i in range(int(1e4))]
3373            # Add a large unique ASCII string
3374            objects.append('0123456789abcdef' *
3375                           (self.FRAME_SIZE_TARGET // 16 + 1))
3376
3377            # Protocol 4 packs groups of small objects into frames and issues
3378            # calls to write only once or twice per frame:
3379            # The C pickler issues one call to write per-frame (header and
3380            # contents) while Python pickler issues two calls to write: one for
3381            # the frame header and one for the frame binary contents.
3382            writer = ChunkAccumulator()
3383            self.pickler(writer, proto).dump(objects)
3384
3385            # Actually read the binary content of the chunks after the end
3386            # of the call to dump: any memoryview passed to write should not
3387            # be released otherwise this delayed access would not be possible.
3388            pickled = writer.concatenate_chunks()
3389            reconstructed = self.loads(pickled)
3390            self.assertEqual(reconstructed, objects)
3391            self.assertGreater(len(writer.chunks), 1)
3392
3393            # memoryviews should own the memory.
3394            del objects
3395            support.gc_collect()
3396            self.assertEqual(writer.concatenate_chunks(), pickled)
3397
3398            n_frames = (len(pickled) - 1) // self.FRAME_SIZE_TARGET + 1
3399            # There should be at least one call to write per frame
3400            self.assertGreaterEqual(len(writer.chunks), n_frames)
3401
3402            # but not too many either: there can be one for the proto,
3403            # one per-frame header, one per frame for the actual contents,
3404            # and two for the header.
3405            self.assertLessEqual(len(writer.chunks), 2 * n_frames + 3)
3406
3407            chunk_sizes = [len(c) for c in writer.chunks]
3408            large_sizes = [s for s in chunk_sizes
3409                           if s >= self.FRAME_SIZE_TARGET]
3410            medium_sizes = [s for s in chunk_sizes
3411                           if 9 < s < self.FRAME_SIZE_TARGET]
3412            small_sizes = [s for s in chunk_sizes if s <= 9]
3413
3414            # Large chunks should not be too large:
3415            for chunk_size in large_sizes:
3416                self.assertLess(chunk_size, 2 * self.FRAME_SIZE_TARGET,
3417                                chunk_sizes)
3418            # There shouldn't bee too many small chunks: the protocol header,
3419            # the frame headers and the large string headers are written
3420            # in small chunks.
3421            self.assertLessEqual(len(small_sizes),
3422                                 len(large_sizes) + len(medium_sizes) + 3,
3423                                 chunk_sizes)
3424
3425    def test_nested_names(self):
3426        global Nested
3427        class Nested:
3428            class A:
3429                class B:
3430                    class C:
3431                        pass
3432        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3433            for obj in [Nested.A, Nested.A.B, Nested.A.B.C]:
3434                with self.subTest(proto=proto, obj=obj):
3435                    unpickled = self.loads(self.dumps(obj, proto))
3436                    self.assertIs(obj, unpickled)
3437
3438    def test_recursive_nested_names(self):
3439        global Recursive
3440        class Recursive:
3441            pass
3442        Recursive.mod = sys.modules[Recursive.__module__]
3443        Recursive.__qualname__ = 'Recursive.mod.Recursive'
3444        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3445            with self.subTest(proto=proto):
3446                unpickled = self.loads(self.dumps(Recursive, proto))
3447                self.assertIs(unpickled, Recursive)
3448        del Recursive.mod # break reference loop
3449
3450    def test_recursive_nested_names2(self):
3451        global Recursive
3452        class Recursive:
3453            pass
3454        Recursive.ref = Recursive
3455        Recursive.__qualname__ = 'Recursive.ref'
3456        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3457            with self.subTest(proto=proto):
3458                unpickled = self.loads(self.dumps(Recursive, proto))
3459                self.assertIs(unpickled, Recursive)
3460        del Recursive.ref # break reference loop
3461
3462    def test_py_methods(self):
3463        global PyMethodsTest
3464        class PyMethodsTest:
3465            @staticmethod
3466            def cheese():
3467                return "cheese"
3468            @classmethod
3469            def wine(cls):
3470                assert cls is PyMethodsTest
3471                return "wine"
3472            def biscuits(self):
3473                assert isinstance(self, PyMethodsTest)
3474                return "biscuits"
3475            class Nested:
3476                "Nested class"
3477                @staticmethod
3478                def ketchup():
3479                    return "ketchup"
3480                @classmethod
3481                def maple(cls):
3482                    assert cls is PyMethodsTest.Nested
3483                    return "maple"
3484                def pie(self):
3485                    assert isinstance(self, PyMethodsTest.Nested)
3486                    return "pie"
3487
3488        py_methods = (
3489            PyMethodsTest.cheese,
3490            PyMethodsTest.wine,
3491            PyMethodsTest().biscuits,
3492            PyMethodsTest.Nested.ketchup,
3493            PyMethodsTest.Nested.maple,
3494            PyMethodsTest.Nested().pie
3495        )
3496        py_unbound_methods = (
3497            (PyMethodsTest.biscuits, PyMethodsTest),
3498            (PyMethodsTest.Nested.pie, PyMethodsTest.Nested)
3499        )
3500        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3501            for method in py_methods:
3502                with self.subTest(proto=proto, method=method):
3503                    unpickled = self.loads(self.dumps(method, proto))
3504                    self.assertEqual(method(), unpickled())
3505            for method, cls in py_unbound_methods:
3506                obj = cls()
3507                with self.subTest(proto=proto, method=method):
3508                    unpickled = self.loads(self.dumps(method, proto))
3509                    self.assertEqual(method(obj), unpickled(obj))
3510
3511        descriptors = (
3512            PyMethodsTest.__dict__['cheese'],  # static method descriptor
3513            PyMethodsTest.__dict__['wine'],  # class method descriptor
3514        )
3515        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3516            for descr in descriptors:
3517                with self.subTest(proto=proto, descr=descr):
3518                    self.assertRaises(TypeError, self.dumps, descr, proto)
3519
3520    def test_c_methods(self):
3521        global Subclass
3522        class Subclass(tuple):
3523            class Nested(str):
3524                pass
3525
3526        c_methods = (
3527            # bound built-in method
3528            ("abcd".index, ("c",)),
3529            # unbound built-in method
3530            (str.index, ("abcd", "c")),
3531            # bound "slot" method
3532            ([1, 2, 3].__len__, ()),
3533            # unbound "slot" method
3534            (list.__len__, ([1, 2, 3],)),
3535            # bound "coexist" method
3536            ({1, 2}.__contains__, (2,)),
3537            # unbound "coexist" method
3538            (set.__contains__, ({1, 2}, 2)),
3539            # built-in class method
3540            (dict.fromkeys, (("a", 1), ("b", 2))),
3541            # built-in static method
3542            (bytearray.maketrans, (b"abc", b"xyz")),
3543            # subclass methods
3544            (Subclass([1,2,2]).count, (2,)),
3545            (Subclass.count, (Subclass([1,2,2]), 2)),
3546            (Subclass.Nested("sweet").count, ("e",)),
3547            (Subclass.Nested.count, (Subclass.Nested("sweet"), "e")),
3548        )
3549        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3550            for method, args in c_methods:
3551                with self.subTest(proto=proto, method=method):
3552                    unpickled = self.loads(self.dumps(method, proto))
3553                    self.assertEqual(method(*args), unpickled(*args))
3554
3555        descriptors = (
3556            bytearray.__dict__['maketrans'],  # built-in static method descriptor
3557            dict.__dict__['fromkeys'],  # built-in class method descriptor
3558        )
3559        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
3560            for descr in descriptors:
3561                with self.subTest(proto=proto, descr=descr):
3562                    self.assertRaises(TypeError, self.dumps, descr, proto)
3563
3564    def test_compat_pickle(self):
3565        tests = [
3566            (range(1, 7), '__builtin__', 'xrange'),
3567            (map(int, '123'), 'itertools', 'imap'),
3568            (functools.reduce, '__builtin__', 'reduce'),
3569            (dbm.whichdb, 'whichdb', 'whichdb'),
3570            (Exception(), 'exceptions', 'Exception'),
3571            (collections.UserDict(), 'UserDict', 'IterableUserDict'),
3572            (collections.UserList(), 'UserList', 'UserList'),
3573            (collections.defaultdict(), 'collections', 'defaultdict'),
3574        ]
3575        for val, mod, name in tests:
3576            for proto in range(3):
3577                with self.subTest(type=type(val), proto=proto):
3578                    pickled = self.dumps(val, proto)
3579                    self.assertIn(('c%s\n%s' % (mod, name)).encode(), pickled)
3580                    self.assertIs(type(self.loads(pickled)), type(val))
3581
3582    #
3583    # PEP 574 tests below
3584    #
3585
3586    def buffer_like_objects(self):
3587        # Yield buffer-like objects with the bytestring "abcdef" in them
3588        bytestring = b"abcdefgh"
3589        yield ZeroCopyBytes(bytestring)
3590        yield ZeroCopyBytearray(bytestring)
3591        if _testbuffer is not None:
3592            items = list(bytestring)
3593            value = int.from_bytes(bytestring, byteorder='little')
3594            for flags in (0, _testbuffer.ND_WRITABLE):
3595                # 1-D, contiguous
3596                yield PicklableNDArray(items, format='B', shape=(8,),
3597                                       flags=flags)
3598                # 2-D, C-contiguous
3599                yield PicklableNDArray(items, format='B', shape=(4, 2),
3600                                       strides=(2, 1), flags=flags)
3601                # 2-D, Fortran-contiguous
3602                yield PicklableNDArray(items, format='B',
3603                                       shape=(4, 2), strides=(1, 4),
3604                                       flags=flags)
3605
3606    def test_in_band_buffers(self):
3607        # Test in-band buffers (PEP 574)
3608        for obj in self.buffer_like_objects():
3609            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
3610                data = self.dumps(obj, proto)
3611                if obj.c_contiguous and proto >= 5:
3612                    # The raw memory bytes are serialized in physical order
3613                    self.assertIn(b"abcdefgh", data)
3614                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 0)
3615                if proto >= 5:
3616                    self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data),
3617                                     1 if obj.readonly else 0)
3618                    self.assertEqual(count_opcode(pickle.BYTEARRAY8, data),
3619                                     0 if obj.readonly else 1)
3620                    # Return a true value from buffer_callback should have
3621                    # the same effect
3622                    def buffer_callback(obj):
3623                        return True
3624                    data2 = self.dumps(obj, proto,
3625                                       buffer_callback=buffer_callback)
3626                    self.assertEqual(data2, data)
3627
3628                new = self.loads(data)
3629                # It's a copy
3630                self.assertIsNot(new, obj)
3631                self.assertIs(type(new), type(obj))
3632                self.assertEqual(new, obj)
3633
3634    # XXX Unfortunately cannot test non-contiguous array
3635    # (see comment in PicklableNDArray.__reduce_ex__)
3636
3637    def test_oob_buffers(self):
3638        # Test out-of-band buffers (PEP 574)
3639        for obj in self.buffer_like_objects():
3640            for proto in range(0, 5):
3641                # Need protocol >= 5 for buffer_callback
3642                with self.assertRaises(ValueError):
3643                    self.dumps(obj, proto,
3644                               buffer_callback=[].append)
3645            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
3646                buffers = []
3647                buffer_callback = lambda pb: buffers.append(pb.raw())
3648                data = self.dumps(obj, proto,
3649                                  buffer_callback=buffer_callback)
3650                self.assertNotIn(b"abcdefgh", data)
3651                self.assertEqual(count_opcode(pickle.SHORT_BINBYTES, data), 0)
3652                self.assertEqual(count_opcode(pickle.BYTEARRAY8, data), 0)
3653                self.assertEqual(count_opcode(pickle.NEXT_BUFFER, data), 1)
3654                self.assertEqual(count_opcode(pickle.READONLY_BUFFER, data),
3655                                 1 if obj.readonly else 0)
3656
3657                if obj.c_contiguous:
3658                    self.assertEqual(bytes(buffers[0]), b"abcdefgh")
3659                # Need buffers argument to unpickle properly
3660                with self.assertRaises(pickle.UnpicklingError):
3661                    self.loads(data)
3662
3663                new = self.loads(data, buffers=buffers)
3664                if obj.zero_copy_reconstruct:
3665                    # Zero-copy achieved
3666                    self.assertIs(new, obj)
3667                else:
3668                    self.assertIs(type(new), type(obj))
3669                    self.assertEqual(new, obj)
3670                # Non-sequence buffers accepted too
3671                new = self.loads(data, buffers=iter(buffers))
3672                if obj.zero_copy_reconstruct:
3673                    # Zero-copy achieved
3674                    self.assertIs(new, obj)
3675                else:
3676                    self.assertIs(type(new), type(obj))
3677                    self.assertEqual(new, obj)
3678
3679    def test_oob_buffers_writable_to_readonly(self):
3680        # Test reconstructing readonly object from writable buffer
3681        obj = ZeroCopyBytes(b"foobar")
3682        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
3683            buffers = []
3684            buffer_callback = buffers.append
3685            data = self.dumps(obj, proto, buffer_callback=buffer_callback)
3686
3687            buffers = map(bytearray, buffers)
3688            new = self.loads(data, buffers=buffers)
3689            self.assertIs(type(new), type(obj))
3690            self.assertEqual(new, obj)
3691
3692    def test_buffers_error(self):
3693        pb = pickle.PickleBuffer(b"foobar")
3694        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
3695            data = self.dumps(pb, proto, buffer_callback=[].append)
3696            # Non iterable buffers
3697            with self.assertRaises(TypeError):
3698                self.loads(data, buffers=object())
3699            # Buffer iterable exhausts too early
3700            with self.assertRaises(pickle.UnpicklingError):
3701                self.loads(data, buffers=[])
3702
3703    def test_inband_accept_default_buffers_argument(self):
3704        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
3705            data_pickled = self.dumps(1, proto, buffer_callback=None)
3706            data = self.loads(data_pickled, buffers=None)
3707
3708    @unittest.skipIf(np is None, "Test needs Numpy")
3709    def test_buffers_numpy(self):
3710        def check_no_copy(x, y):
3711            np.testing.assert_equal(x, y)
3712            self.assertEqual(x.ctypes.data, y.ctypes.data)
3713
3714        def check_copy(x, y):
3715            np.testing.assert_equal(x, y)
3716            self.assertNotEqual(x.ctypes.data, y.ctypes.data)
3717
3718        def check_array(arr):
3719            # In-band
3720            for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
3721                data = self.dumps(arr, proto)
3722                new = self.loads(data)
3723                check_copy(arr, new)
3724            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
3725                buffer_callback = lambda _: True
3726                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
3727                new = self.loads(data)
3728                check_copy(arr, new)
3729            # Out-of-band
3730            for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
3731                buffers = []
3732                buffer_callback = buffers.append
3733                data = self.dumps(arr, proto, buffer_callback=buffer_callback)
3734                new = self.loads(data, buffers=buffers)
3735                if arr.flags.c_contiguous or arr.flags.f_contiguous:
3736                    check_no_copy(arr, new)
3737                else:
3738                    check_copy(arr, new)
3739
3740        # 1-D
3741        arr = np.arange(6)
3742        check_array(arr)
3743        # 1-D, non-contiguous
3744        check_array(arr[::2])
3745        # 2-D, C-contiguous
3746        arr = np.arange(12).reshape((3, 4))
3747        check_array(arr)
3748        # 2-D, F-contiguous
3749        check_array(arr.T)
3750        # 2-D, non-contiguous
3751        check_array(arr[::2])
3752
3753    def test_evil_class_mutating_dict(self):
3754        # https://github.com/python/cpython/issues/92930
3755        from random import getrandbits
3756
3757        global Bad
3758        class Bad:
3759            def __eq__(self, other):
3760                return ENABLED
3761            def __hash__(self):
3762                return 42
3763            def __reduce__(self):
3764                if getrandbits(6) == 0:
3765                    collection.clear()
3766                return (Bad, ())
3767
3768        for proto in protocols:
3769            for _ in range(20):
3770                ENABLED = False
3771                collection = {Bad(): Bad() for _ in range(20)}
3772                for bad in collection:
3773                    bad.bad = bad
3774                    bad.collection = collection
3775                ENABLED = True
3776                try:
3777                    data = self.dumps(collection, proto)
3778                    self.loads(data)
3779                except RuntimeError as e:
3780                    expected = "changed size during iteration"
3781                    self.assertIn(expected, str(e))
3782
3783
3784class BigmemPickleTests:
3785
3786    # Binary protocols can serialize longs of up to 2 GiB-1
3787
3788    @bigmemtest(size=_2G, memuse=3.6, dry_run=False)
3789    def test_huge_long_32b(self, size):
3790        data = 1 << (8 * size)
3791        try:
3792            for proto in protocols:
3793                if proto < 2:
3794                    continue
3795                with self.subTest(proto=proto):
3796                    with self.assertRaises((ValueError, OverflowError)):
3797                        self.dumps(data, protocol=proto)
3798        finally:
3799            data = None
3800
3801    # Protocol 3 can serialize up to 4 GiB-1 as a bytes object
3802    # (older protocols don't have a dedicated opcode for bytes and are
3803    # too inefficient)
3804
3805    @bigmemtest(size=_2G, memuse=2.5, dry_run=False)
3806    def test_huge_bytes_32b(self, size):
3807        data = b"abcd" * (size // 4)
3808        try:
3809            for proto in protocols:
3810                if proto < 3:
3811                    continue
3812                with self.subTest(proto=proto):
3813                    try:
3814                        pickled = self.dumps(data, protocol=proto)
3815                        header = (pickle.BINBYTES +
3816                                  struct.pack("<I", len(data)))
3817                        data_start = pickled.index(data)
3818                        self.assertEqual(
3819                            header,
3820                            pickled[data_start-len(header):data_start])
3821                    finally:
3822                        pickled = None
3823        finally:
3824            data = None
3825
3826    @bigmemtest(size=_4G, memuse=2.5, dry_run=False)
3827    def test_huge_bytes_64b(self, size):
3828        data = b"acbd" * (size // 4)
3829        try:
3830            for proto in protocols:
3831                if proto < 3:
3832                    continue
3833                with self.subTest(proto=proto):
3834                    if proto == 3:
3835                        # Protocol 3 does not support large bytes objects.
3836                        # Verify that we do not crash when processing one.
3837                        with self.assertRaises((ValueError, OverflowError)):
3838                            self.dumps(data, protocol=proto)
3839                        continue
3840                    try:
3841                        pickled = self.dumps(data, protocol=proto)
3842                        header = (pickle.BINBYTES8 +
3843                                  struct.pack("<Q", len(data)))
3844                        data_start = pickled.index(data)
3845                        self.assertEqual(
3846                            header,
3847                            pickled[data_start-len(header):data_start])
3848                    finally:
3849                        pickled = None
3850        finally:
3851            data = None
3852
3853    # All protocols use 1-byte per printable ASCII character; we add another
3854    # byte because the encoded form has to be copied into the internal buffer.
3855
3856    @bigmemtest(size=_2G, memuse=8, dry_run=False)
3857    def test_huge_str_32b(self, size):
3858        data = "abcd" * (size // 4)
3859        try:
3860            for proto in protocols:
3861                if proto == 0:
3862                    continue
3863                with self.subTest(proto=proto):
3864                    try:
3865                        pickled = self.dumps(data, protocol=proto)
3866                        header = (pickle.BINUNICODE +
3867                                  struct.pack("<I", len(data)))
3868                        data_start = pickled.index(b'abcd')
3869                        self.assertEqual(
3870                            header,
3871                            pickled[data_start-len(header):data_start])
3872                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
3873                                          pickled.index(b"abcd")), len(data))
3874                    finally:
3875                        pickled = None
3876        finally:
3877            data = None
3878
3879    # BINUNICODE (protocols 1, 2 and 3) cannot carry more than 2**32 - 1 bytes
3880    # of utf-8 encoded unicode. BINUNICODE8 (protocol 4) supports these huge
3881    # unicode strings however.
3882
3883    @bigmemtest(size=_4G, memuse=8, dry_run=False)
3884    def test_huge_str_64b(self, size):
3885        data = "abcd" * (size // 4)
3886        try:
3887            for proto in protocols:
3888                if proto == 0:
3889                    continue
3890                with self.subTest(proto=proto):
3891                    if proto < 4:
3892                        with self.assertRaises((ValueError, OverflowError)):
3893                            self.dumps(data, protocol=proto)
3894                        continue
3895                    try:
3896                        pickled = self.dumps(data, protocol=proto)
3897                        header = (pickle.BINUNICODE8 +
3898                                  struct.pack("<Q", len(data)))
3899                        data_start = pickled.index(b'abcd')
3900                        self.assertEqual(
3901                            header,
3902                            pickled[data_start-len(header):data_start])
3903                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
3904                                          pickled.index(b"abcd")), len(data))
3905                    finally:
3906                        pickled = None
3907        finally:
3908            data = None
3909
3910
3911# Test classes for reduce_ex
3912
class R:
    """Helper whose __reduce__() returns the value given at construction.

    The ``reduce`` argument is stored unchanged and handed back by
    __reduce__(), which lets a test feed an arbitrary reduce value to the
    pickler.
    """
    def __init__(self, reduce=None):
        self.reduce = reduce
    def __reduce__(self, proto=None):
        # __reduce__() is called without arguments by object.__reduce_ex__();
        # ``proto`` is kept (and ignored) only so that existing callers that
        # pass it positionally keep working.
        return self.reduce
3918
class REX:
    """Helper whose __reduce_ex__() returns the value given at construction."""
    def __init__(self, reduce_ex=None):
        # Stored unchanged and handed back by __reduce_ex__().
        self.reduce_ex = reduce_ex
    def __reduce_ex__(self, proto):
        # ``proto`` is accepted but not used.
        return self.reduce_ex
3924
class REX_one:
    """Defines only __reduce__; __reduce_ex__ is inherited from object.

    ``_reduce_called`` is set to 1 on the instance once __reduce__ has run.
    """
    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        return (REX_one, ())
3931
class REX_two:
    """Defines only __reduce_ex__; __reduce__ is inherited from object.

    The protocol number received by __reduce_ex__ is recorded in ``_proto``.
    """
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        return (REX_two, ())
3938
class REX_three:
    """Defines both hooks; only __reduce_ex__ is expected to be used.

    __reduce_ex__ records the protocol in ``_proto`` and reduces to a
    REX_two instance, while __reduce__ raises TestFailed when called.
    """
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        return (REX_two, ())

    def __reduce__(self):
        raise TestFailed("This __reduce__ shouldn't be called")
3946
class REX_four:
    """Calling the base class __reduce_ex__ should succeed.

    The protocol number is recorded in ``_proto`` before delegating.
    """
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        reduced = object.__reduce_ex__(self, proto)
        return reduced
3953
class REX_five:
    """Delegates to object.__reduce__; this used to recurse infinitely.

    ``_reduce_called`` is set to 1 on the instance once __reduce__ has run.
    """
    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        reduced = object.__reduce__(self)
        return reduced
3960
class REX_six:
    """Container used to check the 4th item (list iterator) of the
    reduce protocol.
    """

    def __init__(self, items=None):
        if items is None:
            items = []
        self.items = items

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.items == other.items

    def append(self, item):
        self.items.append(item)

    def __reduce__(self):
        # Only the callable, empty args and the items iterator are supplied.
        return (type(self), (), None, iter(self.items), None)
3973
class REX_seven:
    """Mapping-like helper used to check the 5th item (dict iterator) of
    the reduce protocol.
    """

    def __init__(self, table=None):
        if table is None:
            table = {}
        self.table = table

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.table == other.table

    def __setitem__(self, key, value):
        self.table[key] = value

    def __reduce__(self):
        # Only the callable, empty args and the items iterator are supplied.
        return (type(self), (), None, None, iter(self.table.items()))
3986
class REX_state:
    """This class is used to check the 3rd argument (state) of
    the reduce protocol.
    """

    def __init__(self, state=None):
        self.state = state

    def __eq__(self, other):
        if type(self) is not type(other):
            return False
        return self.state == other.state

    def __setstate__(self, state):
        self.state = state

    def __reduce__(self):
        # The stored state is passed as the third item of the reduce value.
        return (type(self), (), self.state)
3999
class REX_None:
    """Setting __reduce_ex__ to None should fail."""
    __reduce_ex__ = None
4003
class R_None:
    """Setting __reduce__ to None should fail."""
    __reduce__ = None
4007
class C_None_setstate:
    """Setting __setstate__ to None should fail."""
    def __getstate__(self):
        # Any value works here; it only has to provide some state.
        return 1

    __setstate__ = None
4014
class CustomError(Exception):
    """Exception raised on purpose by the helper classes and tests here."""
    pass
4017
class Unpickleable:
    """Object whose __reduce__() always raises CustomError."""
    def __reduce__(self):
        raise CustomError

# Module-level instance of Unpickleable.
UNPICKLEABLE = Unpickleable()
4023
class UnpickleableCallable(Unpickleable):
    """Unpickleable variant that is also callable; calling it does nothing."""
    def __call__(self, *args, **kwargs):
        pass
4027
4028
4029# Test classes for newobj
4030
class MyInt(int):
    """int subclass; ``sample`` is a value usable to build an instance."""
    sample = 1


class MyFloat(float):
    """float subclass with a sample constructor value."""
    sample = 1.0


class MyComplex(complex):
    """complex subclass with a sample constructor value."""
    sample = 1.0 + 0.0j


class MyStr(str):
    """str subclass with an ASCII sample value."""
    sample = "hello"


class MyUnicode(str):
    """str subclass with a non-ASCII sample value."""
    sample = "hello \u1234"


class MyTuple(tuple):
    """tuple subclass with a sample constructor value."""
    sample = (1, 2, 3)


class MyList(list):
    """list subclass with a sample constructor value."""
    sample = [1, 2, 3]


class MyDict(dict):
    """dict subclass with a sample constructor value."""
    sample = dict(a=1, b=2)


class MySet(set):
    """set subclass with a sample constructor value."""
    sample = set(("a", "b"))


class MyFrozenSet(frozenset):
    """frozenset subclass with a sample constructor value."""
    sample = frozenset(("a", "b"))


# Every builtin-derived helper class above, in definition order.
myclasses = [
    MyInt,
    MyFloat,
    MyComplex,
    MyStr,
    MyUnicode,
    MyTuple,
    MyList,
    MyDict,
    MySet,
    MyFrozenSet,
]
4065
class MyIntWithNew(int):
    """int subclass whose __new__ always raises AssertionError."""
    def __new__(cls, value):
        raise AssertionError
4069
class MyIntWithNew2(MyIntWithNew):
    """MyIntWithNew subclass that uses int.__new__ as its __new__ again."""
    __new__ = int.__new__
4072
4073
class SlotList(MyList):
    """MyList subclass that declares a single slot named ``foo``."""
    __slots__ = ["foo"]
4076
# Ruff "redefined while unused" false positive here due to `global` variables
# being assigned (and then restored) from within test methods earlier in the file
class SimpleNewObj(int):  # noqa: F811
    """int subclass whose __init__ must never run.

    Instances compare equal when both their integer value and their
    instance ``__dict__`` match.
    """

    def __init__(self, *args, **kwargs):
        # raise an error, to make sure this isn't called
        raise TypeError("SimpleNewObj.__init__() didn't expect to get called")

    def __eq__(self, other):
        if int(self) != int(other):
            return False
        return self.__dict__ == other.__dict__
4085
class ComplexNewObj(SimpleNewObj):
    """SimpleNewObj that supplies positional constructor arguments."""
    def __getnewargs__(self):
        # The value as uppercase hexadecimal text, plus the base (16).
        return ('%X' % self, 16)
4089
class ComplexNewObjEx(SimpleNewObj):
    """SimpleNewObj that supplies positional and keyword constructor args."""
    def __getnewargs_ex__(self):
        # The value as uppercase hexadecimal text (positional) and the
        # base passed by keyword.
        return ('%X' % self,), {'base': 16}
4093
class BadGetattr:
    """Class whose __getattr__ fallback itself looks up ``self.foo``."""
    def __getattr__(self, key):
        # ``foo`` is not defined on the class, so unless an instance
        # attribute ``foo`` exists this lookup re-enters __getattr__.
        self.foo
4097
class NoNew:
    """Class whose instances hide ``__new__`` from attribute lookup."""

    def __getattribute__(self, name):
        # Every attribute except '__new__' is resolved normally.
        if name != '__new__':
            return super().__getattribute__(name)
        raise AttributeError
4103
4104
class AbstractPickleModuleTests:
    """Mixin with tests for the module-level pickle API.

    The methods use ``self.dump``, ``self.dumps``, ``self.load``,
    ``self.loads``, ``self.Pickler`` and ``self.Unpickler``; the concrete
    test case is expected to supply them.
    """

    def test_dump_closed_file(self):
        # dump() to an already closed binary file must raise ValueError.
        f = open(TESTFN, "wb")
        try:
            f.close()
            self.assertRaises(ValueError, self.dump, 123, f)
        finally:
            os_helper.unlink(TESTFN)

    def test_load_closed_file(self):
        # load() from an already closed binary file must raise ValueError.
        try:
            # Create the file first so that it can be opened for reading.
            with open(TESTFN, "wb"):
                pass
            f = open(TESTFN, "rb")
            f.close()
            self.assertRaises(ValueError, self.load, f)
        finally:
            os_helper.unlink(TESTFN)

    def test_load_from_and_dump_to_file(self):
        # Round-trip a small container through an in-memory binary stream.
        stream = io.BytesIO()
        data = [123, {}, 124]
        self.dump(data, stream)
        stream.seek(0)
        unpickled = self.load(stream)
        self.assertEqual(unpickled, data)

    def test_highest_protocol(self):
        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
        self.assertEqual(pickle.HIGHEST_PROTOCOL, 5)

    def test_callapi(self):
        f = io.BytesIO()
        # With and without keyword arguments
        self.dump(123, f, -1)
        self.dump(123, file=f, protocol=-1)
        self.dumps(123, -1)
        self.dumps(123, protocol=-1)
        self.Pickler(f, -1)
        self.Pickler(f, protocol=-1)

    def test_dump_text_file(self):
        # dump() to a file opened in text mode must raise TypeError for
        # every protocol.
        f = open(TESTFN, "w")
        try:
            for proto in protocols:
                self.assertRaises(TypeError, self.dump, 123, f, proto)
        finally:
            f.close()
            os_helper.unlink(TESTFN)

    def test_incomplete_input(self):
        # The stream ends before the data announced by the first opcode.
        s = io.BytesIO(b"X''.")
        self.assertRaises((EOFError, struct.error, pickle.UnpicklingError), self.load, s)

    def test_bad_init(self):
        # Test issue3664 (pickle can segfault from a badly initialized Pickler).
        # Override initialization without calling __init__() of the superclass.
        class BadPickler(self.Pickler):
            def __init__(self): pass

        class BadUnpickler(self.Unpickler):
            def __init__(self): pass

        self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
        self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)

    def test_unpickler_bad_file(self):
        # bpo-38384: Crash in _pickle if the read attribute raises an error.
        def raises_oserror(self, *args, **kwargs):
            raise OSError
        @property
        def bad_property(self):
            raise CustomError

        # File without read and readline
        class F:
            pass
        self.assertRaises((AttributeError, TypeError), self.Unpickler, F())

        # File without read
        class F:
            readline = raises_oserror
        self.assertRaises((AttributeError, TypeError), self.Unpickler, F())

        # File without readline
        class F:
            read = raises_oserror
        self.assertRaises((AttributeError, TypeError), self.Unpickler, F())

        # File with bad read
        class F:
            read = bad_property
            readline = raises_oserror
        self.assertRaises(CustomError, self.Unpickler, F())

        # File with bad readline
        class F:
            readline = bad_property
            read = raises_oserror
        self.assertRaises(CustomError, self.Unpickler, F())

        # File with bad readline, no read
        class F:
            readline = bad_property
        self.assertRaises(CustomError, self.Unpickler, F())

        # File with bad read, no readline
        class F:
            read = bad_property
        self.assertRaises((AttributeError, CustomError), self.Unpickler, F())

        # File with bad peek
        # (CustomError is tolerated but not required here.)
        class F:
            peek = bad_property
            read = raises_oserror
            readline = raises_oserror
        try:
            self.Unpickler(F())
        except CustomError:
            pass

        # File with bad readinto
        # (CustomError is tolerated but not required here.)
        class F:
            readinto = bad_property
            read = raises_oserror
            readline = raises_oserror
        try:
            self.Unpickler(F())
        except CustomError:
            pass

    def test_pickler_bad_file(self):
        # File without write
        class F:
            pass
        self.assertRaises(TypeError, self.Pickler, F())

        # File with bad write
        class F:
            @property
            def write(self):
                raise CustomError
        self.assertRaises(CustomError, self.Pickler, F())

    def check_dumps_loads_oob_buffers(self, dumps, loads):
        # No need to do the full gamut of tests here, just enough to
        # check that dumps() and loads() redirect their arguments
        # to the underlying Pickler and Unpickler, respectively.
        obj = ZeroCopyBytes(b"foo")

        for proto in range(0, 5):
            # Need protocol >= 5 for buffer_callback
            with self.assertRaises(ValueError):
                dumps(obj, protocol=proto,
                      buffer_callback=[].append)
        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
            buffers = []
            buffer_callback = buffers.append
            data = dumps(obj, protocol=proto,
                         buffer_callback=buffer_callback)
            # The payload went to the callback, not into the pickle itself.
            self.assertNotIn(b"foo", data)
            self.assertEqual(bytes(buffers[0]), b"foo")
            # Need buffers argument to unpickle properly
            with self.assertRaises(pickle.UnpicklingError):
                loads(data)
            new = loads(data, buffers=buffers)
            self.assertIs(new, obj)

    def test_dumps_loads_oob_buffers(self):
        # Test out-of-band buffers (PEP 574) with top-level dumps() and loads()
        self.check_dumps_loads_oob_buffers(self.dumps, self.loads)

    def test_dump_load_oob_buffers(self):
        # Test out-of-band buffers (PEP 574) with top-level dump() and load()
        def dumps(obj, **kwargs):
            f = io.BytesIO()
            self.dump(obj, f, **kwargs)
            return f.getvalue()

        def loads(data, **kwargs):
            f = io.BytesIO(data)
            return self.load(f, **kwargs)

        self.check_dumps_loads_oob_buffers(dumps, loads)
4288
4289
class AbstractPersistentPicklerTests:

    # This class defines persistent_id() and persistent_load()
    # functions that should be used by the pickler.  All even integers
    # are pickled using persistent ids.

    def persistent_id(self, object):
        # Even ints get their decimal text as persistent id; the special
        # string gets an empty (false) id; everything else gets no id.
        if isinstance(object, int) and object % 2 == 0:
            self.id_count += 1
            return str(object)
        if object == "test_false_value":
            self.false_count += 1
            return ""
        return None

    def persistent_load(self, oid):
        # A false id maps back to the special string, any other id to the
        # (even) integer it spells.
        if oid:
            self.load_count += 1
            value = int(oid)
            assert value % 2 == 0
            return value
        self.load_false_count += 1
        return "test_false_value"

    def test_persistence(self):
        # Ten consecutive ints (five of them even) plus the special string.
        expected = [*range(10), "test_false_value"]
        for proto in protocols:
            # Start every protocol with fresh counters.
            self.id_count = 0
            self.false_count = 0
            self.load_false_count = 0
            self.load_count = 0
            pickled = self.dumps(expected, proto)
            self.assertEqual(self.loads(pickled), expected)
            self.assertEqual(self.id_count, 5)
            self.assertEqual(self.false_count, 1)
            self.assertEqual(self.load_count, 5)
            self.assertEqual(self.load_false_count, 1)
4328
4329
class AbstractIdentityPersistentPicklerTests:

    # persistent_id() and persistent_load() both hand their argument back
    # unchanged, so every object acts as its own persistent id.

    def persistent_id(self, obj):
        return obj

    def persistent_load(self, pid):
        return pid

    def _check_return_correct_type(self, obj, proto):
        # The round-tripped value must be equal to, and of the same type
        # as, the original.
        result = self.loads(self.dumps(obj, proto))
        self.assertIsInstance(result, type(obj))
        self.assertEqual(result, obj)

    def test_return_correct_type(self):
        richer_samples = [b"abc\n", "abc\n", -1, -1.1 * 0.1, str]
        for proto in protocols:
            if proto != 0:
                for sample in richer_samples:
                    self._check_return_correct_type(sample, proto)
            else:
                # Protocol 0 supports only ASCII strings.
                self._check_return_correct_type("abc", 0)

    def test_protocol0_is_ascii_only(self):
        message = 'persistent IDs in protocol 0 must be ASCII strings'
        non_ascii_str = "\N{EMPTY SET}"

        # Pickling side.
        with self.assertRaises(pickle.PicklingError) as caught:
            self.dumps(non_ascii_str, 0)
        self.assertEqual(str(caught.exception), message)

        # Unpickling side, using a hand-built PERSID record.
        pickled = pickle.PERSID + non_ascii_str.encode('utf-8') + b'\n.'
        with self.assertRaises(pickle.UnpicklingError) as caught:
            self.loads(pickled)
        self.assertEqual(str(caught.exception), message)
4363
4364
class AbstractPicklerUnpicklerObjectTests:
    """Mixin with tests that drive Pickler and Unpickler objects directly.

    Concrete subclasses must set ``pickler_class`` and ``unpickler_class``.
    """

    pickler_class = None
    unpickler_class = None

    def setUp(self):
        # Both classes have to be provided by the concrete subclass.
        assert self.pickler_class
        assert self.unpickler_class

    def test_clear_pickler_memo(self):
        # To test whether clear_memo() has any effect, we pickle an object,
        # then pickle it again without clearing the memo; the two serialized
        # forms should be different. If we clear_memo() and then pickle the
        # object again, the third serialized form should be identical to the
        # first one we obtained.
        data = ["abcdefg", "abcdefg", 44]
        for proto in protocols:
            f = io.BytesIO()
            pickler = self.pickler_class(f, proto)

            pickler.dump(data)
            first_pickled = f.getvalue()

            # Reset BytesIO object.
            f.seek(0)
            f.truncate()

            pickler.dump(data)
            second_pickled = f.getvalue()

            # Reset the Pickler and BytesIO objects.
            pickler.clear_memo()
            f.seek(0)
            f.truncate()

            pickler.dump(data)
            third_pickled = f.getvalue()

            self.assertNotEqual(first_pickled, second_pickled)
            self.assertEqual(first_pickled, third_pickled)

    def test_priming_pickler_memo(self):
        # Verify that we can set the Pickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)

        pickler.dump(data)
        first_pickled = f.getvalue()

        # A second pickler starts out with the memo of the first one.
        f = io.BytesIO()
        primed = self.pickler_class(f)
        primed.memo = pickler.memo

        primed.dump(data)
        primed_pickled = f.getvalue()

        self.assertNotEqual(first_pickled, primed_pickled)

    def test_priming_unpickler_memo(self):
        # Verify that we can set the Unpickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)

        pickler.dump(data)
        first_pickled = f.getvalue()

        # Produce a second pickle with a pickler primed from the first.
        f = io.BytesIO()
        primed = self.pickler_class(f)
        primed.memo = pickler.memo

        primed.dump(data)
        primed_pickled = f.getvalue()

        unpickler = self.unpickler_class(io.BytesIO(first_pickled))
        unpickled_data1 = unpickler.load()

        self.assertEqual(unpickled_data1, data)

        # Load the primed pickle with an unpickler that reuses the memo of
        # the first unpickler.
        primed = self.unpickler_class(io.BytesIO(primed_pickled))
        primed.memo = unpickler.memo
        unpickled_data2 = primed.load()

        primed.memo.clear()

        self.assertEqual(unpickled_data2, data)
        # assertIs reports both objects on failure, unlike assertTrue(a is b).
        self.assertIs(unpickled_data2, unpickled_data1)

    def test_reusing_unpickler_objects(self):
        # One Unpickler must be able to load a second pickle after its
        # underlying stream has been rewritten.
        data1 = ["abcdefg", "abcdefg", 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)
        pickler.dump(data1)
        pickled1 = f.getvalue()

        data2 = ["abcdefg", 44, 44]
        f = io.BytesIO()
        pickler = self.pickler_class(f)
        pickler.dump(data2)
        pickled2 = f.getvalue()

        f = io.BytesIO()
        f.write(pickled1)
        f.seek(0)
        unpickler = self.unpickler_class(f)
        self.assertEqual(unpickler.load(), data1)

        f.seek(0)
        f.truncate()
        f.write(pickled2)
        f.seek(0)
        self.assertEqual(unpickler.load(), data2)

    def _check_multiple_unpicklings(self, ioclass, *, seekable=True):
        # Load N concatenated copies of one pickle from a single ioclass
        # stream; with seekable=True the stream position is also checked
        # after every load.
        for proto in protocols:
            with self.subTest(proto=proto):
                data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len]
                f = ioclass()
                pickler = self.pickler_class(f, protocol=proto)
                pickler.dump(data1)
                pickled = f.getvalue()

                N = 5
                f = ioclass(pickled * N)
                unpickler = self.unpickler_class(f)
                for i in range(N):
                    if seekable:
                        pos = f.tell()
                    self.assertEqual(unpickler.load(), data1)
                    if seekable:
                        # Exactly one pickle must have been consumed.
                        self.assertEqual(f.tell(), pos + len(pickled))
                # Nothing is left once all N pickles have been loaded.
                self.assertRaises(EOFError, unpickler.load)

    def test_multiple_unpicklings_seekable(self):
        self._check_multiple_unpicklings(io.BytesIO)

    def test_multiple_unpicklings_unseekable(self):
        self._check_multiple_unpicklings(UnseekableIO, seekable=False)

    def test_multiple_unpicklings_minimal(self):
        # File-like object that doesn't support peek() and readinto()
        # (bpo-39681)
        self._check_multiple_unpicklings(MinimalIO, seekable=False)

    def test_unpickling_buffering_readline(self):
        # Issue #12687: the unpickler's buffering logic could fail with
        # text mode opcodes.
        data = list(range(10))
        for proto in protocols:
            for buf_size in range(1, 11):
                f = io.BufferedRandom(io.BytesIO(), buffer_size=buf_size)
                pickler = self.pickler_class(f, protocol=proto)
                pickler.dump(data)
                f.seek(0)
                unpickler = self.unpickler_class(f)
                self.assertEqual(unpickler.load(), data)

    def test_pickle_invalid_reducer_override(self):
        # gh-103035
        obj = object()

        f = io.BytesIO()
        class MyPickler(self.pickler_class):
            pass
        pickler = MyPickler(f)
        pickler.dump(obj)

        # A reducer_override that is not callable must raise TypeError.
        pickler.clear_memo()
        pickler.reducer_override = None
        with self.assertRaises(TypeError):
            pickler.dump(obj)

        pickler.clear_memo()
        pickler.reducer_override = 10
        with self.assertRaises(TypeError):
            pickler.dump(obj)
4542
4543# Tests for dispatch_table attribute
4544
# Text that AAA instances reduce to.
REDUCE_A = 'reduce_A'


class AAA:
    """Object whose reduce value rebuilds it as ``str(REDUCE_A)``."""

    def __reduce__(self):
        return (str, (REDUCE_A,))
4550
class BBB:
    """Object with instance state and a __setstate__ that leaves a marker."""

    def __init__(self):
        # Add an instance attribute to enable state-saving routines at pickling
        # time.
        setattr(self, "a", "some attribute")

    def __setstate__(self, state):
        # The incoming state is ignored; ``a`` records that we were called.
        setattr(self, "a", "BBB.__setstate__")
4559
4560
def setstate_bbb(obj, state):
    """Custom state setter for BBB objects.

    A callable like this one may be written by someone other than the
    author of the BBB class. Passed as the state_setter item of a custom
    reducer, it allows custom state-setting behavior for BBB objects. It
    is the analogue of list_setitems or dict_setitems, but for foreign
    classes/functions.

    The given *state* is ignored; ``obj.a`` is set to a fixed marker.
    """
    setattr(obj, "a", "custom state_setter")
4571
4572
4573
class AbstractCustomPicklerClass:
    """Pickler implementing a reducing hook using reducer_override."""

    def reducer_override(self, obj):
        # The decision is based only on the object's __name__, if any.
        name = getattr(obj, "__name__", None)

        # asking the pickler to save f as 5
        if name == 'f':
            return (int, (5,))

        if name == 'MyClass':
            return (str, ('some str',))

        # in this case, the callback returns an invalid result (not a 2-5
        # tuple or a string), the pickler should raise a proper error.
        if name == 'g':
            return False

        # Simulate a case when the reducer fails. The error should
        # be propagated to the original ``dump`` call.
        if name == 'h':
            raise ValueError('The reducer just failed')

        # Anything else is left to the regular pickling machinery.
        return NotImplemented
4597
class AbstractHookTests:
    """Mixin testing the reducer_override hook of ``self.pickler_class``.

    The expected results (``f`` pickled as 5, ``MyClass`` as 'some str',
    failures for ``g`` and ``h``) are those of the hook defined by
    AbstractCustomPicklerClass.
    """

    def test_pickler_hook(self):
        # test the ability of a custom, user-defined CPickler subclass to
        # override the default reducing routines of any type using the method
        # reducer_override

        def f():
            pass

        def g():
            pass

        def h():
            pass

        class MyClass:
            pass

        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(proto=proto):
                bio = io.BytesIO()
                p = self.pickler_class(bio, proto)

                p.dump([f, MyClass, math.log])
                new_f, some_str, math_log = pickle.loads(bio.getvalue())

                self.assertEqual(new_f, 5)
                self.assertEqual(some_str, 'some str')
                # math.log does not have its usual reducer overridden, so the
                # custom reduction callback should silently direct the pickler
                # to the default pickling by attribute, by returning
                # NotImplemented
                self.assertIs(math_log, math.log)

                # The hook returns an invalid reduce value for g.
                with self.assertRaises(pickle.PicklingError):
                    p.dump(g)

                # The hook raises for h; the error must reach the caller.
                with self.assertRaisesRegex(
                        ValueError, 'The reducer just failed'):
                    p.dump(h)

    @support.cpython_only
    def test_reducer_override_no_reference_cycle(self):
        # bpo-39492: reducer_override used to induce a spurious reference cycle
        # inside the Pickler object, that could prevent all serialized objects
        # from being garbage-collected without explicitly invoking gc.collect.

        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(proto=proto):
                def f():
                    pass

                wr = weakref.ref(f)

                bio = io.BytesIO()
                p = self.pickler_class(bio, proto)
                p.dump(f)
                new_f = pickle.loads(bio.getvalue())
                # A unittest assertion is not stripped under -O and matches
                # the check made in test_pickler_hook.
                self.assertEqual(new_f, 5)

                del p
                del f

                # With the pickler and the local name gone, nothing may keep
                # the function alive.
                self.assertIsNone(wr())
4662
4663
4664class AbstractDispatchTableTests:
4665
4666    def test_default_dispatch_table(self):
4667        # No dispatch_table attribute by default
4668        f = io.BytesIO()
4669        p = self.pickler_class(f, 0)
4670        with self.assertRaises(AttributeError):
4671            p.dispatch_table
4672        self.assertFalse(hasattr(p, 'dispatch_table'))
4673
4674    def test_class_dispatch_table(self):
4675        # A dispatch_table attribute can be specified class-wide
4676        dt = self.get_dispatch_table()
4677
4678        class MyPickler(self.pickler_class):
4679            dispatch_table = dt
4680
4681        def dumps(obj, protocol=None):
4682            f = io.BytesIO()
4683            p = MyPickler(f, protocol)
4684            self.assertEqual(p.dispatch_table, dt)
4685            p.dump(obj)
4686            return f.getvalue()
4687
4688        self._test_dispatch_table(dumps, dt)
4689
4690    def test_instance_dispatch_table(self):
4691        # A dispatch_table attribute can also be specified instance-wide
4692        dt = self.get_dispatch_table()
4693
4694        def dumps(obj, protocol=None):
4695            f = io.BytesIO()
4696            p = self.pickler_class(f, protocol)
4697            p.dispatch_table = dt
4698            self.assertEqual(p.dispatch_table, dt)
4699            p.dump(obj)
4700            return f.getvalue()
4701
4702        self._test_dispatch_table(dumps, dt)
4703
4704    def test_dispatch_table_None_item(self):
4705        # gh-93627
4706        obj = object()
4707        f = io.BytesIO()
4708        pickler = self.pickler_class(f)
4709        pickler.dispatch_table = {type(obj): None}
4710        with self.assertRaises(TypeError):
4711            pickler.dump(obj)
4712
4713    def _test_dispatch_table(self, dumps, dispatch_table):
4714        def custom_load_dump(obj):
4715            return pickle.loads(dumps(obj, 0))
4716
4717        def default_load_dump(obj):
4718            return pickle.loads(pickle.dumps(obj, 0))
4719
4720        # pickling complex numbers using protocol 0 relies on copyreg
4721        # so check pickling a complex number still works
4722        z = 1 + 2j
4723        self.assertEqual(custom_load_dump(z), z)
4724        self.assertEqual(default_load_dump(z), z)
4725
4726        # modify pickling of complex
4727        REDUCE_1 = 'reduce_1'
4728        def reduce_1(obj):
4729            return str, (REDUCE_1,)
4730        dispatch_table[complex] = reduce_1
4731        self.assertEqual(custom_load_dump(z), REDUCE_1)
4732        self.assertEqual(default_load_dump(z), z)
4733
4734        # check picklability of AAA and BBB
4735        a = AAA()
4736        b = BBB()
4737        self.assertEqual(custom_load_dump(a), REDUCE_A)
4738        self.assertIsInstance(custom_load_dump(b), BBB)
4739        self.assertEqual(default_load_dump(a), REDUCE_A)
4740        self.assertIsInstance(default_load_dump(b), BBB)
4741
4742        # modify pickling of BBB
4743        dispatch_table[BBB] = reduce_1
4744        self.assertEqual(custom_load_dump(a), REDUCE_A)
4745        self.assertEqual(custom_load_dump(b), REDUCE_1)
4746        self.assertEqual(default_load_dump(a), REDUCE_A)
4747        self.assertIsInstance(default_load_dump(b), BBB)
4748
4749        # revert pickling of BBB and modify pickling of AAA
4750        REDUCE_2 = 'reduce_2'
4751        def reduce_2(obj):
4752            return str, (REDUCE_2,)
4753        dispatch_table[AAA] = reduce_2
4754        del dispatch_table[BBB]
4755        self.assertEqual(custom_load_dump(a), REDUCE_2)
4756        self.assertIsInstance(custom_load_dump(b), BBB)
4757        self.assertEqual(default_load_dump(a), REDUCE_A)
4758        self.assertIsInstance(default_load_dump(b), BBB)
4759
4760        # End-to-end testing of save_reduce with the state_setter keyword
4761        # argument. This is a dispatch_table test as the primary goal of
4762        # state_setter is to tweak objects reduction behavior.
4763        # In particular, state_setter is useful when the default __setstate__
4764        # behavior is not flexible enough.
4765
4766        # No custom reducer for b has been registered for now, so
4767        # BBB.__setstate__ should be used at unpickling time
4768        self.assertEqual(default_load_dump(b).a, "BBB.__setstate__")
4769
4770        def reduce_bbb(obj):
4771            return BBB, (), obj.__dict__, None, None, setstate_bbb
4772
4773        dispatch_table[BBB] = reduce_bbb
4774
4775        # The custom reducer reduce_bbb includes a state setter, that should
4776        # have priority over BBB.__setstate__
4777        self.assertEqual(custom_load_dump(b).a, "custom state_setter")
4778
4779
if __name__ == "__main__":
    # When run as a script, print source text that can be pasted back into
    # this file to refresh the DATA<n> / DATA<n>_DIS constants, one pair for
    # every protocol from 0 through pickle.HIGHEST_PROTOCOL.
    from pickletools import dis
    sample = create_data()
    for proto in range(pickle.HIGHEST_PROTOCOL+1):
        blob = pickle.dumps(sample, proto)

        # The raw pickle, split into bytes literals of at most 20 bytes each.
        chunks = [bytes(blob[start:start+20])
                  for start in range(0, len(blob), 20)]
        print("DATA{0} = (".format(proto))
        for chunk in chunks:
            print("    {0!r}".format(chunk))
        print(")")
        print()

        # The matching disassembly, wrapped in a triple-quoted string.
        print("# Disassembly of DATA{0}".format(proto))
        print("DATA{0}_DIS = \"\"\"\\".format(proto))
        dis(blob)
        print("\"\"\"")
        print()
4797