• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6 
7 #ifndef Py_BUILD_CORE_BUILTIN
8 #  define Py_BUILD_CORE_MODULE 1
9 #endif
10 
11 #include "Python.h"
12 #include "pycore_bytesobject.h"       // _PyBytesWriter
13 #include "pycore_ceval.h"             // _Py_EnterRecursiveCall()
14 #include "pycore_critical_section.h"  // Py_BEGIN_CRITICAL_SECTION()
15 #include "pycore_long.h"              // _PyLong_AsByteArray()
16 #include "pycore_moduleobject.h"      // _PyModule_GetState()
17 #include "pycore_object.h"            // _PyNone_Type
18 #include "pycore_pystate.h"           // _PyThreadState_GET()
19 #include "pycore_runtime.h"           // _Py_ID()
20 #include "pycore_setobject.h"         // _PySet_NextEntry()
21 #include "pycore_sysmodule.h"         // _PySys_GetAttr()
22 
23 #include <stdlib.h>               // strtol()
24 
25 
26 PyDoc_STRVAR(pickle_module_doc,
27 "Optimized C implementation for the Python pickle module.");
28 
29 /*[clinic input]
30 module _pickle
31 class _pickle.Pickler "PicklerObject *" ""
32 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" ""
33 class _pickle.Unpickler "UnpicklerObject *" ""
34 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" ""
35 [clinic start generated code]*/
36 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b6d7191ab6466cda]*/
37 
/* Pickle protocol versions used by this module.
   Raise HIGHEST_PROTOCOL whenever new opcodes are added to the pickle
   protocol.  Raise DEFAULT_PROTOCOL only once the oldest still supported
   version of Python already includes that protocol. */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
45 
46 #ifdef MS_WINDOWS
47 // These are already typedefs from windows.h, pulled in via pycore_runtime.h.
48 #define FLOAT FLOAT_
49 #define INT INT_
50 #define LONG LONG_
51 
52 /* This can already be defined on Windows to set the character set
53    the Windows header files treat as default */
54 #ifdef UNICODE
55 #undef UNICODE
56 #endif
57 #endif
58 
/* Pickle opcodes, one character each.  This list has to stay in step with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8 = '\x8d',
    BINBYTES8 = '\x8e',
    EMPTY_SET = '\x8f',
    ADDITEMS = '\x90',
    FROZENSET = '\x91',
    NEWOBJ_EX = '\x92',
    STACK_GLOBAL = '\x93',
    MEMOIZE = '\x94',
    FRAME = '\x95',

    /* Protocol 5 */
    BYTEARRAY8 = '\x96',
    NEXT_BUFFER = '\x97',
    READONLY_BUFFER = '\x98'
};
139 
/* Size and tuning constants shared by the Pickler and the Unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but it is unclear that a
       difference would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth after which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 16 * 8192,

    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
161 
162 /*************************************************************************/
163 
164 /* State of the pickle module, per PEP 3121. */
165 typedef struct {
166     /* Exception classes for pickle. */
167     PyObject *PickleError;
168     PyObject *PicklingError;
169     PyObject *UnpicklingError;
170 
171     /* copyreg.dispatch_table, {type_object: pickling_function} */
172     PyObject *dispatch_table;
173 
174     /* For the extension opcodes EXT1, EXT2 and EXT4. */
175 
176     /* copyreg._extension_registry, {(module_name, function_name): code} */
177     PyObject *extension_registry;
178     /* copyreg._extension_cache, {code: object} */
179     PyObject *extension_cache;
180     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
181     PyObject *inverted_registry;
182 
183     /* Import mappings for compatibility with Python 2.x */
184 
185     /* _compat_pickle.NAME_MAPPING,
186        {(oldmodule, oldname): (newmodule, newname)} */
187     PyObject *name_mapping_2to3;
188     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
189     PyObject *import_mapping_2to3;
190     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
191     PyObject *name_mapping_3to2;
192     PyObject *import_mapping_3to2;
193 
194     /* codecs.encode, used for saving bytes in older protocols */
195     PyObject *codecs_encode;
196     /* builtins.getattr, used for saving nested names with protocol < 4 */
197     PyObject *getattr;
198     /* functools.partial, used for implementing __newobj_ex__ with protocols
199        2 and 3 */
200     PyObject *partial;
201 
202     /* Types */
203     PyTypeObject *Pickler_Type;
204     PyTypeObject *Unpickler_Type;
205     PyTypeObject *Pdata_Type;
206     PyTypeObject *PicklerMemoProxyType;
207     PyTypeObject *UnpicklerMemoProxyType;
208 } PickleState;
209 
210 /* Forward declaration of the _pickle module definition. */
211 static struct PyModuleDef _picklemodule;
212 
213 /* Given a module object, get its per-module state. */
214 static inline PickleState *
_Pickle_GetState(PyObject * module)215 _Pickle_GetState(PyObject *module)
216 {
217     void *state = _PyModule_GetState(module);
218     assert(state != NULL);
219     return (PickleState *)state;
220 }
221 
222 static inline PickleState *
_Pickle_GetStateByClass(PyTypeObject * cls)223 _Pickle_GetStateByClass(PyTypeObject *cls)
224 {
225     void *state = _PyType_GetModuleState(cls);
226     assert(state != NULL);
227     return (PickleState *)state;
228 }
229 
230 static inline PickleState *
_Pickle_FindStateByType(PyTypeObject * tp)231 _Pickle_FindStateByType(PyTypeObject *tp)
232 {
233     PyObject *module = PyType_GetModuleByDef(tp, &_picklemodule);
234     assert(module != NULL);
235     return _Pickle_GetState(module);
236 }
237 
238 /* Clear the given pickle module state. */
239 static void
_Pickle_ClearState(PickleState * st)240 _Pickle_ClearState(PickleState *st)
241 {
242     Py_CLEAR(st->PickleError);
243     Py_CLEAR(st->PicklingError);
244     Py_CLEAR(st->UnpicklingError);
245     Py_CLEAR(st->dispatch_table);
246     Py_CLEAR(st->extension_registry);
247     Py_CLEAR(st->extension_cache);
248     Py_CLEAR(st->inverted_registry);
249     Py_CLEAR(st->name_mapping_2to3);
250     Py_CLEAR(st->import_mapping_2to3);
251     Py_CLEAR(st->name_mapping_3to2);
252     Py_CLEAR(st->import_mapping_3to2);
253     Py_CLEAR(st->codecs_encode);
254     Py_CLEAR(st->getattr);
255     Py_CLEAR(st->partial);
256     Py_CLEAR(st->Pickler_Type);
257     Py_CLEAR(st->Unpickler_Type);
258     Py_CLEAR(st->Pdata_Type);
259     Py_CLEAR(st->PicklerMemoProxyType);
260     Py_CLEAR(st->UnpicklerMemoProxyType);
261 }
262 
263 /* Initialize the given pickle module state. */
264 static int
_Pickle_InitState(PickleState * st)265 _Pickle_InitState(PickleState *st)
266 {
267     PyObject *copyreg = NULL;
268     PyObject *compat_pickle = NULL;
269 
270     st->getattr = _PyEval_GetBuiltin(&_Py_ID(getattr));
271     if (st->getattr == NULL)
272         goto error;
273 
274     copyreg = PyImport_ImportModule("copyreg");
275     if (!copyreg)
276         goto error;
277     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
278     if (!st->dispatch_table)
279         goto error;
280     if (!PyDict_CheckExact(st->dispatch_table)) {
281         PyErr_Format(PyExc_RuntimeError,
282                      "copyreg.dispatch_table should be a dict, not %.200s",
283                      Py_TYPE(st->dispatch_table)->tp_name);
284         goto error;
285     }
286     st->extension_registry = \
287         PyObject_GetAttrString(copyreg, "_extension_registry");
288     if (!st->extension_registry)
289         goto error;
290     if (!PyDict_CheckExact(st->extension_registry)) {
291         PyErr_Format(PyExc_RuntimeError,
292                      "copyreg._extension_registry should be a dict, "
293                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
294         goto error;
295     }
296     st->inverted_registry = \
297         PyObject_GetAttrString(copyreg, "_inverted_registry");
298     if (!st->inverted_registry)
299         goto error;
300     if (!PyDict_CheckExact(st->inverted_registry)) {
301         PyErr_Format(PyExc_RuntimeError,
302                      "copyreg._inverted_registry should be a dict, "
303                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
304         goto error;
305     }
306     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
307     if (!st->extension_cache)
308         goto error;
309     if (!PyDict_CheckExact(st->extension_cache)) {
310         PyErr_Format(PyExc_RuntimeError,
311                      "copyreg._extension_cache should be a dict, "
312                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
313         goto error;
314     }
315     Py_CLEAR(copyreg);
316 
317     /* Load the 2.x -> 3.x stdlib module mapping tables */
318     compat_pickle = PyImport_ImportModule("_compat_pickle");
319     if (!compat_pickle)
320         goto error;
321     st->name_mapping_2to3 = \
322         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
323     if (!st->name_mapping_2to3)
324         goto error;
325     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
326         PyErr_Format(PyExc_RuntimeError,
327                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
328                      Py_TYPE(st->name_mapping_2to3)->tp_name);
329         goto error;
330     }
331     st->import_mapping_2to3 = \
332         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
333     if (!st->import_mapping_2to3)
334         goto error;
335     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
336         PyErr_Format(PyExc_RuntimeError,
337                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
338                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
339         goto error;
340     }
341     /* ... and the 3.x -> 2.x mapping tables */
342     st->name_mapping_3to2 = \
343         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
344     if (!st->name_mapping_3to2)
345         goto error;
346     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
347         PyErr_Format(PyExc_RuntimeError,
348                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
349                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
350         goto error;
351     }
352     st->import_mapping_3to2 = \
353         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
354     if (!st->import_mapping_3to2)
355         goto error;
356     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
357         PyErr_Format(PyExc_RuntimeError,
358                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
359                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
360         goto error;
361     }
362     Py_CLEAR(compat_pickle);
363 
364     st->codecs_encode = _PyImport_GetModuleAttrString("codecs", "encode");
365     if (st->codecs_encode == NULL) {
366         goto error;
367     }
368     if (!PyCallable_Check(st->codecs_encode)) {
369         PyErr_Format(PyExc_RuntimeError,
370                      "codecs.encode should be a callable, not %.200s",
371                      Py_TYPE(st->codecs_encode)->tp_name);
372         goto error;
373     }
374 
375     st->partial = _PyImport_GetModuleAttrString("functools", "partial");
376     if (!st->partial)
377         goto error;
378 
379     return 0;
380 
381   error:
382     Py_CLEAR(copyreg);
383     Py_CLEAR(compat_pickle);
384     _Pickle_ClearState(st);
385     return -1;
386 }
387 
388 /* Helper for calling a function with a single argument quickly.
389 
390    This function steals the reference of the given argument. */
391 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)392 _Pickle_FastCall(PyObject *func, PyObject *obj)
393 {
394     PyObject *result;
395 
396     result = PyObject_CallOneArg(func, obj);
397     Py_DECREF(obj);
398     return result;
399 }
400 
401 /*************************************************************************/
402 
403 /* Internal data type used as the unpickling stack. */
404 typedef struct {
405     PyObject_VAR_HEAD
406     PyObject **data;
407     int mark_set;          /* is MARK set? */
408     Py_ssize_t fence;      /* position of top MARK or 0 */
409     Py_ssize_t allocated;  /* number of slots in data allocated */
410 } Pdata;
411 
412 static int
Pdata_traverse(Pdata * self,visitproc visit,void * arg)413 Pdata_traverse(Pdata *self, visitproc visit, void *arg)
414 {
415     Py_VISIT(Py_TYPE(self));
416     return 0;
417 }
418 
419 static void
Pdata_dealloc(Pdata * self)420 Pdata_dealloc(Pdata *self)
421 {
422     PyTypeObject *tp = Py_TYPE(self);
423     PyObject_GC_UnTrack(self);
424     Py_ssize_t i = Py_SIZE(self);
425     while (--i >= 0) {
426         Py_DECREF(self->data[i]);
427     }
428     PyMem_Free(self->data);
429     tp->tp_free((PyObject *)self);
430     Py_DECREF(tp);
431 }
432 
433 static PyType_Slot pdata_slots[] = {
434     {Py_tp_dealloc, Pdata_dealloc},
435     {Py_tp_traverse, Pdata_traverse},
436     {0, NULL},
437 };
438 
439 static PyType_Spec pdata_spec = {
440     .name = "_pickle.Pdata",
441     .basicsize = sizeof(Pdata),
442     .itemsize = sizeof(PyObject *),
443     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
444               Py_TPFLAGS_IMMUTABLETYPE),
445     .slots = pdata_slots,
446 };
447 
448 static PyObject *
Pdata_New(PickleState * state)449 Pdata_New(PickleState *state)
450 {
451     Pdata *self;
452 
453     if (!(self = PyObject_GC_New(Pdata, state->Pdata_Type)))
454         return NULL;
455     Py_SET_SIZE(self, 0);
456     self->mark_set = 0;
457     self->fence = 0;
458     self->allocated = 8;
459     self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
460     if (self->data) {
461         PyObject_GC_Track(self);
462         return (PyObject *)self;
463     }
464     Py_DECREF(self);
465     return PyErr_NoMemory();
466 }
467 
468 
469 /* Retain only the initial clearto items.  If clearto >= the current
470  * number of items, this is a (non-erroneous) NOP.
471  */
472 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)473 Pdata_clear(Pdata *self, Py_ssize_t clearto)
474 {
475     Py_ssize_t i = Py_SIZE(self);
476 
477     assert(clearto >= self->fence);
478     if (clearto >= i)
479         return 0;
480 
481     while (--i >= clearto) {
482         Py_CLEAR(self->data[i]);
483     }
484     Py_SET_SIZE(self, clearto);
485     return 0;
486 }
487 
488 static int
Pdata_grow(Pdata * self)489 Pdata_grow(Pdata *self)
490 {
491     PyObject **data = self->data;
492     size_t allocated = (size_t)self->allocated;
493     size_t new_allocated;
494 
495     new_allocated = (allocated >> 3) + 6;
496     /* check for integer overflow */
497     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
498         goto nomemory;
499     new_allocated += allocated;
500     PyMem_RESIZE(data, PyObject *, new_allocated);
501     if (data == NULL)
502         goto nomemory;
503 
504     self->data = data;
505     self->allocated = (Py_ssize_t)new_allocated;
506     return 0;
507 
508   nomemory:
509     PyErr_NoMemory();
510     return -1;
511 }
512 
513 static int
Pdata_stack_underflow(PickleState * st,Pdata * self)514 Pdata_stack_underflow(PickleState *st, Pdata *self)
515 {
516     PyErr_SetString(st->UnpicklingError,
517                     self->mark_set ?
518                     "unexpected MARK found" :
519                     "unpickling stack underflow");
520     return -1;
521 }
522 
523 /* D is a Pdata*.  Pop the topmost element and store it into V, which
524  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
525  * is raised and V is set to NULL.
526  */
527 static PyObject *
Pdata_pop(PickleState * state,Pdata * self)528 Pdata_pop(PickleState *state, Pdata *self)
529 {
530     if (Py_SIZE(self) <= self->fence) {
531         Pdata_stack_underflow(state, self);
532         return NULL;
533     }
534     Py_SET_SIZE(self, Py_SIZE(self) - 1);
535     return self->data[Py_SIZE(self)];
536 }
537 #define PDATA_POP(S, D, V) do { (V) = Pdata_pop(S, (D)); } while (0)
538 
539 static int
Pdata_push(Pdata * self,PyObject * obj)540 Pdata_push(Pdata *self, PyObject *obj)
541 {
542     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
543         return -1;
544     }
545     self->data[Py_SIZE(self)] = obj;
546     Py_SET_SIZE(self, Py_SIZE(self) + 1);
547     return 0;
548 }
549 
550 /* Push an object on stack, transferring its ownership to the stack. */
551 #define PDATA_PUSH(D, O, ER) do {                               \
552         if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
553 
554 /* Push an object on stack, adding a new reference to the object. */
555 #define PDATA_APPEND(D, O, ER) do {                             \
556         Py_INCREF((O));                                         \
557         if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
558 
559 static PyObject *
Pdata_poptuple(PickleState * state,Pdata * self,Py_ssize_t start)560 Pdata_poptuple(PickleState *state, Pdata *self, Py_ssize_t start)
561 {
562     PyObject *tuple;
563     Py_ssize_t len, i, j;
564 
565     if (start < self->fence) {
566         Pdata_stack_underflow(state, self);
567         return NULL;
568     }
569     len = Py_SIZE(self) - start;
570     tuple = PyTuple_New(len);
571     if (tuple == NULL)
572         return NULL;
573     for (i = start, j = 0; j < len; i++, j++)
574         PyTuple_SET_ITEM(tuple, j, self->data[i]);
575 
576     Py_SET_SIZE(self, start);
577     return tuple;
578 }
579 
580 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)581 Pdata_poplist(Pdata *self, Py_ssize_t start)
582 {
583     PyObject *list;
584     Py_ssize_t len, i, j;
585 
586     len = Py_SIZE(self) - start;
587     list = PyList_New(len);
588     if (list == NULL)
589         return NULL;
590     for (i = start, j = 0; j < len; i++, j++)
591         PyList_SET_ITEM(list, j, self->data[i]);
592 
593     Py_SET_SIZE(self, start);
594     return list;
595 }
596 
597 typedef struct {
598     PyObject *me_key;
599     Py_ssize_t me_value;
600 } PyMemoEntry;
601 
602 typedef struct {
603     size_t mt_mask;
604     size_t mt_used;
605     size_t mt_allocated;
606     PyMemoEntry *mt_table;
607 } PyMemoTable;
608 
609 typedef struct PicklerObject {
610     PyObject_HEAD
611     PyMemoTable *memo;          /* Memo table, keep track of the seen
612                                    objects to support self-referential objects
613                                    pickling. */
614     PyObject *persistent_id;    /* persistent_id() method, can be NULL */
615     PyObject *persistent_id_attr; /* instance attribute, can be NULL */
616     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
617     PyObject *reducer_override; /* hook for invoking user-defined callbacks
618                                    instead of save_global when pickling
619                                    functions and classes*/
620 
621     PyObject *write;            /* write() method of the output stream. */
622     PyObject *output_buffer;    /* Write into a local bytearray buffer before
623                                    flushing to the stream. */
624     Py_ssize_t output_len;      /* Length of output_buffer. */
625     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
626     int proto;                  /* Pickle protocol number, >= 0 */
627     int bin;                    /* Boolean, true if proto > 0 */
628     int framing;                /* True when framing is enabled, proto >= 4 */
629     Py_ssize_t frame_start;     /* Position in output_buffer where the
630                                    current frame begins. -1 if there
631                                    is no frame currently open. */
632 
633     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
634     int fast;                   /* Enable fast mode if set to a true value.
635                                    The fast mode disable the usage of memo,
636                                    therefore speeding the pickling process by
637                                    not generating superfluous PUT opcodes. It
638                                    should not be used if with self-referential
639                                    objects. */
640     int fast_nesting;
641     int fix_imports;            /* Indicate whether Pickler should fix
642                                    the name of globals for Python 2.x. */
643     PyObject *fast_memo;
644     PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
645 } PicklerObject;
646 
647 typedef struct UnpicklerObject {
648     PyObject_HEAD
649     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
650 
651     /* The unpickler memo is just an array of PyObject *s. Using a dict
652        is unnecessary, since the keys are contiguous ints. */
653     PyObject **memo;
654     size_t memo_size;       /* Capacity of the memo array */
655     size_t memo_len;        /* Number of objects in the memo */
656 
657     PyObject *persistent_load;  /* persistent_load() method, can be NULL. */
658     PyObject *persistent_load_attr;  /* instance attribute, can be NULL. */
659 
660     Py_buffer buffer;
661     char *input_buffer;
662     char *input_line;
663     Py_ssize_t input_len;
664     Py_ssize_t next_read_idx;
665     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
666 
667     PyObject *read;             /* read() method of the input stream. */
668     PyObject *readinto;         /* readinto() method of the input stream. */
669     PyObject *readline;         /* readline() method of the input stream. */
670     PyObject *peek;             /* peek() method of the input stream, or NULL */
671     PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
672 
673     char *encoding;             /* Name of the encoding to be used for
674                                    decoding strings pickled using Python
675                                    2.x. The default value is "ASCII" */
676     char *errors;               /* Name of errors handling scheme to used when
677                                    decoding strings. The default value is
678                                    "strict". */
679     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
680                                    objects. */
681     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
682     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
683     int proto;                  /* Protocol of the pickle loaded. */
684     int fix_imports;            /* Indicate whether Unpickler should fix
685                                    the name of globals pickled by Python 2.x. */
686 } UnpicklerObject;
687 
688 typedef struct {
689     PyObject_HEAD
690     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
691 }  PicklerMemoProxyObject;
692 
693 typedef struct {
694     PyObject_HEAD
695     UnpicklerObject *unpickler;
696 } UnpicklerMemoProxyObject;
697 
698 /* Forward declarations */
699 static int save(PickleState *state, PicklerObject *, PyObject *, int);
700 static int save_reduce(PickleState *, PicklerObject *, PyObject *, PyObject *);
701 
702 #include "clinic/_pickle.c.h"
703 
704 /*************************************************************************
705  A custom hashtable mapping void* to Python ints. This is used by the pickler
706  for memoization. Using a custom hashtable rather than PyDict allows us to skip
707  a bunch of unnecessary object creation. This makes a huge performance
708  difference. */
709 
710 #define MT_MINSIZE 8
711 #define PERTURB_SHIFT 5
712 
713 
714 static PyMemoTable *
PyMemoTable_New(void)715 PyMemoTable_New(void)
716 {
717     PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
718     if (memo == NULL) {
719         PyErr_NoMemory();
720         return NULL;
721     }
722 
723     memo->mt_used = 0;
724     memo->mt_allocated = MT_MINSIZE;
725     memo->mt_mask = MT_MINSIZE - 1;
726     memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
727     if (memo->mt_table == NULL) {
728         PyMem_Free(memo);
729         PyErr_NoMemory();
730         return NULL;
731     }
732     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
733 
734     return memo;
735 }
736 
737 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)738 PyMemoTable_Copy(PyMemoTable *self)
739 {
740     PyMemoTable *new = PyMemoTable_New();
741     if (new == NULL)
742         return NULL;
743 
744     new->mt_used = self->mt_used;
745     new->mt_allocated = self->mt_allocated;
746     new->mt_mask = self->mt_mask;
747     /* The table we get from _New() is probably smaller than we wanted.
748        Free it and allocate one that's the right size. */
749     PyMem_Free(new->mt_table);
750     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
751     if (new->mt_table == NULL) {
752         PyMem_Free(new);
753         PyErr_NoMemory();
754         return NULL;
755     }
756     for (size_t i = 0; i < self->mt_allocated; i++) {
757         Py_XINCREF(self->mt_table[i].me_key);
758     }
759     memcpy(new->mt_table, self->mt_table,
760            sizeof(PyMemoEntry) * self->mt_allocated);
761 
762     return new;
763 }
764 
765 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)766 PyMemoTable_Size(PyMemoTable *self)
767 {
768     return self->mt_used;
769 }
770 
771 static int
PyMemoTable_Clear(PyMemoTable * self)772 PyMemoTable_Clear(PyMemoTable *self)
773 {
774     Py_ssize_t i = self->mt_allocated;
775 
776     while (--i >= 0) {
777         Py_XDECREF(self->mt_table[i].me_key);
778     }
779     self->mt_used = 0;
780     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
781     return 0;
782 }
783 
784 static void
PyMemoTable_Del(PyMemoTable * self)785 PyMemoTable_Del(PyMemoTable *self)
786 {
787     if (self == NULL)
788         return;
789     PyMemoTable_Clear(self);
790 
791     PyMem_Free(self->mt_table);
792     PyMem_Free(self);
793 }
794 
795 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
796    can be considerably simpler than dictobject.c's lookdict(). */
797 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)798 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
799 {
800     size_t i;
801     size_t perturb;
802     size_t mask = self->mt_mask;
803     PyMemoEntry *table = self->mt_table;
804     PyMemoEntry *entry;
805     Py_hash_t hash = (Py_hash_t)key >> 3;
806 
807     i = hash & mask;
808     entry = &table[i];
809     if (entry->me_key == NULL || entry->me_key == key)
810         return entry;
811 
812     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
813         i = (i << 2) + i + perturb + 1;
814         entry = &table[i & mask];
815         if (entry->me_key == NULL || entry->me_key == key)
816             return entry;
817     }
818     Py_UNREACHABLE();
819 }
820 
821 /* Returns -1 on failure, 0 on success. */
822 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)823 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
824 {
825     PyMemoEntry *oldtable = NULL;
826     PyMemoEntry *oldentry, *newentry;
827     size_t new_size = MT_MINSIZE;
828     size_t to_process;
829 
830     assert(min_size > 0);
831 
832     if (min_size > PY_SSIZE_T_MAX) {
833         PyErr_NoMemory();
834         return -1;
835     }
836 
837     /* Find the smallest valid table size >= min_size. */
838     while (new_size < min_size) {
839         new_size <<= 1;
840     }
841     /* new_size needs to be a power of two. */
842     assert((new_size & (new_size - 1)) == 0);
843 
844     /* Allocate new table. */
845     oldtable = self->mt_table;
846     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
847     if (self->mt_table == NULL) {
848         self->mt_table = oldtable;
849         PyErr_NoMemory();
850         return -1;
851     }
852     self->mt_allocated = new_size;
853     self->mt_mask = new_size - 1;
854     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
855 
856     /* Copy entries from the old table. */
857     to_process = self->mt_used;
858     for (oldentry = oldtable; to_process > 0; oldentry++) {
859         if (oldentry->me_key != NULL) {
860             to_process--;
861             /* newentry is a pointer to a chunk of the new
862                mt_table, so we're setting the key:value pair
863                in-place. */
864             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
865             newentry->me_key = oldentry->me_key;
866             newentry->me_value = oldentry->me_value;
867         }
868     }
869 
870     /* Deallocate the old table. */
871     PyMem_Free(oldtable);
872     return 0;
873 }
874 
875 /* Returns NULL on failure, a pointer to the value otherwise. */
876 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)877 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
878 {
879     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
880     if (entry->me_key == NULL)
881         return NULL;
882     return &entry->me_value;
883 }
884 
885 /* Returns -1 on failure, 0 on success. */
886 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)887 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
888 {
889     PyMemoEntry *entry;
890 
891     assert(key != NULL);
892 
893     entry = _PyMemoTable_Lookup(self, key);
894     if (entry->me_key != NULL) {
895         entry->me_value = value;
896         return 0;
897     }
898     entry->me_key = Py_NewRef(key);
899     entry->me_value = value;
900     self->mt_used++;
901 
902     /* If we added a key, we can safely resize. Otherwise just return!
903      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
904      *
905      * Quadrupling the size improves average table sparseness
906      * (reducing collisions) at the cost of some memory. It also halves
907      * the number of expensive resize operations in a growing memo table.
908      *
909      * Very large memo tables (over 50K items) use doubling instead.
910      * This may help applications with severe memory constraints.
911      */
912     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
913         return 0;
914     }
915     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
916     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
917     return _PyMemoTable_ResizeTable(self, desired_size);
918 }
919 
/* The memo table code is finished with these helper macros; undefine them
   so they are not visible to the rest of the file. */
#undef MT_MINSIZE
#undef PERTURB_SHIFT
922 
923 /*************************************************************************/
924 
925 
926 static int
_Pickler_ClearBuffer(PicklerObject * self)927 _Pickler_ClearBuffer(PicklerObject *self)
928 {
929     Py_XSETREF(self->output_buffer,
930               PyBytes_FromStringAndSize(NULL, self->max_output_len));
931     if (self->output_buffer == NULL)
932         return -1;
933     self->output_len = 0;
934     self->frame_start = -1;
935     return 0;
936 }
937 
/* Write `value` to the 8 bytes starting at `out`, least significant byte
   first.  On platforms where size_t is narrower than 64 bits the remaining
   high-order bytes are set to zero. */
static void
_write_size64(char *out, size_t value)
{
    static_assert(sizeof(size_t) <= 8, "size_t is larger than 64-bit");

    for (size_t pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            /* Emit the current low byte, then move the next one down. */
            out[pos] = (unsigned char)(value & 0xff);
            value >>= 8;
        }
        else {
            out[pos] = 0;
        }
    }
}
952 
953 static int
_Pickler_CommitFrame(PicklerObject * self)954 _Pickler_CommitFrame(PicklerObject *self)
955 {
956     size_t frame_len;
957     char *qdata;
958 
959     if (!self->framing || self->frame_start == -1)
960         return 0;
961     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
962     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
963     if (frame_len >= FRAME_SIZE_MIN) {
964         qdata[0] = FRAME;
965         _write_size64(qdata + 1, frame_len);
966     }
967     else {
968         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
969         self->output_len -= FRAME_HEADER_SIZE;
970     }
971     self->frame_start = -1;
972     return 0;
973 }
974 
975 static PyObject *
_Pickler_GetString(PicklerObject * self)976 _Pickler_GetString(PicklerObject *self)
977 {
978     PyObject *output_buffer = self->output_buffer;
979 
980     assert(self->output_buffer != NULL);
981 
982     if (_Pickler_CommitFrame(self))
983         return NULL;
984 
985     self->output_buffer = NULL;
986     /* Resize down to exact size */
987     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
988         return NULL;
989     return output_buffer;
990 }
991 
992 static int
_Pickler_FlushToFile(PicklerObject * self)993 _Pickler_FlushToFile(PicklerObject *self)
994 {
995     PyObject *output, *result;
996 
997     assert(self->write != NULL);
998 
999     /* This will commit the frame first */
1000     output = _Pickler_GetString(self);
1001     if (output == NULL)
1002         return -1;
1003 
1004     result = _Pickle_FastCall(self->write, output);
1005     Py_XDECREF(result);
1006     return (result == NULL) ? -1 : 0;
1007 }
1008 
1009 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1010 _Pickler_OpcodeBoundary(PicklerObject *self)
1011 {
1012     Py_ssize_t frame_len;
1013 
1014     if (!self->framing || self->frame_start == -1) {
1015         return 0;
1016     }
1017     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1018     if (frame_len >= FRAME_SIZE_TARGET) {
1019         if(_Pickler_CommitFrame(self)) {
1020             return -1;
1021         }
1022         /* Flush the content of the committed frame to the underlying
1023          * file and reuse the pickler buffer for the next frame so as
1024          * to limit memory usage when dumping large complex objects to
1025          * a file.
1026          *
1027          * self->write is NULL when called via dumps.
1028          */
1029         if (self->write != NULL) {
1030             if (_Pickler_FlushToFile(self) < 0) {
1031                 return -1;
1032             }
1033             if (_Pickler_ClearBuffer(self) < 0) {
1034                 return -1;
1035             }
1036         }
1037     }
1038     return 0;
1039 }
1040 
1041 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1042 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1043 {
1044     Py_ssize_t i, n, required;
1045     char *buffer;
1046     int need_new_frame;
1047 
1048     assert(s != NULL);
1049     need_new_frame = (self->framing && self->frame_start == -1);
1050 
1051     if (need_new_frame)
1052         n = data_len + FRAME_HEADER_SIZE;
1053     else
1054         n = data_len;
1055 
1056     required = self->output_len + n;
1057     if (required > self->max_output_len) {
1058         /* Make place in buffer for the pickle chunk */
1059         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1060             PyErr_NoMemory();
1061             return -1;
1062         }
1063         self->max_output_len = (self->output_len + n) / 2 * 3;
1064         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1065             return -1;
1066     }
1067     buffer = PyBytes_AS_STRING(self->output_buffer);
1068     if (need_new_frame) {
1069         /* Setup new frame */
1070         Py_ssize_t frame_start = self->output_len;
1071         self->frame_start = frame_start;
1072         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1073             /* Write an invalid value, for debugging */
1074             buffer[frame_start + i] = 0xFE;
1075         }
1076         self->output_len += FRAME_HEADER_SIZE;
1077     }
1078     if (data_len < 8) {
1079         /* This is faster than memcpy when the string is short. */
1080         for (i = 0; i < data_len; i++) {
1081             buffer[self->output_len + i] = s[i];
1082         }
1083     }
1084     else {
1085         memcpy(buffer + self->output_len, s, data_len);
1086     }
1087     self->output_len += data_len;
1088     return data_len;
1089 }
1090 
1091 static PicklerObject *
_Pickler_New(PickleState * st)1092 _Pickler_New(PickleState *st)
1093 {
1094     PyMemoTable *memo = PyMemoTable_New();
1095     if (memo == NULL) {
1096         return NULL;
1097     }
1098 
1099     const Py_ssize_t max_output_len = WRITE_BUF_SIZE;
1100     PyObject *output_buffer = PyBytes_FromStringAndSize(NULL, max_output_len);
1101     if (output_buffer == NULL) {
1102         goto error;
1103     }
1104 
1105     PicklerObject *self = PyObject_GC_New(PicklerObject, st->Pickler_Type);
1106     if (self == NULL) {
1107         goto error;
1108     }
1109 
1110     self->memo = memo;
1111     self->persistent_id = NULL;
1112     self->persistent_id_attr = NULL;
1113     self->dispatch_table = NULL;
1114     self->reducer_override = NULL;
1115     self->write = NULL;
1116     self->output_buffer = output_buffer;
1117     self->output_len = 0;
1118     self->max_output_len = max_output_len;
1119     self->proto = 0;
1120     self->bin = 0;
1121     self->framing = 0;
1122     self->frame_start = -1;
1123     self->buf_size = 0;
1124     self->fast = 0;
1125     self->fast_nesting = 0;
1126     self->fix_imports = 0;
1127     self->fast_memo = NULL;
1128     self->buffer_callback = NULL;
1129 
1130     PyObject_GC_Track(self);
1131     return self;
1132 
1133 error:
1134     PyMem_Free(memo);
1135     Py_XDECREF(output_buffer);
1136     return NULL;
1137 }
1138 
1139 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1140 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1141 {
1142     long proto;
1143 
1144     if (protocol == Py_None) {
1145         proto = DEFAULT_PROTOCOL;
1146     }
1147     else {
1148         proto = PyLong_AsLong(protocol);
1149         if (proto < 0) {
1150             if (proto == -1 && PyErr_Occurred())
1151                 return -1;
1152             proto = HIGHEST_PROTOCOL;
1153         }
1154         else if (proto > HIGHEST_PROTOCOL) {
1155             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1156                          HIGHEST_PROTOCOL);
1157             return -1;
1158         }
1159     }
1160     self->proto = (int)proto;
1161     self->bin = proto > 0;
1162     self->fix_imports = fix_imports && proto < 3;
1163     return 0;
1164 }
1165 
1166 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1167    be called once on a freshly created Pickler. */
1168 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1169 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1170 {
1171     assert(file != NULL);
1172     if (PyObject_GetOptionalAttr(file, &_Py_ID(write), &self->write) < 0) {
1173         return -1;
1174     }
1175     if (self->write == NULL) {
1176         PyErr_SetString(PyExc_TypeError,
1177                         "file must have a 'write' attribute");
1178         return -1;
1179     }
1180 
1181     return 0;
1182 }
1183 
1184 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1185 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1186 {
1187     if (buffer_callback == Py_None) {
1188         buffer_callback = NULL;
1189     }
1190     if (buffer_callback != NULL && self->proto < 5) {
1191         PyErr_SetString(PyExc_ValueError,
1192                         "buffer_callback needs protocol >= 5");
1193         return -1;
1194     }
1195 
1196     self->buffer_callback = Py_XNewRef(buffer_callback);
1197     return 0;
1198 }
1199 
1200 /* Returns the size of the input on success, -1 on failure. This takes its
1201    own reference to `input`. */
1202 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1203 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1204 {
1205     if (self->buffer.buf != NULL)
1206         PyBuffer_Release(&self->buffer);
1207     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1208         return -1;
1209     self->input_buffer = self->buffer.buf;
1210     self->input_len = self->buffer.len;
1211     self->next_read_idx = 0;
1212     self->prefetched_idx = self->input_len;
1213     return self->input_len;
1214 }
1215 
1216 static int
bad_readline(PickleState * st)1217 bad_readline(PickleState *st)
1218 {
1219     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1220     return -1;
1221 }
1222 
1223 /* Skip any consumed data that was only prefetched using peek() */
1224 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1225 _Unpickler_SkipConsumed(UnpicklerObject *self)
1226 {
1227     Py_ssize_t consumed;
1228     PyObject *r;
1229 
1230     consumed = self->next_read_idx - self->prefetched_idx;
1231     if (consumed <= 0)
1232         return 0;
1233 
1234     assert(self->peek);  /* otherwise we did something wrong */
1235     /* This makes a useless copy... */
1236     r = PyObject_CallFunction(self->read, "n", consumed);
1237     if (r == NULL)
1238         return -1;
1239     Py_DECREF(r);
1240 
1241     self->prefetched_idx = self->next_read_idx;
1242     return 0;
1243 }
1244 
/* Sentinel byte count: passed as `n` to _Unpickler_ReadFromFile() to ask
   for a whole line (via readline()) instead of a fixed number of bytes. */
static const Py_ssize_t READ_WHOLE_LINE = -1;
1246 
1247 /* If reading from a file, we need to only pull the bytes we need, since there
1248    may be multiple pickle objects arranged contiguously in the same input
1249    buffer.
1250 
1251    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1252    bytes from the input stream/buffer.
1253 
1254    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1255    failure; on success, returns the number of bytes read from the file.
1256 
1257    On success, self->input_len will be 0; this is intentional so that when
1258    unpickling from a file, the "we've run out of data" code paths will trigger,
1259    causing the Unpickler to go back to the file for more data. Use the returned
1260    size to tell you how much data you can process. */
1261 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1262 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1263 {
1264     PyObject *data;
1265     Py_ssize_t read_size;
1266 
1267     assert(self->read != NULL);
1268 
1269     if (_Unpickler_SkipConsumed(self) < 0)
1270         return -1;
1271 
1272     if (n == READ_WHOLE_LINE) {
1273         data = PyObject_CallNoArgs(self->readline);
1274     }
1275     else {
1276         PyObject *len;
1277         /* Prefetch some data without advancing the file pointer, if possible */
1278         if (self->peek && n < PREFETCH) {
1279             len = PyLong_FromSsize_t(PREFETCH);
1280             if (len == NULL)
1281                 return -1;
1282             data = _Pickle_FastCall(self->peek, len);
1283             if (data == NULL) {
1284                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1285                     return -1;
1286                 /* peek() is probably not supported by the given file object */
1287                 PyErr_Clear();
1288                 Py_CLEAR(self->peek);
1289             }
1290             else {
1291                 read_size = _Unpickler_SetStringInput(self, data);
1292                 Py_DECREF(data);
1293                 if (read_size < 0) {
1294                     return -1;
1295                 }
1296 
1297                 self->prefetched_idx = 0;
1298                 if (n <= read_size)
1299                     return n;
1300             }
1301         }
1302         len = PyLong_FromSsize_t(n);
1303         if (len == NULL)
1304             return -1;
1305         data = _Pickle_FastCall(self->read, len);
1306     }
1307     if (data == NULL)
1308         return -1;
1309 
1310     read_size = _Unpickler_SetStringInput(self, data);
1311     Py_DECREF(data);
1312     return read_size;
1313 }
1314 
1315 /* Don't call it directly: use _Unpickler_Read() */
1316 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,PickleState * st,char ** s,Py_ssize_t n)1317 _Unpickler_ReadImpl(UnpicklerObject *self, PickleState *st, char **s, Py_ssize_t n)
1318 {
1319     Py_ssize_t num_read;
1320 
1321     *s = NULL;
1322     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323         PyErr_SetString(st->UnpicklingError,
1324                         "read would overflow (invalid bytecode)");
1325         return -1;
1326     }
1327 
1328     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1329     assert(self->next_read_idx + n > self->input_len);
1330 
1331     if (!self->read)
1332         return bad_readline(st);
1333 
1334     /* Extend the buffer to satisfy desired size */
1335     num_read = _Unpickler_ReadFromFile(self, n);
1336     if (num_read < 0)
1337         return -1;
1338     if (num_read < n)
1339         return bad_readline(st);
1340     *s = self->input_buffer;
1341     self->next_read_idx = n;
1342     return n;
1343 }
1344 
1345 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1346  *
1347  * This should only be used for non-small data reads where potentially
1348  * avoiding a copy is beneficial.  This method does not try to prefetch
1349  * more data into the input buffer.
1350  *
1351  * _Unpickler_Read() is recommended in most cases.
1352  */
1353 static Py_ssize_t
_Unpickler_ReadInto(PickleState * state,UnpicklerObject * self,char * buf,Py_ssize_t n)1354 _Unpickler_ReadInto(PickleState *state, UnpicklerObject *self, char *buf,
1355                     Py_ssize_t n)
1356 {
1357     assert(n != READ_WHOLE_LINE);
1358 
1359     /* Read from available buffer data, if any */
1360     Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361     if (in_buffer > 0) {
1362         Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363         memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364         self->next_read_idx += to_read;
1365         buf += to_read;
1366         n -= to_read;
1367         if (n == 0) {
1368             /* Entire read was satisfied from buffer */
1369             return n;
1370         }
1371     }
1372 
1373     /* Read from file */
1374     if (!self->read) {
1375         /* We're unpickling memory, this means the input is truncated */
1376         return bad_readline(state);
1377     }
1378     if (_Unpickler_SkipConsumed(self) < 0) {
1379         return -1;
1380     }
1381 
1382     if (!self->readinto) {
1383         /* readinto() not supported on file-like object, fall back to read()
1384          * and copy into destination buffer (bpo-39681) */
1385         PyObject* len = PyLong_FromSsize_t(n);
1386         if (len == NULL) {
1387             return -1;
1388         }
1389         PyObject* data = _Pickle_FastCall(self->read, len);
1390         if (data == NULL) {
1391             return -1;
1392         }
1393         if (!PyBytes_Check(data)) {
1394             PyErr_Format(PyExc_ValueError,
1395                          "read() returned non-bytes object (%R)",
1396                          Py_TYPE(data));
1397             Py_DECREF(data);
1398             return -1;
1399         }
1400         Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1401         if (read_size < n) {
1402             Py_DECREF(data);
1403             return bad_readline(state);
1404         }
1405         memcpy(buf, PyBytes_AS_STRING(data), n);
1406         Py_DECREF(data);
1407         return n;
1408     }
1409 
1410     /* Call readinto() into user buffer */
1411     PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1412     if (buf_obj == NULL) {
1413         return -1;
1414     }
1415     PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1416     if (read_size_obj == NULL) {
1417         return -1;
1418     }
1419     Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1420     Py_DECREF(read_size_obj);
1421 
1422     if (read_size < 0) {
1423         if (!PyErr_Occurred()) {
1424             PyErr_SetString(PyExc_ValueError,
1425                             "readinto() returned negative size");
1426         }
1427         return -1;
1428     }
1429     if (read_size < n) {
1430         return bad_readline(state);
1431     }
1432     return n;
1433 }
1434 
/* Read `n` bytes from the unpickler's data source, storing a pointer to the
   data in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when at least `n` unread bytes are already in the input
   buffer, `*s` is pointed at them and self->next_read_idx is advanced by
   `n`.  Otherwise the request is handed to _Unpickler_ReadImpl(), which
   refills the buffer from the file and leaves self->next_read_idx at `n`
   relative to the refreshed buffer.

   This is a macro: `self`, `s` and `n` are each evaluated more than once,
   so the arguments must not have side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, state, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, state, (s), (n)))
1454 
1455 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1456 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1457                     char **result)
1458 {
1459     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1460     if (input_line == NULL) {
1461         PyErr_NoMemory();
1462         return -1;
1463     }
1464 
1465     memcpy(input_line, line, len);
1466     input_line[len] = '\0';
1467     self->input_line = input_line;
1468     *result = self->input_line;
1469     return len;
1470 }
1471 
1472 /* Read a line from the input stream/buffer. If we run off the end of the input
1473    before hitting \n, raise an error.
1474 
1475    Returns the number of chars read, or -1 on failure. */
1476 static Py_ssize_t
_Unpickler_Readline(PickleState * state,UnpicklerObject * self,char ** result)1477 _Unpickler_Readline(PickleState *state, UnpicklerObject *self, char **result)
1478 {
1479     Py_ssize_t i, num_read;
1480 
1481     for (i = self->next_read_idx; i < self->input_len; i++) {
1482         if (self->input_buffer[i] == '\n') {
1483             char *line_start = self->input_buffer + self->next_read_idx;
1484             num_read = i - self->next_read_idx + 1;
1485             self->next_read_idx = i + 1;
1486             return _Unpickler_CopyLine(self, line_start, num_read, result);
1487         }
1488     }
1489     if (!self->read)
1490         return bad_readline(state);
1491 
1492     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1493     if (num_read < 0)
1494         return -1;
1495     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1496         return bad_readline(state);
1497     self->next_read_idx = num_read;
1498     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1499 }
1500 
1501 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1502    will be modified in place. */
1503 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1504 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1505 {
1506     size_t i;
1507 
1508     assert(new_size > self->memo_size);
1509 
1510     PyObject **memo_new = self->memo;
1511     PyMem_RESIZE(memo_new, PyObject *, new_size);
1512     if (memo_new == NULL) {
1513         PyErr_NoMemory();
1514         return -1;
1515     }
1516     self->memo = memo_new;
1517     for (i = self->memo_size; i < new_size; i++)
1518         self->memo[i] = NULL;
1519     self->memo_size = new_size;
1520     return 0;
1521 }
1522 
1523 /* Returns NULL if idx is out of bounds. */
1524 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1525 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1526 {
1527     if (idx >= self->memo_size)
1528         return NULL;
1529 
1530     return self->memo[idx];
1531 }
1532 
1533 /* Returns -1 (with an exception set) on failure, 0 on success.
1534    This takes its own reference to `value`. */
1535 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1536 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1537 {
1538     PyObject *old_item;
1539 
1540     if (idx >= self->memo_size) {
1541         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1542             return -1;
1543         assert(idx < self->memo_size);
1544     }
1545     old_item = self->memo[idx];
1546     self->memo[idx] = Py_NewRef(value);
1547     if (old_item != NULL) {
1548         Py_DECREF(old_item);
1549     }
1550     else {
1551         self->memo_len++;
1552     }
1553     return 0;
1554 }
1555 
1556 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1557 _Unpickler_NewMemo(Py_ssize_t new_size)
1558 {
1559     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1560     if (memo == NULL) {
1561         PyErr_NoMemory();
1562         return NULL;
1563     }
1564     memset(memo, 0, new_size * sizeof(PyObject *));
1565     return memo;
1566 }
1567 
1568 /* Free the unpickler's memo, taking care to decref any items left in it. */
1569 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1570 _Unpickler_MemoCleanup(UnpicklerObject *self)
1571 {
1572     Py_ssize_t i;
1573     PyObject **memo = self->memo;
1574 
1575     if (self->memo == NULL)
1576         return;
1577     self->memo = NULL;
1578     i = self->memo_size;
1579     while (--i >= 0) {
1580         Py_XDECREF(memo[i]);
1581     }
1582     PyMem_Free(memo);
1583 }
1584 
1585 static UnpicklerObject *
_Unpickler_New(PyObject * module)1586 _Unpickler_New(PyObject *module)
1587 {
1588     const int MEMO_SIZE = 32;
1589     PyObject **memo = _Unpickler_NewMemo(MEMO_SIZE);
1590     if (memo == NULL) {
1591         return NULL;
1592     }
1593 
1594     PickleState *st = _Pickle_GetState(module);
1595     PyObject *stack = Pdata_New(st);
1596     if (stack == NULL) {
1597         goto error;
1598     }
1599 
1600     UnpicklerObject *self = PyObject_GC_New(UnpicklerObject,
1601                                             st->Unpickler_Type);
1602     if (self == NULL) {
1603         goto error;
1604     }
1605 
1606     self->stack = (Pdata *)stack;
1607     self->memo = memo;
1608     self->memo_size = MEMO_SIZE;
1609     self->memo_len = 0;
1610     self->persistent_load = NULL;
1611     self->persistent_load_attr = NULL;
1612     memset(&self->buffer, 0, sizeof(Py_buffer));
1613     self->input_buffer = NULL;
1614     self->input_line = NULL;
1615     self->input_len = 0;
1616     self->next_read_idx = 0;
1617     self->prefetched_idx = 0;
1618     self->read = NULL;
1619     self->readinto = NULL;
1620     self->readline = NULL;
1621     self->peek = NULL;
1622     self->buffers = NULL;
1623     self->encoding = NULL;
1624     self->errors = NULL;
1625     self->marks = NULL;
1626     self->num_marks = 0;
1627     self->marks_size = 0;
1628     self->proto = 0;
1629     self->fix_imports = 0;
1630 
1631     PyObject_GC_Track(self);
1632     return self;
1633 
1634 error:
1635     PyMem_Free(memo);
1636     Py_XDECREF(stack);
1637     return NULL;
1638 }
1639 
1640 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1641    be called once on a freshly created Unpickler. */
1642 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1643 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1644 {
1645     /* Optional file methods */
1646     if (PyObject_GetOptionalAttr(file, &_Py_ID(peek), &self->peek) < 0) {
1647         goto error;
1648     }
1649     if (PyObject_GetOptionalAttr(file, &_Py_ID(readinto), &self->readinto) < 0) {
1650         goto error;
1651     }
1652     if (PyObject_GetOptionalAttr(file, &_Py_ID(read), &self->read) < 0) {
1653         goto error;
1654     }
1655     if (PyObject_GetOptionalAttr(file, &_Py_ID(readline), &self->readline) < 0) {
1656         goto error;
1657     }
1658     if (!self->readline || !self->read) {
1659         PyErr_SetString(PyExc_TypeError,
1660                         "file must have 'read' and 'readline' attributes");
1661         goto error;
1662     }
1663     return 0;
1664 
1665 error:
1666     Py_CLEAR(self->read);
1667     Py_CLEAR(self->readinto);
1668     Py_CLEAR(self->readline);
1669     Py_CLEAR(self->peek);
1670     return -1;
1671 }
1672 
1673 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1674    be called once on a freshly created Unpickler. */
1675 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1676 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1677                             const char *encoding,
1678                             const char *errors)
1679 {
1680     if (encoding == NULL)
1681         encoding = "ASCII";
1682     if (errors == NULL)
1683         errors = "strict";
1684 
1685     self->encoding = _PyMem_Strdup(encoding);
1686     self->errors = _PyMem_Strdup(errors);
1687     if (self->encoding == NULL || self->errors == NULL) {
1688         PyErr_NoMemory();
1689         return -1;
1690     }
1691     return 0;
1692 }
1693 
1694 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1695    be called once on a freshly created Unpickler. */
1696 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1697 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1698 {
1699     if (buffers == NULL || buffers == Py_None) {
1700         self->buffers = NULL;
1701     }
1702     else {
1703         self->buffers = PyObject_GetIter(buffers);
1704         if (self->buffers == NULL) {
1705             return -1;
1706         }
1707     }
1708     return 0;
1709 }
1710 
1711 /* Generate a GET opcode for an object stored in the memo. */
1712 static int
memo_get(PickleState * st,PicklerObject * self,PyObject * key)1713 memo_get(PickleState *st, PicklerObject *self, PyObject *key)
1714 {
1715     Py_ssize_t *value;
1716     char pdata[30];
1717     Py_ssize_t len;
1718 
1719     value = PyMemoTable_Get(self->memo, key);
1720     if (value == NULL)  {
1721         PyErr_SetObject(PyExc_KeyError, key);
1722         return -1;
1723     }
1724 
1725     if (!self->bin) {
1726         pdata[0] = GET;
1727         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1728                       "%zd\n", *value);
1729         len = strlen(pdata);
1730     }
1731     else {
1732         if (*value < 256) {
1733             pdata[0] = BINGET;
1734             pdata[1] = (unsigned char)(*value & 0xff);
1735             len = 2;
1736         }
1737         else if ((size_t)*value <= 0xffffffffUL) {
1738             pdata[0] = LONG_BINGET;
1739             pdata[1] = (unsigned char)(*value & 0xff);
1740             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1741             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1742             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1743             len = 5;
1744         }
1745         else { /* unlikely */
1746             PyErr_SetString(st->PicklingError,
1747                             "memo id too large for LONG_BINGET");
1748             return -1;
1749         }
1750     }
1751 
1752     if (_Pickler_Write(self, pdata, len) < 0)
1753         return -1;
1754 
1755     return 0;
1756 }
1757 
1758 /* Store an object in the memo, assign it a new unique ID based on the number
1759    of objects currently stored in the memo and generate a PUT opcode. */
1760 static int
memo_put(PickleState * st,PicklerObject * self,PyObject * obj)1761 memo_put(PickleState *st, PicklerObject *self, PyObject *obj)
1762 {
1763     char pdata[30];
1764     Py_ssize_t len;
1765     Py_ssize_t idx;
1766 
1767     const char memoize_op = MEMOIZE;
1768 
1769     if (self->fast)
1770         return 0;
1771 
1772     idx = PyMemoTable_Size(self->memo);
1773     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1774         return -1;
1775 
1776     if (self->proto >= 4) {
1777         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1778             return -1;
1779         return 0;
1780     }
1781     else if (!self->bin) {
1782         pdata[0] = PUT;
1783         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1784                       "%zd\n", idx);
1785         len = strlen(pdata);
1786     }
1787     else {
1788         if (idx < 256) {
1789             pdata[0] = BINPUT;
1790             pdata[1] = (unsigned char)idx;
1791             len = 2;
1792         }
1793         else if ((size_t)idx <= 0xffffffffUL) {
1794             pdata[0] = LONG_BINPUT;
1795             pdata[1] = (unsigned char)(idx & 0xff);
1796             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1797             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1798             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1799             len = 5;
1800         }
1801         else { /* unlikely */
1802             PyErr_SetString(st->PicklingError,
1803                             "memo id too large for LONG_BINPUT");
1804             return -1;
1805         }
1806     }
1807     if (_Pickler_Write(self, pdata, len) < 0)
1808         return -1;
1809 
1810     return 0;
1811 }
1812 
1813 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1814 get_dotted_path(PyObject *obj, PyObject *name)
1815 {
1816     PyObject *dotted_path;
1817     Py_ssize_t i, n;
1818     dotted_path = PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1);
1819     if (dotted_path == NULL)
1820         return NULL;
1821     n = PyList_GET_SIZE(dotted_path);
1822     assert(n >= 1);
1823     for (i = 0; i < n; i++) {
1824         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1825         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1826             if (obj == NULL)
1827                 PyErr_Format(PyExc_AttributeError,
1828                              "Can't get local object %R", name);
1829             else
1830                 PyErr_Format(PyExc_AttributeError,
1831                              "Can't get local attribute %R on %R", name, obj);
1832             Py_DECREF(dotted_path);
1833             return NULL;
1834         }
1835     }
1836     return dotted_path;
1837 }
1838 
1839 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1840 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1841 {
1842     Py_ssize_t i, n;
1843     PyObject *parent = NULL;
1844 
1845     assert(PyList_CheckExact(names));
1846     Py_INCREF(obj);
1847     n = PyList_GET_SIZE(names);
1848     for (i = 0; i < n; i++) {
1849         PyObject *name = PyList_GET_ITEM(names, i);
1850         Py_XSETREF(parent, obj);
1851         (void)PyObject_GetOptionalAttr(parent, name, &obj);
1852         if (obj == NULL) {
1853             Py_DECREF(parent);
1854             return NULL;
1855         }
1856     }
1857     if (pparent != NULL)
1858         *pparent = parent;
1859     else
1860         Py_XDECREF(parent);
1861     return obj;
1862 }
1863 
1864 
1865 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1866 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1867 {
1868     PyObject *dotted_path, *attr;
1869 
1870     if (allow_qualname) {
1871         dotted_path = get_dotted_path(obj, name);
1872         if (dotted_path == NULL)
1873             return NULL;
1874         attr = get_deep_attribute(obj, dotted_path, NULL);
1875         Py_DECREF(dotted_path);
1876     }
1877     else {
1878         (void)PyObject_GetOptionalAttr(obj, name, &attr);
1879     }
1880     if (attr == NULL && !PyErr_Occurred()) {
1881         PyErr_Format(PyExc_AttributeError,
1882                      "Can't get attribute %R on %R", name, obj);
1883     }
1884     return attr;
1885 }
1886 
1887 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1888 _checkmodule(PyObject *module_name, PyObject *module,
1889              PyObject *global, PyObject *dotted_path)
1890 {
1891     if (module == Py_None) {
1892         return -1;
1893     }
1894     if (PyUnicode_Check(module_name) &&
1895             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1896         return -1;
1897     }
1898 
1899     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1900     if (candidate == NULL) {
1901         return -1;
1902     }
1903     if (candidate != global) {
1904         Py_DECREF(candidate);
1905         return -1;
1906     }
1907     Py_DECREF(candidate);
1908     return 0;
1909 }
1910 
1911 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1912 whichmodule(PyObject *global, PyObject *dotted_path)
1913 {
1914     PyObject *module_name;
1915     PyObject *module = NULL;
1916     Py_ssize_t i;
1917     PyObject *modules;
1918 
1919     if (PyObject_GetOptionalAttr(global, &_Py_ID(__module__), &module_name) < 0) {
1920         return NULL;
1921     }
1922     if (module_name) {
1923         /* In some rare cases (e.g., bound methods of extension types),
1924            __module__ can be None. If it is so, then search sys.modules for
1925            the module of global. */
1926         if (module_name != Py_None)
1927             return module_name;
1928         Py_CLEAR(module_name);
1929     }
1930     assert(module_name == NULL);
1931 
1932     /* Fallback on walking sys.modules */
1933     PyThreadState *tstate = _PyThreadState_GET();
1934     modules = _PySys_GetAttr(tstate, &_Py_ID(modules));
1935     if (modules == NULL) {
1936         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1937         return NULL;
1938     }
1939     if (PyDict_CheckExact(modules)) {
1940         i = 0;
1941         while (PyDict_Next(modules, &i, &module_name, &module)) {
1942             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1943                 return Py_NewRef(module_name);
1944             }
1945             if (PyErr_Occurred()) {
1946                 return NULL;
1947             }
1948         }
1949     }
1950     else {
1951         PyObject *iterator = PyObject_GetIter(modules);
1952         if (iterator == NULL) {
1953             return NULL;
1954         }
1955         while ((module_name = PyIter_Next(iterator))) {
1956             module = PyObject_GetItem(modules, module_name);
1957             if (module == NULL) {
1958                 Py_DECREF(module_name);
1959                 Py_DECREF(iterator);
1960                 return NULL;
1961             }
1962             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1963                 Py_DECREF(module);
1964                 Py_DECREF(iterator);
1965                 return module_name;
1966             }
1967             Py_DECREF(module);
1968             Py_DECREF(module_name);
1969             if (PyErr_Occurred()) {
1970                 Py_DECREF(iterator);
1971                 return NULL;
1972             }
1973         }
1974         Py_DECREF(iterator);
1975     }
1976 
1977     /* If no module is found, use __main__. */
1978     return &_Py_ID(__main__);
1979 }
1980 
1981 /* fast_save_enter() and fast_save_leave() are guards against recursive
1982    objects when Pickler is used with the "fast mode" (i.e., with object
1983    memoization disabled). If the nesting of a list or dict object exceed
1984    FAST_NESTING_LIMIT, these guards will start keeping an internal
1985    reference to the seen list or dict objects and check whether these objects
1986    are recursive. These are not strictly necessary, since save() has a
1987    hard-coded recursion limit, but they give a nicer error message than the
1988    typical RuntimeError. */
1989 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1990 fast_save_enter(PicklerObject *self, PyObject *obj)
1991 {
1992     /* if fast_nesting < 0, we're doing an error exit. */
1993     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1994         PyObject *key = NULL;
1995         if (self->fast_memo == NULL) {
1996             self->fast_memo = PyDict_New();
1997             if (self->fast_memo == NULL) {
1998                 self->fast_nesting = -1;
1999                 return 0;
2000             }
2001         }
2002         key = PyLong_FromVoidPtr(obj);
2003         if (key == NULL) {
2004             self->fast_nesting = -1;
2005             return 0;
2006         }
2007         int r = PyDict_Contains(self->fast_memo, key);
2008         if (r > 0) {
2009             PyErr_Format(PyExc_ValueError,
2010                          "fast mode: can't pickle cyclic objects "
2011                          "including object type %.200s at %p",
2012                          Py_TYPE(obj)->tp_name, obj);
2013         }
2014         else if (r == 0) {
2015             r = PyDict_SetItem(self->fast_memo, key, Py_None);
2016         }
2017         Py_DECREF(key);
2018         if (r != 0) {
2019             self->fast_nesting = -1;
2020             return 0;
2021         }
2022     }
2023     return 1;
2024 }
2025 
2026 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2027 fast_save_leave(PicklerObject *self, PyObject *obj)
2028 {
2029     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2030         PyObject *key = PyLong_FromVoidPtr(obj);
2031         if (key == NULL)
2032             return 0;
2033         if (PyDict_DelItem(self->fast_memo, key) < 0) {
2034             Py_DECREF(key);
2035             return 0;
2036         }
2037         Py_DECREF(key);
2038     }
2039     return 1;
2040 }
2041 
2042 static int
save_none(PicklerObject * self,PyObject * obj)2043 save_none(PicklerObject *self, PyObject *obj)
2044 {
2045     const char none_op = NONE;
2046     if (_Pickler_Write(self, &none_op, 1) < 0)
2047         return -1;
2048 
2049     return 0;
2050 }
2051 
2052 static int
save_bool(PicklerObject * self,PyObject * obj)2053 save_bool(PicklerObject *self, PyObject *obj)
2054 {
2055     if (self->proto >= 2) {
2056         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2057         if (_Pickler_Write(self, &bool_op, 1) < 0)
2058             return -1;
2059     }
2060     else {
2061         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2062          * so that unpicklers written before bools were introduced unpickle them
2063          * as ints, but unpicklers after can recognize that bools were intended.
2064          * Note that protocol 2 added direct ways to pickle bools.
2065          */
2066         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2067         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2068             return -1;
2069     }
2070     return 0;
2071 }
2072 
2073 static int
save_long(PicklerObject * self,PyObject * obj)2074 save_long(PicklerObject *self, PyObject *obj)
2075 {
2076     PyObject *repr = NULL;
2077     Py_ssize_t size;
2078     long val;
2079     int overflow;
2080     int status = 0;
2081 
2082     val= PyLong_AsLongAndOverflow(obj, &overflow);
2083     if (!overflow && (sizeof(long) <= 4 ||
2084             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2085     {
2086         /* result fits in a signed 4-byte integer.
2087 
2088            Note: we can't use -0x80000000L in the above condition because some
2089            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2090            before applying the unary minus when sizeof(long) <= 4. The
2091            resulting value stays unsigned which is commonly not what we want,
2092            so MSVC happily warns us about it.  However, that result would have
2093            been fine because we guard for sizeof(long) <= 4 which turns the
2094            condition true in that particular case. */
2095         char pdata[32];
2096         Py_ssize_t len = 0;
2097 
2098         if (self->bin) {
2099             pdata[1] = (unsigned char)(val & 0xff);
2100             pdata[2] = (unsigned char)((val >> 8) & 0xff);
2101             pdata[3] = (unsigned char)((val >> 16) & 0xff);
2102             pdata[4] = (unsigned char)((val >> 24) & 0xff);
2103 
2104             if ((pdata[4] != 0) || (pdata[3] != 0)) {
2105                 pdata[0] = BININT;
2106                 len = 5;
2107             }
2108             else if (pdata[2] != 0) {
2109                 pdata[0] = BININT2;
2110                 len = 3;
2111             }
2112             else {
2113                 pdata[0] = BININT1;
2114                 len = 2;
2115             }
2116         }
2117         else {
2118             sprintf(pdata, "%c%ld\n", INT,  val);
2119             len = strlen(pdata);
2120         }
2121         if (_Pickler_Write(self, pdata, len) < 0)
2122             return -1;
2123 
2124         return 0;
2125     }
2126     assert(!PyErr_Occurred());
2127 
2128     if (self->proto >= 2) {
2129         /* Linear-time pickling. */
2130         size_t nbits;
2131         size_t nbytes;
2132         unsigned char *pdata;
2133         char header[5];
2134         int i;
2135         int sign = _PyLong_Sign(obj);
2136 
2137         if (sign == 0) {
2138             header[0] = LONG1;
2139             header[1] = 0;      /* It's 0 -- an empty bytestring. */
2140             if (_Pickler_Write(self, header, 2) < 0)
2141                 goto error;
2142             return 0;
2143         }
2144         nbits = _PyLong_NumBits(obj);
2145         if (nbits == (size_t)-1 && PyErr_Occurred())
2146             goto error;
2147         /* How many bytes do we need?  There are nbits >> 3 full
2148          * bytes of data, and nbits & 7 leftover bits.  If there
2149          * are any leftover bits, then we clearly need another
2150          * byte.  What's not so obvious is that we *probably*
2151          * need another byte even if there aren't any leftovers:
2152          * the most-significant bit of the most-significant byte
2153          * acts like a sign bit, and it's usually got a sense
2154          * opposite of the one we need.  The exception is ints
2155          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2156          * its own 256's-complement, so has the right sign bit
2157          * even without the extra byte.  That's a pain to check
2158          * for in advance, though, so we always grab an extra
2159          * byte at the start, and cut it back later if possible.
2160          */
2161         nbytes = (nbits >> 3) + 1;
2162         if (nbytes > 0x7fffffffL) {
2163             PyErr_SetString(PyExc_OverflowError,
2164                             "int too large to pickle");
2165             goto error;
2166         }
2167         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2168         if (repr == NULL)
2169             goto error;
2170         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2171         i = _PyLong_AsByteArray((PyLongObject *)obj,
2172                                 pdata, nbytes,
2173                                 1 /* little endian */ , 1 /* signed */ ,
2174                                 1 /* with exceptions */);
2175         if (i < 0)
2176             goto error;
2177         /* If the int is negative, this may be a byte more than
2178          * needed.  This is so iff the MSB is all redundant sign
2179          * bits.
2180          */
2181         if (sign < 0 &&
2182             nbytes > 1 &&
2183             pdata[nbytes - 1] == 0xff &&
2184             (pdata[nbytes - 2] & 0x80) != 0) {
2185             nbytes--;
2186         }
2187 
2188         if (nbytes < 256) {
2189             header[0] = LONG1;
2190             header[1] = (unsigned char)nbytes;
2191             size = 2;
2192         }
2193         else {
2194             header[0] = LONG4;
2195             size = (Py_ssize_t) nbytes;
2196             for (i = 1; i < 5; i++) {
2197                 header[i] = (unsigned char)(size & 0xff);
2198                 size >>= 8;
2199             }
2200             size = 5;
2201         }
2202         if (_Pickler_Write(self, header, size) < 0 ||
2203             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2204             goto error;
2205     }
2206     else {
2207         const char long_op = LONG;
2208         const char *string;
2209 
2210         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2211            the number of digits), in both directions.  We add a trailing 'L'
2212            to the repr, for compatibility with Python 2.x. */
2213 
2214         repr = PyObject_Repr(obj);
2215         if (repr == NULL)
2216             goto error;
2217 
2218         string = PyUnicode_AsUTF8AndSize(repr, &size);
2219         if (string == NULL)
2220             goto error;
2221 
2222         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2223             _Pickler_Write(self, string, size) < 0 ||
2224             _Pickler_Write(self, "L\n", 2) < 0)
2225             goto error;
2226     }
2227 
2228     if (0) {
2229   error:
2230       status = -1;
2231     }
2232     Py_XDECREF(repr);
2233 
2234     return status;
2235 }
2236 
2237 static int
save_float(PicklerObject * self,PyObject * obj)2238 save_float(PicklerObject *self, PyObject *obj)
2239 {
2240     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2241 
2242     if (self->bin) {
2243         char pdata[9];
2244         pdata[0] = BINFLOAT;
2245         if (PyFloat_Pack8(x, &pdata[1], 0) < 0)
2246             return -1;
2247         if (_Pickler_Write(self, pdata, 9) < 0)
2248             return -1;
2249    }
2250     else {
2251         int result = -1;
2252         char *buf = NULL;
2253         char op = FLOAT;
2254 
2255         if (_Pickler_Write(self, &op, 1) < 0)
2256             goto done;
2257 
2258         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2259         if (!buf) {
2260             PyErr_NoMemory();
2261             goto done;
2262         }
2263 
2264         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2265             goto done;
2266 
2267         if (_Pickler_Write(self, "\n", 1) < 0)
2268             goto done;
2269 
2270         result = 0;
2271 done:
2272         PyMem_Free(buf);
2273         return result;
2274     }
2275 
2276     return 0;
2277 }
2278 
2279 /* Perform direct write of the header and payload of the binary object.
2280 
2281    The large contiguous data is written directly into the underlying file
2282    object, bypassing the output_buffer of the Pickler.  We intentionally
2283    do not insert a protocol 4 frame opcode to make it possible to optimize
2284    file.read calls in the loader.
2285  */
2286 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2287 _Pickler_write_bytes(PicklerObject *self,
2288                      const char *header, Py_ssize_t header_size,
2289                      const char *data, Py_ssize_t data_size,
2290                      PyObject *payload)
2291 {
2292     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2293     int framing = self->framing;
2294 
2295     if (bypass_buffer) {
2296         assert(self->output_buffer != NULL);
2297         /* Commit the previous frame. */
2298         if (_Pickler_CommitFrame(self)) {
2299             return -1;
2300         }
2301         /* Disable framing temporarily */
2302         self->framing = 0;
2303     }
2304 
2305     if (_Pickler_Write(self, header, header_size) < 0) {
2306         return -1;
2307     }
2308 
2309     if (bypass_buffer && self->write != NULL) {
2310         /* Bypass the in-memory buffer to directly stream large data
2311            into the underlying file object. */
2312         PyObject *result, *mem = NULL;
2313         /* Dump the output buffer to the file. */
2314         if (_Pickler_FlushToFile(self) < 0) {
2315             return -1;
2316         }
2317 
2318         /* Stream write the payload into the file without going through the
2319            output buffer. */
2320         if (payload == NULL) {
2321             /* TODO: It would be better to use a memoryview with a linked
2322                original string if this is possible. */
2323             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2324             if (payload == NULL) {
2325                 return -1;
2326             }
2327         }
2328         result = PyObject_CallOneArg(self->write, payload);
2329         Py_XDECREF(mem);
2330         if (result == NULL) {
2331             return -1;
2332         }
2333         Py_DECREF(result);
2334 
2335         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2336         if (_Pickler_ClearBuffer(self) < 0) {
2337             return -1;
2338         }
2339     }
2340     else {
2341         if (_Pickler_Write(self, data, data_size) < 0) {
2342             return -1;
2343         }
2344     }
2345 
2346     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2347     self->framing = framing;
2348 
2349     return 0;
2350 }
2351 
2352 static int
_save_bytes_data(PickleState * st,PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2353 _save_bytes_data(PickleState *st, PicklerObject *self, PyObject *obj,
2354                  const char *data, Py_ssize_t size)
2355 {
2356     assert(self->proto >= 3);
2357 
2358     char header[9];
2359     Py_ssize_t len;
2360 
2361     if (size < 0)
2362         return -1;
2363 
2364     if (size <= 0xff) {
2365         header[0] = SHORT_BINBYTES;
2366         header[1] = (unsigned char)size;
2367         len = 2;
2368     }
2369     else if ((size_t)size <= 0xffffffffUL) {
2370         header[0] = BINBYTES;
2371         header[1] = (unsigned char)(size & 0xff);
2372         header[2] = (unsigned char)((size >> 8) & 0xff);
2373         header[3] = (unsigned char)((size >> 16) & 0xff);
2374         header[4] = (unsigned char)((size >> 24) & 0xff);
2375         len = 5;
2376     }
2377     else if (self->proto >= 4) {
2378         header[0] = BINBYTES8;
2379         _write_size64(header + 1, size);
2380         len = 9;
2381     }
2382     else {
2383         PyErr_SetString(PyExc_OverflowError,
2384                         "serializing a bytes object larger than 4 GiB "
2385                         "requires pickle protocol 4 or higher");
2386         return -1;
2387     }
2388 
2389     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2390         return -1;
2391     }
2392 
2393     if (memo_put(st, self, obj) < 0) {
2394         return -1;
2395     }
2396 
2397     return 0;
2398 }
2399 
2400 static int
save_bytes(PickleState * st,PicklerObject * self,PyObject * obj)2401 save_bytes(PickleState *st, PicklerObject *self, PyObject *obj)
2402 {
2403     if (self->proto < 3) {
2404         /* Older pickle protocols do not have an opcode for pickling bytes
2405            objects. Therefore, we need to fake the copy protocol (i.e.,
2406            the __reduce__ method) to permit bytes object unpickling.
2407 
2408            Here we use a hack to be compatible with Python 2. Since in Python
2409            2 'bytes' is just an alias for 'str' (which has different
2410            parameters than the actual bytes object), we use codecs.encode
2411            to create the appropriate 'str' object when unpickled using
2412            Python 2 *and* the appropriate 'bytes' object when unpickled
2413            using Python 3. Again this is a hack and we don't need to do this
2414            with newer protocols. */
2415         PyObject *reduce_value;
2416         int status;
2417 
2418         if (PyBytes_GET_SIZE(obj) == 0) {
2419             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2420         }
2421         else {
2422             PyObject *unicode_str =
2423                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2424                                        PyBytes_GET_SIZE(obj),
2425                                        "strict");
2426 
2427             if (unicode_str == NULL)
2428                 return -1;
2429             reduce_value = Py_BuildValue("(O(OO))",
2430                                          st->codecs_encode, unicode_str,
2431                                          &_Py_ID(latin1));
2432             Py_DECREF(unicode_str);
2433         }
2434 
2435         if (reduce_value == NULL)
2436             return -1;
2437 
2438         /* save_reduce() will memoize the object automatically. */
2439         status = save_reduce(st, self, reduce_value, obj);
2440         Py_DECREF(reduce_value);
2441         return status;
2442     }
2443     else {
2444         return _save_bytes_data(st, self, obj, PyBytes_AS_STRING(obj),
2445                                 PyBytes_GET_SIZE(obj));
2446     }
2447 }
2448 
2449 static int
_save_bytearray_data(PickleState * state,PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2450 _save_bytearray_data(PickleState *state, PicklerObject *self, PyObject *obj,
2451                      const char *data, Py_ssize_t size)
2452 {
2453     assert(self->proto >= 5);
2454 
2455     char header[9];
2456     Py_ssize_t len;
2457 
2458     if (size < 0)
2459         return -1;
2460 
2461     header[0] = BYTEARRAY8;
2462     _write_size64(header + 1, size);
2463     len = 9;
2464 
2465     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2466         return -1;
2467     }
2468 
2469     if (memo_put(state, self, obj) < 0) {
2470         return -1;
2471     }
2472 
2473     return 0;
2474 }
2475 
2476 static int
save_bytearray(PickleState * state,PicklerObject * self,PyObject * obj)2477 save_bytearray(PickleState *state, PicklerObject *self, PyObject *obj)
2478 {
2479     if (self->proto < 5) {
2480         /* Older pickle protocols do not have an opcode for pickling
2481          * bytearrays. */
2482         PyObject *reduce_value = NULL;
2483         int status;
2484 
2485         if (PyByteArray_GET_SIZE(obj) == 0) {
2486             reduce_value = Py_BuildValue("(O())",
2487                                          (PyObject *) &PyByteArray_Type);
2488         }
2489         else {
2490             PyObject *bytes_obj = PyBytes_FromObject(obj);
2491             if (bytes_obj != NULL) {
2492                 reduce_value = Py_BuildValue("(O(O))",
2493                                              (PyObject *) &PyByteArray_Type,
2494                                              bytes_obj);
2495                 Py_DECREF(bytes_obj);
2496             }
2497         }
2498         if (reduce_value == NULL)
2499             return -1;
2500 
2501         /* save_reduce() will memoize the object automatically. */
2502         status = save_reduce(state, self, reduce_value, obj);
2503         Py_DECREF(reduce_value);
2504         return status;
2505     }
2506     else {
2507         return _save_bytearray_data(state, self, obj,
2508                                     PyByteArray_AS_STRING(obj),
2509                                     PyByteArray_GET_SIZE(obj));
2510     }
2511 }
2512 
2513 static int
save_picklebuffer(PickleState * st,PicklerObject * self,PyObject * obj)2514 save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj)
2515 {
2516     if (self->proto < 5) {
2517         PyErr_SetString(st->PicklingError,
2518                         "PickleBuffer can only be pickled with protocol >= 5");
2519         return -1;
2520     }
2521     const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2522     if (view == NULL) {
2523         return -1;
2524     }
2525     if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2526         PyErr_SetString(st->PicklingError,
2527                         "PickleBuffer can not be pickled when "
2528                         "pointing to a non-contiguous buffer");
2529         return -1;
2530     }
2531     int in_band = 1;
2532     if (self->buffer_callback != NULL) {
2533         PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2534         if (ret == NULL) {
2535             return -1;
2536         }
2537         in_band = PyObject_IsTrue(ret);
2538         Py_DECREF(ret);
2539         if (in_band == -1) {
2540             return -1;
2541         }
2542     }
2543     if (in_band) {
2544         /* Write data in-band */
2545         if (view->readonly) {
2546             return _save_bytes_data(st, self, obj, (const char *)view->buf,
2547                                     view->len);
2548         }
2549         else {
2550             return _save_bytearray_data(st, self, obj, (const char *)view->buf,
2551                                         view->len);
2552         }
2553     }
2554     else {
2555         /* Write data out-of-band */
2556         const char next_buffer_op = NEXT_BUFFER;
2557         if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2558             return -1;
2559         }
2560         if (view->readonly) {
2561             const char readonly_buffer_op = READONLY_BUFFER;
2562             if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2563                 return -1;
2564             }
2565         }
2566     }
2567     return 0;
2568 }
2569 
2570 /* A copy of PyUnicode_AsRawUnicodeEscapeString() that also translates
2571    backslash and newline characters to \uXXXX escapes. */
2572 static PyObject *
raw_unicode_escape(PyObject * obj)2573 raw_unicode_escape(PyObject *obj)
2574 {
2575     char *p;
2576     Py_ssize_t i, size;
2577     const void *data;
2578     int kind;
2579     _PyBytesWriter writer;
2580 
2581     _PyBytesWriter_Init(&writer);
2582 
2583     size = PyUnicode_GET_LENGTH(obj);
2584     data = PyUnicode_DATA(obj);
2585     kind = PyUnicode_KIND(obj);
2586 
2587     p = _PyBytesWriter_Alloc(&writer, size);
2588     if (p == NULL)
2589         goto error;
2590     writer.overallocate = 1;
2591 
2592     for (i=0; i < size; i++) {
2593         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2594         /* Map 32-bit characters to '\Uxxxxxxxx' */
2595         if (ch >= 0x10000) {
2596             /* -1: subtract 1 preallocated byte */
2597             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2598             if (p == NULL)
2599                 goto error;
2600 
2601             *p++ = '\\';
2602             *p++ = 'U';
2603             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2604             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2605             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2606             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2607             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2608             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2609             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2610             *p++ = Py_hexdigits[ch & 15];
2611         }
2612         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2613         else if (ch >= 256 ||
2614                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2615                  ch == 0x1a)
2616         {
2617             /* -1: subtract 1 preallocated byte */
2618             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2619             if (p == NULL)
2620                 goto error;
2621 
2622             *p++ = '\\';
2623             *p++ = 'u';
2624             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2625             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2626             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2627             *p++ = Py_hexdigits[ch & 15];
2628         }
2629         /* Copy everything else as-is */
2630         else
2631             *p++ = (char) ch;
2632     }
2633 
2634     return _PyBytesWriter_Finish(&writer, p);
2635 
2636 error:
2637     _PyBytesWriter_Dealloc(&writer);
2638     return NULL;
2639 }
2640 
2641 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2642 write_unicode_binary(PicklerObject *self, PyObject *obj)
2643 {
2644     char header[9];
2645     Py_ssize_t len;
2646     PyObject *encoded = NULL;
2647     Py_ssize_t size;
2648     const char *data;
2649 
2650     data = PyUnicode_AsUTF8AndSize(obj, &size);
2651     if (data == NULL) {
2652         /* Issue #8383: for strings with lone surrogates, fallback on the
2653            "surrogatepass" error handler. */
2654         PyErr_Clear();
2655         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2656         if (encoded == NULL)
2657             return -1;
2658 
2659         data = PyBytes_AS_STRING(encoded);
2660         size = PyBytes_GET_SIZE(encoded);
2661     }
2662 
2663     assert(size >= 0);
2664     if (size <= 0xff && self->proto >= 4) {
2665         header[0] = SHORT_BINUNICODE;
2666         header[1] = (unsigned char)(size & 0xff);
2667         len = 2;
2668     }
2669     else if ((size_t)size <= 0xffffffffUL) {
2670         header[0] = BINUNICODE;
2671         header[1] = (unsigned char)(size & 0xff);
2672         header[2] = (unsigned char)((size >> 8) & 0xff);
2673         header[3] = (unsigned char)((size >> 16) & 0xff);
2674         header[4] = (unsigned char)((size >> 24) & 0xff);
2675         len = 5;
2676     }
2677     else if (self->proto >= 4) {
2678         header[0] = BINUNICODE8;
2679         _write_size64(header + 1, size);
2680         len = 9;
2681     }
2682     else {
2683         PyErr_SetString(PyExc_OverflowError,
2684                         "serializing a string larger than 4 GiB "
2685                         "requires pickle protocol 4 or higher");
2686         Py_XDECREF(encoded);
2687         return -1;
2688     }
2689 
2690     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2691         Py_XDECREF(encoded);
2692         return -1;
2693     }
2694     Py_XDECREF(encoded);
2695     return 0;
2696 }
2697 
2698 static int
save_unicode(PickleState * state,PicklerObject * self,PyObject * obj)2699 save_unicode(PickleState *state, PicklerObject *self, PyObject *obj)
2700 {
2701     if (self->bin) {
2702         if (write_unicode_binary(self, obj) < 0)
2703             return -1;
2704     }
2705     else {
2706         PyObject *encoded;
2707         Py_ssize_t size;
2708         const char unicode_op = UNICODE;
2709 
2710         encoded = raw_unicode_escape(obj);
2711         if (encoded == NULL)
2712             return -1;
2713 
2714         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2715             Py_DECREF(encoded);
2716             return -1;
2717         }
2718 
2719         size = PyBytes_GET_SIZE(encoded);
2720         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2721             Py_DECREF(encoded);
2722             return -1;
2723         }
2724         Py_DECREF(encoded);
2725 
2726         if (_Pickler_Write(self, "\n", 1) < 0)
2727             return -1;
2728     }
2729     if (memo_put(state, self, obj) < 0)
2730         return -1;
2731 
2732     return 0;
2733 }
2734 
2735 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2736 static int
store_tuple_elements(PickleState * state,PicklerObject * self,PyObject * t,Py_ssize_t len)2737 store_tuple_elements(PickleState *state, PicklerObject *self, PyObject *t,
2738                      Py_ssize_t len)
2739 {
2740     Py_ssize_t i;
2741 
2742     assert(PyTuple_Size(t) == len);
2743 
2744     for (i = 0; i < len; i++) {
2745         PyObject *element = PyTuple_GET_ITEM(t, i);
2746 
2747         if (element == NULL)
2748             return -1;
2749         if (save(state, self, element, 0) < 0)
2750             return -1;
2751     }
2752 
2753     return 0;
2754 }
2755 
2756 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2757  * used across protocols to minimize the space needed to pickle them.
2758  * Tuples are also the only builtin immutable type that can be recursive
2759  * (a tuple can be reached from itself), and that requires some subtle
2760  * magic so that it works in all cases.  IOW, this is a long routine.
2761  */
2762 static int
save_tuple(PickleState * state,PicklerObject * self,PyObject * obj)2763 save_tuple(PickleState *state, PicklerObject *self, PyObject *obj)
2764 {
2765     Py_ssize_t len, i;
2766 
2767     const char mark_op = MARK;
2768     const char tuple_op = TUPLE;
2769     const char pop_op = POP;
2770     const char pop_mark_op = POP_MARK;
2771     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2772 
2773     if ((len = PyTuple_Size(obj)) < 0)
2774         return -1;
2775 
2776     if (len == 0) {
2777         char pdata[2];
2778 
2779         if (self->proto) {
2780             pdata[0] = EMPTY_TUPLE;
2781             len = 1;
2782         }
2783         else {
2784             pdata[0] = MARK;
2785             pdata[1] = TUPLE;
2786             len = 2;
2787         }
2788         if (_Pickler_Write(self, pdata, len) < 0)
2789             return -1;
2790         return 0;
2791     }
2792 
2793     /* The tuple isn't in the memo now.  If it shows up there after
2794      * saving the tuple elements, the tuple must be recursive, in
2795      * which case we'll pop everything we put on the stack, and fetch
2796      * its value from the memo.
2797      */
2798     if (len <= 3 && self->proto >= 2) {
2799         /* Use TUPLE{1,2,3} opcodes. */
2800         if (store_tuple_elements(state, self, obj, len) < 0)
2801             return -1;
2802 
2803         if (PyMemoTable_Get(self->memo, obj)) {
2804             /* pop the len elements */
2805             for (i = 0; i < len; i++)
2806                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2807                     return -1;
2808             /* fetch from memo */
2809             if (memo_get(state, self, obj) < 0)
2810                 return -1;
2811 
2812             return 0;
2813         }
2814         else { /* Not recursive. */
2815             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2816                 return -1;
2817         }
2818         goto memoize;
2819     }
2820 
2821     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2822      * Generate MARK e1 e2 ... TUPLE
2823      */
2824     if (_Pickler_Write(self, &mark_op, 1) < 0)
2825         return -1;
2826 
2827     if (store_tuple_elements(state, self, obj, len) < 0)
2828         return -1;
2829 
2830     if (PyMemoTable_Get(self->memo, obj)) {
2831         /* pop the stack stuff we pushed */
2832         if (self->bin) {
2833             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2834                 return -1;
2835         }
2836         else {
2837             /* Note that we pop one more than len, to remove
2838              * the MARK too.
2839              */
2840             for (i = 0; i <= len; i++)
2841                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2842                     return -1;
2843         }
2844         /* fetch from memo */
2845         if (memo_get(state, self, obj) < 0)
2846             return -1;
2847 
2848         return 0;
2849     }
2850     else { /* Not recursive. */
2851         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2852             return -1;
2853     }
2854 
2855   memoize:
2856     if (memo_put(state, self, obj) < 0)
2857         return -1;
2858 
2859     return 0;
2860 }
2861 
2862 /* iter is an iterator giving items, and we batch up chunks of
2863  *     MARK item item ... item APPENDS
2864  * opcode sequences.  Calling code should have arranged to first create an
2865  * empty list, or list-like object, for the APPENDS to operate on.
2866  * Returns 0 on success, <0 on error.
2867  */
2868 static int
batch_list(PickleState * state,PicklerObject * self,PyObject * iter)2869 batch_list(PickleState *state, PicklerObject *self, PyObject *iter)
2870 {
2871     PyObject *obj = NULL;
2872     PyObject *firstitem = NULL;
2873     int i, n;
2874 
2875     const char mark_op = MARK;
2876     const char append_op = APPEND;
2877     const char appends_op = APPENDS;
2878 
2879     assert(iter != NULL);
2880 
2881     /* XXX: I think this function could be made faster by avoiding the
2882        iterator interface and fetching objects directly from list using
2883        PyList_GET_ITEM.
2884     */
2885 
2886     if (self->proto == 0) {
2887         /* APPENDS isn't available; do one at a time. */
2888         for (;;) {
2889             obj = PyIter_Next(iter);
2890             if (obj == NULL) {
2891                 if (PyErr_Occurred())
2892                     return -1;
2893                 break;
2894             }
2895             i = save(state, self, obj, 0);
2896             Py_DECREF(obj);
2897             if (i < 0)
2898                 return -1;
2899             if (_Pickler_Write(self, &append_op, 1) < 0)
2900                 return -1;
2901         }
2902         return 0;
2903     }
2904 
2905     /* proto > 0:  write in batches of BATCHSIZE. */
2906     do {
2907         /* Get first item */
2908         firstitem = PyIter_Next(iter);
2909         if (firstitem == NULL) {
2910             if (PyErr_Occurred())
2911                 goto error;
2912 
2913             /* nothing more to add */
2914             break;
2915         }
2916 
2917         /* Try to get a second item */
2918         obj = PyIter_Next(iter);
2919         if (obj == NULL) {
2920             if (PyErr_Occurred())
2921                 goto error;
2922 
2923             /* Only one item to write */
2924             if (save(state, self, firstitem, 0) < 0)
2925                 goto error;
2926             if (_Pickler_Write(self, &append_op, 1) < 0)
2927                 goto error;
2928             Py_CLEAR(firstitem);
2929             break;
2930         }
2931 
2932         /* More than one item to write */
2933 
2934         /* Pump out MARK, items, APPENDS. */
2935         if (_Pickler_Write(self, &mark_op, 1) < 0)
2936             goto error;
2937 
2938         if (save(state, self, firstitem, 0) < 0)
2939             goto error;
2940         Py_CLEAR(firstitem);
2941         n = 1;
2942 
2943         /* Fetch and save up to BATCHSIZE items */
2944         while (obj) {
2945             if (save(state, self, obj, 0) < 0)
2946                 goto error;
2947             Py_CLEAR(obj);
2948             n += 1;
2949 
2950             if (n == BATCHSIZE)
2951                 break;
2952 
2953             obj = PyIter_Next(iter);
2954             if (obj == NULL) {
2955                 if (PyErr_Occurred())
2956                     goto error;
2957                 break;
2958             }
2959         }
2960 
2961         if (_Pickler_Write(self, &appends_op, 1) < 0)
2962             goto error;
2963 
2964     } while (n == BATCHSIZE);
2965     return 0;
2966 
2967   error:
2968     Py_XDECREF(firstitem);
2969     Py_XDECREF(obj);
2970     return -1;
2971 }
2972 
2973 /* This is a variant of batch_list() above, specialized for lists (with no
2974  * support for list subclasses). Like batch_list(), we batch up chunks of
2975  *     MARK item item ... item APPENDS
2976  * opcode sequences.  Calling code should have arranged to first create an
2977  * empty list, or list-like object, for the APPENDS to operate on.
2978  * Returns 0 on success, -1 on error.
2979  *
2980  * This version is considerably faster than batch_list(), if less general.
2981  *
2982  * Note that this only works for protocols > 0.
2983  */
2984 static int
batch_list_exact(PickleState * state,PicklerObject * self,PyObject * obj)2985 batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj)
2986 {
2987     PyObject *item = NULL;
2988     Py_ssize_t this_batch, total;
2989 
2990     const char append_op = APPEND;
2991     const char appends_op = APPENDS;
2992     const char mark_op = MARK;
2993 
2994     assert(obj != NULL);
2995     assert(self->proto > 0);
2996     assert(PyList_CheckExact(obj));
2997 
2998     if (PyList_GET_SIZE(obj) == 1) {
2999         item = PyList_GET_ITEM(obj, 0);
3000         Py_INCREF(item);
3001         int err = save(state, self, item, 0);
3002         Py_DECREF(item);
3003         if (err < 0)
3004             return -1;
3005         if (_Pickler_Write(self, &append_op, 1) < 0)
3006             return -1;
3007         return 0;
3008     }
3009 
3010     /* Write in batches of BATCHSIZE. */
3011     total = 0;
3012     do {
3013         this_batch = 0;
3014         if (_Pickler_Write(self, &mark_op, 1) < 0)
3015             return -1;
3016         while (total < PyList_GET_SIZE(obj)) {
3017             item = PyList_GET_ITEM(obj, total);
3018             Py_INCREF(item);
3019             int err = save(state, self, item, 0);
3020             Py_DECREF(item);
3021             if (err < 0)
3022                 return -1;
3023             total++;
3024             if (++this_batch == BATCHSIZE)
3025                 break;
3026         }
3027         if (_Pickler_Write(self, &appends_op, 1) < 0)
3028             return -1;
3029 
3030     } while (total < PyList_GET_SIZE(obj));
3031 
3032     return 0;
3033 }
3034 
3035 static int
save_list(PickleState * state,PicklerObject * self,PyObject * obj)3036 save_list(PickleState *state, PicklerObject *self, PyObject *obj)
3037 {
3038     char header[3];
3039     Py_ssize_t len;
3040     int status = 0;
3041 
3042     if (self->fast && !fast_save_enter(self, obj))
3043         goto error;
3044 
3045     /* Create an empty list. */
3046     if (self->bin) {
3047         header[0] = EMPTY_LIST;
3048         len = 1;
3049     }
3050     else {
3051         header[0] = MARK;
3052         header[1] = LIST;
3053         len = 2;
3054     }
3055 
3056     if (_Pickler_Write(self, header, len) < 0)
3057         goto error;
3058 
3059     /* Get list length, and bow out early if empty. */
3060     if ((len = PyList_Size(obj)) < 0)
3061         goto error;
3062 
3063     if (memo_put(state, self, obj) < 0)
3064         goto error;
3065 
3066     if (len != 0) {
3067         /* Materialize the list elements. */
3068         if (PyList_CheckExact(obj) && self->proto > 0) {
3069             if (_Py_EnterRecursiveCall(" while pickling an object"))
3070                 goto error;
3071             status = batch_list_exact(state, self, obj);
3072             _Py_LeaveRecursiveCall();
3073         } else {
3074             PyObject *iter = PyObject_GetIter(obj);
3075             if (iter == NULL)
3076                 goto error;
3077 
3078             if (_Py_EnterRecursiveCall(" while pickling an object")) {
3079                 Py_DECREF(iter);
3080                 goto error;
3081             }
3082             status = batch_list(state, self, iter);
3083             _Py_LeaveRecursiveCall();
3084             Py_DECREF(iter);
3085         }
3086     }
3087     if (0) {
3088   error:
3089         status = -1;
3090     }
3091 
3092     if (self->fast && !fast_save_leave(self, obj))
3093         status = -1;
3094 
3095     return status;
3096 }
3097 
3098 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3099  *     MARK key value ... key value SETITEMS
3100  * opcode sequences.  Calling code should have arranged to first create an
3101  * empty dict, or dict-like object, for the SETITEMS to operate on.
3102  * Returns 0 on success, <0 on error.
3103  *
3104  * This is very much like batch_list().  The difference between saving
3105  * elements directly, and picking apart two-tuples, is so long-winded at
3106  * the C level, though, that attempts to combine these routines were too
3107  * ugly to bear.
3108  */
3109 static int
batch_dict(PickleState * state,PicklerObject * self,PyObject * iter)3110 batch_dict(PickleState *state, PicklerObject *self, PyObject *iter)
3111 {
3112     PyObject *obj = NULL;
3113     PyObject *firstitem = NULL;
3114     int i, n;
3115 
3116     const char mark_op = MARK;
3117     const char setitem_op = SETITEM;
3118     const char setitems_op = SETITEMS;
3119 
3120     assert(iter != NULL);
3121 
3122     if (self->proto == 0) {
3123         /* SETITEMS isn't available; do one at a time. */
3124         for (;;) {
3125             obj = PyIter_Next(iter);
3126             if (obj == NULL) {
3127                 if (PyErr_Occurred())
3128                     return -1;
3129                 break;
3130             }
3131             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3132                 PyErr_SetString(PyExc_TypeError, "dict items "
3133                                 "iterator must return 2-tuples");
3134                 Py_DECREF(obj);
3135                 return -1;
3136             }
3137             i = save(state, self, PyTuple_GET_ITEM(obj, 0), 0);
3138             if (i >= 0)
3139                 i = save(state, self, PyTuple_GET_ITEM(obj, 1), 0);
3140             Py_DECREF(obj);
3141             if (i < 0)
3142                 return -1;
3143             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3144                 return -1;
3145         }
3146         return 0;
3147     }
3148 
3149     /* proto > 0:  write in batches of BATCHSIZE. */
3150     do {
3151         /* Get first item */
3152         firstitem = PyIter_Next(iter);
3153         if (firstitem == NULL) {
3154             if (PyErr_Occurred())
3155                 goto error;
3156 
3157             /* nothing more to add */
3158             break;
3159         }
3160         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3161             PyErr_SetString(PyExc_TypeError, "dict items "
3162                                 "iterator must return 2-tuples");
3163             goto error;
3164         }
3165 
3166         /* Try to get a second item */
3167         obj = PyIter_Next(iter);
3168         if (obj == NULL) {
3169             if (PyErr_Occurred())
3170                 goto error;
3171 
3172             /* Only one item to write */
3173             if (save(state, self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3174                 goto error;
3175             if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3176                 goto error;
3177             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3178                 goto error;
3179             Py_CLEAR(firstitem);
3180             break;
3181         }
3182 
3183         /* More than one item to write */
3184 
3185         /* Pump out MARK, items, SETITEMS. */
3186         if (_Pickler_Write(self, &mark_op, 1) < 0)
3187             goto error;
3188 
3189         if (save(state, self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3190             goto error;
3191         if (save(state, self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3192             goto error;
3193         Py_CLEAR(firstitem);
3194         n = 1;
3195 
3196         /* Fetch and save up to BATCHSIZE items */
3197         while (obj) {
3198             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3199                 PyErr_SetString(PyExc_TypeError, "dict items "
3200                     "iterator must return 2-tuples");
3201                 goto error;
3202             }
3203             if (save(state, self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3204                 save(state, self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3205                 goto error;
3206             Py_CLEAR(obj);
3207             n += 1;
3208 
3209             if (n == BATCHSIZE)
3210                 break;
3211 
3212             obj = PyIter_Next(iter);
3213             if (obj == NULL) {
3214                 if (PyErr_Occurred())
3215                     goto error;
3216                 break;
3217             }
3218         }
3219 
3220         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3221             goto error;
3222 
3223     } while (n == BATCHSIZE);
3224     return 0;
3225 
3226   error:
3227     Py_XDECREF(firstitem);
3228     Py_XDECREF(obj);
3229     return -1;
3230 }
3231 
3232 /* This is a variant of batch_dict() above that specializes for dicts, with no
3233  * support for dict subclasses. Like batch_dict(), we batch up chunks of
3234  *     MARK key value ... key value SETITEMS
3235  * opcode sequences.  Calling code should have arranged to first create an
3236  * empty dict, or dict-like object, for the SETITEMS to operate on.
3237  * Returns 0 on success, -1 on error.
3238  *
3239  * Note that this currently doesn't work for protocol 0.
3240  */
3241 static int
batch_dict_exact(PickleState * state,PicklerObject * self,PyObject * obj)3242 batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
3243 {
3244     PyObject *key = NULL, *value = NULL;
3245     int i;
3246     Py_ssize_t dict_size, ppos = 0;
3247 
3248     const char mark_op = MARK;
3249     const char setitem_op = SETITEM;
3250     const char setitems_op = SETITEMS;
3251 
3252     assert(obj != NULL && PyDict_CheckExact(obj));
3253     assert(self->proto > 0);
3254 
3255     dict_size = PyDict_GET_SIZE(obj);
3256 
3257     /* Special-case len(d) == 1 to save space. */
3258     if (dict_size == 1) {
3259         PyDict_Next(obj, &ppos, &key, &value);
3260         Py_INCREF(key);
3261         Py_INCREF(value);
3262         if (save(state, self, key, 0) < 0) {
3263             goto error;
3264         }
3265         if (save(state, self, value, 0) < 0) {
3266             goto error;
3267         }
3268         Py_CLEAR(key);
3269         Py_CLEAR(value);
3270         if (_Pickler_Write(self, &setitem_op, 1) < 0)
3271             return -1;
3272         return 0;
3273     }
3274 
3275     /* Write in batches of BATCHSIZE. */
3276     do {
3277         i = 0;
3278         if (_Pickler_Write(self, &mark_op, 1) < 0)
3279             return -1;
3280         while (PyDict_Next(obj, &ppos, &key, &value)) {
3281             Py_INCREF(key);
3282             Py_INCREF(value);
3283             if (save(state, self, key, 0) < 0) {
3284                 goto error;
3285             }
3286             if (save(state, self, value, 0) < 0) {
3287                 goto error;
3288             }
3289             Py_CLEAR(key);
3290             Py_CLEAR(value);
3291             if (++i == BATCHSIZE)
3292                 break;
3293         }
3294         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3295             return -1;
3296         if (PyDict_GET_SIZE(obj) != dict_size) {
3297             PyErr_Format(
3298                 PyExc_RuntimeError,
3299                 "dictionary changed size during iteration");
3300             return -1;
3301         }
3302 
3303     } while (i == BATCHSIZE);
3304     return 0;
3305 error:
3306     Py_XDECREF(key);
3307     Py_XDECREF(value);
3308     return -1;
3309 }
3310 
3311 static int
save_dict(PickleState * state,PicklerObject * self,PyObject * obj)3312 save_dict(PickleState *state, PicklerObject *self, PyObject *obj)
3313 {
3314     PyObject *items, *iter;
3315     char header[3];
3316     Py_ssize_t len;
3317     int status = 0;
3318     assert(PyDict_Check(obj));
3319 
3320     if (self->fast && !fast_save_enter(self, obj))
3321         goto error;
3322 
3323     /* Create an empty dict. */
3324     if (self->bin) {
3325         header[0] = EMPTY_DICT;
3326         len = 1;
3327     }
3328     else {
3329         header[0] = MARK;
3330         header[1] = DICT;
3331         len = 2;
3332     }
3333 
3334     if (_Pickler_Write(self, header, len) < 0)
3335         goto error;
3336 
3337     if (memo_put(state, self, obj) < 0)
3338         goto error;
3339 
3340     if (PyDict_GET_SIZE(obj)) {
3341         /* Save the dict items. */
3342         if (PyDict_CheckExact(obj) && self->proto > 0) {
3343             /* We can take certain shortcuts if we know this is a dict and
3344                not a dict subclass. */
3345             if (_Py_EnterRecursiveCall(" while pickling an object"))
3346                 goto error;
3347             status = batch_dict_exact(state, self, obj);
3348             _Py_LeaveRecursiveCall();
3349         } else {
3350             items = PyObject_CallMethodNoArgs(obj, &_Py_ID(items));
3351             if (items == NULL)
3352                 goto error;
3353             iter = PyObject_GetIter(items);
3354             Py_DECREF(items);
3355             if (iter == NULL)
3356                 goto error;
3357             if (_Py_EnterRecursiveCall(" while pickling an object")) {
3358                 Py_DECREF(iter);
3359                 goto error;
3360             }
3361             status = batch_dict(state, self, iter);
3362             _Py_LeaveRecursiveCall();
3363             Py_DECREF(iter);
3364         }
3365     }
3366 
3367     if (0) {
3368   error:
3369         status = -1;
3370     }
3371 
3372     if (self->fast && !fast_save_leave(self, obj))
3373         status = -1;
3374 
3375     return status;
3376 }
3377 
3378 static int
save_set(PickleState * state,PicklerObject * self,PyObject * obj)3379 save_set(PickleState *state, PicklerObject *self, PyObject *obj)
3380 {
3381     PyObject *item;
3382     int i;
3383     Py_ssize_t set_size, ppos = 0;
3384     Py_hash_t hash;
3385 
3386     const char empty_set_op = EMPTY_SET;
3387     const char mark_op = MARK;
3388     const char additems_op = ADDITEMS;
3389 
3390     if (self->proto < 4) {
3391         PyObject *items;
3392         PyObject *reduce_value;
3393         int status;
3394 
3395         items = PySequence_List(obj);
3396         if (items == NULL) {
3397             return -1;
3398         }
3399         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3400         Py_DECREF(items);
3401         if (reduce_value == NULL) {
3402             return -1;
3403         }
3404         /* save_reduce() will memoize the object automatically. */
3405         status = save_reduce(state, self, reduce_value, obj);
3406         Py_DECREF(reduce_value);
3407         return status;
3408     }
3409 
3410     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3411         return -1;
3412 
3413     if (memo_put(state, self, obj) < 0)
3414         return -1;
3415 
3416     set_size = PySet_GET_SIZE(obj);
3417     if (set_size == 0)
3418         return 0;  /* nothing to do */
3419 
3420     /* Write in batches of BATCHSIZE. */
3421     do {
3422         i = 0;
3423         if (_Pickler_Write(self, &mark_op, 1) < 0)
3424             return -1;
3425 
3426         int err = 0;
3427         Py_BEGIN_CRITICAL_SECTION(obj);
3428         while (_PySet_NextEntryRef(obj, &ppos, &item, &hash)) {
3429             err = save(state, self, item, 0);
3430             Py_CLEAR(item);
3431             if (err < 0)
3432                 break;
3433             if (++i == BATCHSIZE)
3434                 break;
3435         }
3436         Py_END_CRITICAL_SECTION();
3437         if (err < 0) {
3438             return -1;
3439         }
3440         if (_Pickler_Write(self, &additems_op, 1) < 0)
3441             return -1;
3442         if (PySet_GET_SIZE(obj) != set_size) {
3443             PyErr_Format(
3444                 PyExc_RuntimeError,
3445                 "set changed size during iteration");
3446             return -1;
3447         }
3448     } while (i == BATCHSIZE);
3449 
3450     return 0;
3451 }
3452 
3453 static int
save_frozenset(PickleState * state,PicklerObject * self,PyObject * obj)3454 save_frozenset(PickleState *state, PicklerObject *self, PyObject *obj)
3455 {
3456     PyObject *iter;
3457 
3458     const char mark_op = MARK;
3459     const char frozenset_op = FROZENSET;
3460 
3461     if (self->fast && !fast_save_enter(self, obj))
3462         return -1;
3463 
3464     if (self->proto < 4) {
3465         PyObject *items;
3466         PyObject *reduce_value;
3467         int status;
3468 
3469         items = PySequence_List(obj);
3470         if (items == NULL) {
3471             return -1;
3472         }
3473         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3474                                      items);
3475         Py_DECREF(items);
3476         if (reduce_value == NULL) {
3477             return -1;
3478         }
3479         /* save_reduce() will memoize the object automatically. */
3480         status = save_reduce(state, self, reduce_value, obj);
3481         Py_DECREF(reduce_value);
3482         return status;
3483     }
3484 
3485     if (_Pickler_Write(self, &mark_op, 1) < 0)
3486         return -1;
3487 
3488     iter = PyObject_GetIter(obj);
3489     if (iter == NULL) {
3490         return -1;
3491     }
3492     for (;;) {
3493         PyObject *item;
3494 
3495         item = PyIter_Next(iter);
3496         if (item == NULL) {
3497             if (PyErr_Occurred()) {
3498                 Py_DECREF(iter);
3499                 return -1;
3500             }
3501             break;
3502         }
3503         if (save(state, self, item, 0) < 0) {
3504             Py_DECREF(item);
3505             Py_DECREF(iter);
3506             return -1;
3507         }
3508         Py_DECREF(item);
3509     }
3510     Py_DECREF(iter);
3511 
3512     /* If the object is already in the memo, this means it is
3513        recursive. In this case, throw away everything we put on the
3514        stack, and fetch the object back from the memo. */
3515     if (PyMemoTable_Get(self->memo, obj)) {
3516         const char pop_mark_op = POP_MARK;
3517 
3518         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3519             return -1;
3520         if (memo_get(state, self, obj) < 0)
3521             return -1;
3522         return 0;
3523     }
3524 
3525     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3526         return -1;
3527     if (memo_put(state, self, obj) < 0)
3528         return -1;
3529 
3530     return 0;
3531 }
3532 
3533 static int
fix_imports(PickleState * st,PyObject ** module_name,PyObject ** global_name)3534 fix_imports(PickleState *st, PyObject **module_name, PyObject **global_name)
3535 {
3536     PyObject *key;
3537     PyObject *item;
3538 
3539     key = PyTuple_Pack(2, *module_name, *global_name);
3540     if (key == NULL)
3541         return -1;
3542     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3543     Py_DECREF(key);
3544     if (item) {
3545         PyObject *fixed_module_name;
3546         PyObject *fixed_global_name;
3547 
3548         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3549             PyErr_Format(PyExc_RuntimeError,
3550                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3551                          "should be 2-tuples, not %.200s",
3552                          Py_TYPE(item)->tp_name);
3553             return -1;
3554         }
3555         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3556         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3557         if (!PyUnicode_Check(fixed_module_name) ||
3558             !PyUnicode_Check(fixed_global_name)) {
3559             PyErr_Format(PyExc_RuntimeError,
3560                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3561                          "should be pairs of str, not (%.200s, %.200s)",
3562                          Py_TYPE(fixed_module_name)->tp_name,
3563                          Py_TYPE(fixed_global_name)->tp_name);
3564             return -1;
3565         }
3566 
3567         Py_CLEAR(*module_name);
3568         Py_CLEAR(*global_name);
3569         *module_name = Py_NewRef(fixed_module_name);
3570         *global_name = Py_NewRef(fixed_global_name);
3571         return 0;
3572     }
3573     else if (PyErr_Occurred()) {
3574         return -1;
3575     }
3576 
3577     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3578     if (item) {
3579         if (!PyUnicode_Check(item)) {
3580             PyErr_Format(PyExc_RuntimeError,
3581                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3582                          "should be strings, not %.200s",
3583                          Py_TYPE(item)->tp_name);
3584             return -1;
3585         }
3586         Py_XSETREF(*module_name, Py_NewRef(item));
3587     }
3588     else if (PyErr_Occurred()) {
3589         return -1;
3590     }
3591 
3592     return 0;
3593 }
3594 
3595 static int
save_global(PickleState * st,PicklerObject * self,PyObject * obj,PyObject * name)3596 save_global(PickleState *st, PicklerObject *self, PyObject *obj,
3597             PyObject *name)
3598 {
3599     PyObject *global_name = NULL;
3600     PyObject *module_name = NULL;
3601     PyObject *module = NULL;
3602     PyObject *parent = NULL;
3603     PyObject *dotted_path = NULL;
3604     PyObject *cls;
3605     int status = 0;
3606 
3607     const char global_op = GLOBAL;
3608 
3609     if (name) {
3610         global_name = Py_NewRef(name);
3611     }
3612     else {
3613         if (PyObject_GetOptionalAttr(obj, &_Py_ID(__qualname__), &global_name) < 0)
3614             goto error;
3615         if (global_name == NULL) {
3616             global_name = PyObject_GetAttr(obj, &_Py_ID(__name__));
3617             if (global_name == NULL)
3618                 goto error;
3619         }
3620     }
3621 
3622     dotted_path = get_dotted_path(module, global_name);
3623     if (dotted_path == NULL)
3624         goto error;
3625     module_name = whichmodule(obj, dotted_path);
3626     if (module_name == NULL)
3627         goto error;
3628 
3629     /* XXX: Change to use the import C API directly with level=0 to disallow
3630        relative imports.
3631 
3632        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3633        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3634        custom import functions (IMHO, this would be a nice security
3635        feature). The import C API would need to be extended to support the
3636        extra parameters of __import__ to fix that. */
3637     module = PyImport_Import(module_name);
3638     if (module == NULL) {
3639         PyErr_Format(st->PicklingError,
3640                      "Can't pickle %R: import of module %R failed",
3641                      obj, module_name);
3642         goto error;
3643     }
3644     cls = get_deep_attribute(module, dotted_path, &parent);
3645     if (cls == NULL) {
3646         PyErr_Format(st->PicklingError,
3647                      "Can't pickle %R: attribute lookup %S on %S failed",
3648                      obj, global_name, module_name);
3649         goto error;
3650     }
3651     if (cls != obj) {
3652         Py_DECREF(cls);
3653         PyErr_Format(st->PicklingError,
3654                      "Can't pickle %R: it's not the same object as %S.%S",
3655                      obj, module_name, global_name);
3656         goto error;
3657     }
3658     Py_DECREF(cls);
3659 
3660     if (self->proto >= 2) {
3661         /* See whether this is in the extension registry, and if
3662          * so generate an EXT opcode.
3663          */
3664         PyObject *extension_key;
3665         PyObject *code_obj;      /* extension code as Python object */
3666         long code;               /* extension code as C value */
3667         char pdata[5];
3668         Py_ssize_t n;
3669 
3670         extension_key = PyTuple_Pack(2, module_name, global_name);
3671         if (extension_key == NULL) {
3672             goto error;
3673         }
3674         if (PyDict_GetItemRef(st->extension_registry, extension_key, &code_obj) < 0) {
3675             Py_DECREF(extension_key);
3676             goto error;
3677         }
3678         Py_DECREF(extension_key);
3679         if (code_obj == NULL) {
3680             /* The object is not registered in the extension registry.
3681                This is the most likely code path. */
3682             goto gen_global;
3683         }
3684 
3685         code = PyLong_AsLong(code_obj);
3686         Py_DECREF(code_obj);
3687         if (code <= 0 || code > 0x7fffffffL) {
3688             /* Should never happen in normal circumstances, since the type and
3689                the value of the code are checked in copyreg.add_extension(). */
3690             if (!PyErr_Occurred())
3691                 PyErr_Format(PyExc_RuntimeError, "extension code %ld is out of range", code);
3692             goto error;
3693         }
3694 
3695         /* Generate an EXT opcode. */
3696         if (code <= 0xff) {
3697             pdata[0] = EXT1;
3698             pdata[1] = (unsigned char)code;
3699             n = 2;
3700         }
3701         else if (code <= 0xffff) {
3702             pdata[0] = EXT2;
3703             pdata[1] = (unsigned char)(code & 0xff);
3704             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3705             n = 3;
3706         }
3707         else {
3708             pdata[0] = EXT4;
3709             pdata[1] = (unsigned char)(code & 0xff);
3710             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3711             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3712             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3713             n = 5;
3714         }
3715 
3716         if (_Pickler_Write(self, pdata, n) < 0)
3717             goto error;
3718     }
3719     else {
3720   gen_global:
3721         if (parent == module) {
3722             Py_SETREF(global_name,
3723                 Py_NewRef(PyList_GET_ITEM(dotted_path,
3724                                           PyList_GET_SIZE(dotted_path) - 1)));
3725             Py_CLEAR(dotted_path);
3726         }
3727         if (self->proto >= 4) {
3728             const char stack_global_op = STACK_GLOBAL;
3729 
3730             if (save(st, self, module_name, 0) < 0)
3731                 goto error;
3732             if (save(st, self, global_name, 0) < 0)
3733                 goto error;
3734 
3735             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3736                 goto error;
3737         }
3738         else {
3739             /* Generate a normal global opcode if we are using a pickle
3740                protocol < 4, or if the object is not registered in the
3741                extension registry.
3742 
3743                Objects with multi-part __qualname__ are represented as
3744                getattr(getattr(..., attrname1), attrname2). */
3745             const char mark_op = MARK;
3746             const char tupletwo_op = (self->proto < 2) ? TUPLE : TUPLE2;
3747             const char reduce_op = REDUCE;
3748             Py_ssize_t i;
3749             if (dotted_path) {
3750                 if (PyList_GET_SIZE(dotted_path) > 1) {
3751                     Py_SETREF(global_name, Py_NewRef(PyList_GET_ITEM(dotted_path, 0)));
3752                 }
3753                 for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) {
3754                     if (save(st, self, st->getattr, 0) < 0 ||
3755                         (self->proto < 2 && _Pickler_Write(self, &mark_op, 1) < 0))
3756                     {
3757                         goto error;
3758                     }
3759                 }
3760             }
3761 
3762             PyObject *encoded;
3763             PyObject *(*unicode_encoder)(PyObject *);
3764 
3765             if (_Pickler_Write(self, &global_op, 1) < 0)
3766                 goto error;
3767 
3768             /* For protocol < 3 and if the user didn't request against doing
3769                so, we convert module names to the old 2.x module names. */
3770             if (self->proto < 3 && self->fix_imports) {
3771                 if (fix_imports(st, &module_name, &global_name) < 0) {
3772                     goto error;
3773                 }
3774             }
3775 
3776             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3777                both the module name and the global name using UTF-8. We do so
3778                only when we are using the pickle protocol newer than version
3779                3. This is to ensure compatibility with older Unpickler running
3780                on Python 2.x. */
3781             if (self->proto == 3) {
3782                 unicode_encoder = PyUnicode_AsUTF8String;
3783             }
3784             else {
3785                 unicode_encoder = PyUnicode_AsASCIIString;
3786             }
3787             encoded = unicode_encoder(module_name);
3788             if (encoded == NULL) {
3789                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3790                     PyErr_Format(st->PicklingError,
3791                                  "can't pickle module identifier '%S' using "
3792                                  "pickle protocol %i",
3793                                  module_name, self->proto);
3794                 goto error;
3795             }
3796             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3797                                PyBytes_GET_SIZE(encoded)) < 0) {
3798                 Py_DECREF(encoded);
3799                 goto error;
3800             }
3801             Py_DECREF(encoded);
3802             if(_Pickler_Write(self, "\n", 1) < 0)
3803                 goto error;
3804 
3805             /* Save the name of the module. */
3806             encoded = unicode_encoder(global_name);
3807             if (encoded == NULL) {
3808                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3809                     PyErr_Format(st->PicklingError,
3810                                  "can't pickle global identifier '%S' using "
3811                                  "pickle protocol %i",
3812                                  global_name, self->proto);
3813                 goto error;
3814             }
3815             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3816                                PyBytes_GET_SIZE(encoded)) < 0) {
3817                 Py_DECREF(encoded);
3818                 goto error;
3819             }
3820             Py_DECREF(encoded);
3821             if (_Pickler_Write(self, "\n", 1) < 0)
3822                 goto error;
3823 
3824             if (dotted_path) {
3825                 for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) {
3826                     if (save(st, self, PyList_GET_ITEM(dotted_path, i), 0) < 0 ||
3827                         _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
3828                         _Pickler_Write(self, &reduce_op, 1) < 0)
3829                     {
3830                         goto error;
3831                     }
3832                 }
3833             }
3834         }
3835         /* Memoize the object. */
3836         if (memo_put(st, self, obj) < 0)
3837             goto error;
3838     }
3839 
3840     if (0) {
3841   error:
3842         status = -1;
3843     }
3844     Py_XDECREF(module_name);
3845     Py_XDECREF(global_name);
3846     Py_XDECREF(module);
3847     Py_XDECREF(parent);
3848     Py_XDECREF(dotted_path);
3849 
3850     return status;
3851 }
3852 
3853 static int
save_singleton_type(PickleState * state,PicklerObject * self,PyObject * obj,PyObject * singleton)3854 save_singleton_type(PickleState *state, PicklerObject *self, PyObject *obj,
3855                     PyObject *singleton)
3856 {
3857     PyObject *reduce_value;
3858     int status;
3859 
3860     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3861     if (reduce_value == NULL) {
3862         return -1;
3863     }
3864     status = save_reduce(state, self, reduce_value, obj);
3865     Py_DECREF(reduce_value);
3866     return status;
3867 }
3868 
3869 static int
save_type(PickleState * state,PicklerObject * self,PyObject * obj)3870 save_type(PickleState *state, PicklerObject *self, PyObject *obj)
3871 {
3872     if (obj == (PyObject *)&_PyNone_Type) {
3873         return save_singleton_type(state, self, obj, Py_None);
3874     }
3875     else if (obj == (PyObject *)&PyEllipsis_Type) {
3876         return save_singleton_type(state, self, obj, Py_Ellipsis);
3877     }
3878     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3879         return save_singleton_type(state, self, obj, Py_NotImplemented);
3880     }
3881     return save_global(state, self, obj, NULL);
3882 }
3883 
3884 static int
save_pers(PickleState * state,PicklerObject * self,PyObject * obj)3885 save_pers(PickleState *state, PicklerObject *self, PyObject *obj)
3886 {
3887     PyObject *pid = NULL;
3888     int status = 0;
3889 
3890     const char persid_op = PERSID;
3891     const char binpersid_op = BINPERSID;
3892 
3893     pid = PyObject_CallOneArg(self->persistent_id, obj);
3894     if (pid == NULL)
3895         return -1;
3896 
3897     if (pid != Py_None) {
3898         if (self->bin) {
3899             if (save(state, self, pid, 1) < 0 ||
3900                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3901                 goto error;
3902         }
3903         else {
3904             PyObject *pid_str;
3905 
3906             pid_str = PyObject_Str(pid);
3907             if (pid_str == NULL)
3908                 goto error;
3909 
3910             /* XXX: Should it check whether the pid contains embedded
3911                newlines? */
3912             if (!PyUnicode_IS_ASCII(pid_str)) {
3913                 PyErr_SetString(state->PicklingError,
3914                                 "persistent IDs in protocol 0 must be "
3915                                 "ASCII strings");
3916                 Py_DECREF(pid_str);
3917                 goto error;
3918             }
3919 
3920             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3921                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3922                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3923                 _Pickler_Write(self, "\n", 1) < 0) {
3924                 Py_DECREF(pid_str);
3925                 goto error;
3926             }
3927             Py_DECREF(pid_str);
3928         }
3929         status = 1;
3930     }
3931 
3932     if (0) {
3933   error:
3934         status = -1;
3935     }
3936     Py_XDECREF(pid);
3937 
3938     return status;
3939 }
3940 
3941 static PyObject *
get_class(PyObject * obj)3942 get_class(PyObject *obj)
3943 {
3944     PyObject *cls;
3945 
3946     if (PyObject_GetOptionalAttr(obj, &_Py_ID(__class__), &cls) == 0) {
3947         cls = Py_NewRef(Py_TYPE(obj));
3948     }
3949     return cls;
3950 }
3951 
3952 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3953  * appropriate __reduce__ method for obj.
3954  */
3955 static int
save_reduce(PickleState * st,PicklerObject * self,PyObject * args,PyObject * obj)3956 save_reduce(PickleState *st, PicklerObject *self, PyObject *args,
3957             PyObject *obj)
3958 {
3959     PyObject *callable;
3960     PyObject *argtup;
3961     PyObject *state = NULL;
3962     PyObject *listitems = Py_None;
3963     PyObject *dictitems = Py_None;
3964     PyObject *state_setter = Py_None;
3965     Py_ssize_t size;
3966     int use_newobj = 0, use_newobj_ex = 0;
3967 
3968     const char reduce_op = REDUCE;
3969     const char build_op = BUILD;
3970     const char newobj_op = NEWOBJ;
3971     const char newobj_ex_op = NEWOBJ_EX;
3972 
3973     size = PyTuple_Size(args);
3974     if (size < 2 || size > 6) {
3975         PyErr_SetString(st->PicklingError, "tuple returned by "
3976                         "__reduce__ must contain 2 through 6 elements");
3977         return -1;
3978     }
3979 
3980     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3981                            &callable, &argtup, &state, &listitems, &dictitems,
3982                            &state_setter))
3983         return -1;
3984 
3985     if (!PyCallable_Check(callable)) {
3986         PyErr_SetString(st->PicklingError, "first item of the tuple "
3987                         "returned by __reduce__ must be callable");
3988         return -1;
3989     }
3990     if (!PyTuple_Check(argtup)) {
3991         PyErr_SetString(st->PicklingError, "second item of the tuple "
3992                         "returned by __reduce__ must be a tuple");
3993         return -1;
3994     }
3995 
3996     if (state == Py_None)
3997         state = NULL;
3998 
3999     if (listitems == Py_None)
4000         listitems = NULL;
4001     else if (!PyIter_Check(listitems)) {
4002         PyErr_Format(st->PicklingError, "fourth element of the tuple "
4003                      "returned by __reduce__ must be an iterator, not %s",
4004                      Py_TYPE(listitems)->tp_name);
4005         return -1;
4006     }
4007 
4008     if (dictitems == Py_None)
4009         dictitems = NULL;
4010     else if (!PyIter_Check(dictitems)) {
4011         PyErr_Format(st->PicklingError, "fifth element of the tuple "
4012                      "returned by __reduce__ must be an iterator, not %s",
4013                      Py_TYPE(dictitems)->tp_name);
4014         return -1;
4015     }
4016 
4017     if (state_setter == Py_None)
4018         state_setter = NULL;
4019     else if (!PyCallable_Check(state_setter)) {
4020         PyErr_Format(st->PicklingError, "sixth element of the tuple "
4021                      "returned by __reduce__ must be a function, not %s",
4022                      Py_TYPE(state_setter)->tp_name);
4023         return -1;
4024     }
4025 
4026     if (self->proto >= 2) {
4027         PyObject *name;
4028 
4029         if (PyObject_GetOptionalAttr(callable, &_Py_ID(__name__), &name) < 0) {
4030             return -1;
4031         }
4032         if (name != NULL && PyUnicode_Check(name)) {
4033             use_newobj_ex = _PyUnicode_Equal(name, &_Py_ID(__newobj_ex__));
4034             if (!use_newobj_ex) {
4035                 use_newobj = _PyUnicode_Equal(name, &_Py_ID(__newobj__));
4036             }
4037         }
4038         Py_XDECREF(name);
4039     }
4040 
4041     if (use_newobj_ex) {
4042         PyObject *cls;
4043         PyObject *args;
4044         PyObject *kwargs;
4045 
4046         if (PyTuple_GET_SIZE(argtup) != 3) {
4047             PyErr_Format(st->PicklingError,
4048                          "length of the NEWOBJ_EX argument tuple must be "
4049                          "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4050             return -1;
4051         }
4052 
4053         cls = PyTuple_GET_ITEM(argtup, 0);
4054         if (!PyType_Check(cls)) {
4055             PyErr_Format(st->PicklingError,
4056                          "first item from NEWOBJ_EX argument tuple must "
4057                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4058             return -1;
4059         }
4060         args = PyTuple_GET_ITEM(argtup, 1);
4061         if (!PyTuple_Check(args)) {
4062             PyErr_Format(st->PicklingError,
4063                          "second item from NEWOBJ_EX argument tuple must "
4064                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4065             return -1;
4066         }
4067         kwargs = PyTuple_GET_ITEM(argtup, 2);
4068         if (!PyDict_Check(kwargs)) {
4069             PyErr_Format(st->PicklingError,
4070                          "third item from NEWOBJ_EX argument tuple must "
4071                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4072             return -1;
4073         }
4074 
4075         if (self->proto >= 4) {
4076             if (save(st, self, cls, 0) < 0 ||
4077                 save(st, self, args, 0) < 0 ||
4078                 save(st, self, kwargs, 0) < 0 ||
4079                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4080                 return -1;
4081             }
4082         }
4083         else {
4084             PyObject *newargs;
4085             PyObject *cls_new;
4086             Py_ssize_t i;
4087 
4088             newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4089             if (newargs == NULL)
4090                 return -1;
4091 
4092             cls_new = PyObject_GetAttr(cls, &_Py_ID(__new__));
4093             if (cls_new == NULL) {
4094                 Py_DECREF(newargs);
4095                 return -1;
4096             }
4097             PyTuple_SET_ITEM(newargs, 0, cls_new);
4098             PyTuple_SET_ITEM(newargs, 1, Py_NewRef(cls));
4099             for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4100                 PyObject *item = PyTuple_GET_ITEM(args, i);
4101                 PyTuple_SET_ITEM(newargs, i + 2, Py_NewRef(item));
4102             }
4103 
4104             callable = PyObject_Call(st->partial, newargs, kwargs);
4105             Py_DECREF(newargs);
4106             if (callable == NULL)
4107                 return -1;
4108 
4109             newargs = PyTuple_New(0);
4110             if (newargs == NULL) {
4111                 Py_DECREF(callable);
4112                 return -1;
4113             }
4114 
4115             if (save(st, self, callable, 0) < 0 ||
4116                 save(st, self, newargs, 0) < 0 ||
4117                 _Pickler_Write(self, &reduce_op, 1) < 0) {
4118                 Py_DECREF(newargs);
4119                 Py_DECREF(callable);
4120                 return -1;
4121             }
4122             Py_DECREF(newargs);
4123             Py_DECREF(callable);
4124         }
4125     }
4126     else if (use_newobj) {
4127         PyObject *cls;
4128         PyObject *newargtup;
4129         PyObject *obj_class;
4130         int p;
4131 
4132         /* Sanity checks. */
4133         if (PyTuple_GET_SIZE(argtup) < 1) {
4134             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4135             return -1;
4136         }
4137 
4138         cls = PyTuple_GET_ITEM(argtup, 0);
4139         if (!PyType_Check(cls)) {
4140             PyErr_SetString(st->PicklingError, "args[0] from "
4141                             "__newobj__ args is not a type");
4142             return -1;
4143         }
4144 
4145         if (obj != NULL) {
4146             obj_class = get_class(obj);
4147             if (obj_class == NULL) {
4148                 return -1;
4149             }
4150             p = obj_class != cls;
4151             Py_DECREF(obj_class);
4152             if (p) {
4153                 PyErr_SetString(st->PicklingError, "args[0] from "
4154                                 "__newobj__ args has the wrong class");
4155                 return -1;
4156             }
4157         }
4158         /* XXX: These calls save() are prone to infinite recursion. Imagine
4159            what happen if the value returned by the __reduce__() method of
4160            some extension type contains another object of the same type. Ouch!
4161 
4162            Here is a quick example, that I ran into, to illustrate what I
4163            mean:
4164 
4165              >>> import pickle, copyreg
4166              >>> copyreg.dispatch_table.pop(complex)
4167              >>> pickle.dumps(1+2j)
4168              Traceback (most recent call last):
4169                ...
4170              RecursionError: maximum recursion depth exceeded
4171 
4172            Removing the complex class from copyreg.dispatch_table made the
4173            __reduce_ex__() method emit another complex object:
4174 
4175              >>> (1+1j).__reduce_ex__(2)
4176              (<function __newobj__ at 0xb7b71c3c>,
4177                (<class 'complex'>, (1+1j)), None, None, None)
4178 
4179            Thus when save() was called on newargstup (the 2nd item) recursion
4180            ensued. Of course, the bug was in the complex class which had a
4181            broken __getnewargs__() that emitted another complex object. But,
4182            the point, here, is it is quite easy to end up with a broken reduce
4183            function. */
4184 
4185         /* Save the class and its __new__ arguments. */
4186         if (save(st, self, cls, 0) < 0) {
4187             return -1;
4188         }
4189 
4190         newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4191         if (newargtup == NULL)
4192             return -1;
4193 
4194         p = save(st, self, newargtup, 0);
4195         Py_DECREF(newargtup);
4196         if (p < 0)
4197             return -1;
4198 
4199         /* Add NEWOBJ opcode. */
4200         if (_Pickler_Write(self, &newobj_op, 1) < 0)
4201             return -1;
4202     }
4203     else { /* Not using NEWOBJ. */
4204         if (save(st, self, callable, 0) < 0 ||
4205             save(st, self, argtup, 0) < 0 ||
4206             _Pickler_Write(self, &reduce_op, 1) < 0)
4207             return -1;
4208     }
4209 
4210     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4211        the caller do not want to memoize the object. Not particularly useful,
4212        but that is to mimic the behavior save_reduce() in pickle.py when
4213        obj is None. */
4214     if (obj != NULL) {
4215         /* If the object is already in the memo, this means it is
4216            recursive. In this case, throw away everything we put on the
4217            stack, and fetch the object back from the memo. */
4218         if (PyMemoTable_Get(self->memo, obj)) {
4219             const char pop_op = POP;
4220 
4221             if (_Pickler_Write(self, &pop_op, 1) < 0)
4222                 return -1;
4223             if (memo_get(st, self, obj) < 0)
4224                 return -1;
4225 
4226             return 0;
4227         }
4228         else if (memo_put(st, self, obj) < 0)
4229             return -1;
4230     }
4231 
4232     if (listitems && batch_list(st, self, listitems) < 0)
4233         return -1;
4234 
4235     if (dictitems && batch_dict(st, self, dictitems) < 0)
4236         return -1;
4237 
4238     if (state) {
4239         if (state_setter == NULL) {
4240             if (save(st, self, state, 0) < 0 ||
4241                 _Pickler_Write(self, &build_op, 1) < 0)
4242                 return -1;
4243         }
4244         else {
4245 
4246             /* If a state_setter is specified, call it instead of load_build to
4247              * update obj's with its previous state.
4248              * The first 4 save/write instructions push state_setter and its
4249              * tuple of expected arguments (obj, state) onto the stack. The
4250              * REDUCE opcode triggers the state_setter(obj, state) function
4251              * call. Finally, because state-updating routines only do in-place
4252              * modification, the whole operation has to be stack-transparent.
4253              * Thus, we finally pop the call's output from the stack.*/
4254 
4255             const char tupletwo_op = TUPLE2;
4256             const char pop_op = POP;
4257             if (save(st, self, state_setter, 0) < 0 ||
4258                 save(st, self, obj, 0) < 0 || save(st, self, state, 0) < 0 ||
4259                 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4260                 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4261                 _Pickler_Write(self, &pop_op, 1) < 0)
4262                 return -1;
4263         }
4264     }
4265     return 0;
4266 }
4267 
4268 static int
save(PickleState * st,PicklerObject * self,PyObject * obj,int pers_save)4269 save(PickleState *st, PicklerObject *self, PyObject *obj, int pers_save)
4270 {
4271     PyTypeObject *type;
4272     PyObject *reduce_func = NULL;
4273     PyObject *reduce_value = NULL;
4274     int status = 0;
4275 
4276     if (_Pickler_OpcodeBoundary(self) < 0)
4277         return -1;
4278 
4279     /* The extra pers_save argument is necessary to avoid calling save_pers()
4280        on its returned object. */
4281     if (!pers_save && self->persistent_id) {
4282         /* save_pers() returns:
4283             -1   to signal an error;
4284              0   if it did nothing successfully;
4285              1   if a persistent id was saved.
4286          */
4287         if ((status = save_pers(st, self, obj)) != 0)
4288             return status;
4289     }
4290 
4291     type = Py_TYPE(obj);
4292 
4293     /* The old cPickle had an optimization that used switch-case statement
4294        dispatching on the first letter of the type name.  This has was removed
4295        since benchmarks shown that this optimization was actually slowing
4296        things down. */
4297 
4298     /* Atom types; these aren't memoized, so don't check the memo. */
4299 
4300     if (obj == Py_None) {
4301         return save_none(self, obj);
4302     }
4303     else if (obj == Py_False || obj == Py_True) {
4304         return save_bool(self, obj);
4305     }
4306     else if (type == &PyLong_Type) {
4307         return save_long(self, obj);
4308     }
4309     else if (type == &PyFloat_Type) {
4310         return save_float(self, obj);
4311     }
4312 
4313     /* Check the memo to see if it has the object. If so, generate
4314        a GET (or BINGET) opcode, instead of pickling the object
4315        once again. */
4316     if (PyMemoTable_Get(self->memo, obj)) {
4317         return memo_get(st, self, obj);
4318     }
4319 
4320     if (type == &PyBytes_Type) {
4321         return save_bytes(st, self, obj);
4322     }
4323     else if (type == &PyUnicode_Type) {
4324         return save_unicode(st, self, obj);
4325     }
4326 
4327     /* We're only calling _Py_EnterRecursiveCall here so that atomic
4328        types above are pickled faster. */
4329     if (_Py_EnterRecursiveCall(" while pickling an object")) {
4330         return -1;
4331     }
4332 
4333     if (type == &PyDict_Type) {
4334         status = save_dict(st, self, obj);
4335         goto done;
4336     }
4337     else if (type == &PySet_Type) {
4338         status = save_set(st, self, obj);
4339         goto done;
4340     }
4341     else if (type == &PyFrozenSet_Type) {
4342         status = save_frozenset(st, self, obj);
4343         goto done;
4344     }
4345     else if (type == &PyList_Type) {
4346         status = save_list(st, self, obj);
4347         goto done;
4348     }
4349     else if (type == &PyTuple_Type) {
4350         status = save_tuple(st, self, obj);
4351         goto done;
4352     }
4353     else if (type == &PyByteArray_Type) {
4354         status = save_bytearray(st, self, obj);
4355         goto done;
4356     }
4357     else if (type == &PyPickleBuffer_Type) {
4358         status = save_picklebuffer(st, self, obj);
4359         goto done;
4360     }
4361 
4362     /* Now, check reducer_override.  If it returns NotImplemented,
4363      * fallback to save_type or save_global, and then perhaps to the
4364      * regular reduction mechanism.
4365      */
4366     if (self->reducer_override != NULL) {
4367         reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4368         if (reduce_value == NULL) {
4369             goto error;
4370         }
4371         if (reduce_value != Py_NotImplemented) {
4372             goto reduce;
4373         }
4374         Py_SETREF(reduce_value, NULL);
4375     }
4376 
4377     if (type == &PyType_Type) {
4378         status = save_type(st, self, obj);
4379         goto done;
4380     }
4381     else if (type == &PyFunction_Type) {
4382         status = save_global(st, self, obj, NULL);
4383         goto done;
4384     }
4385 
4386     /* XXX: This part needs some unit tests. */
4387 
4388     /* Get a reduction callable, and call it.  This may come from
4389      * self.dispatch_table, copyreg.dispatch_table, the object's
4390      * __reduce_ex__ method, or the object's __reduce__ method.
4391      */
4392     if (self->dispatch_table == NULL) {
4393         reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4394                                               (PyObject *)type);
4395         if (reduce_func == NULL) {
4396             if (PyErr_Occurred()) {
4397                 goto error;
4398             }
4399         } else {
4400             /* PyDict_GetItemWithError() returns a borrowed reference.
4401                Increase the reference count to be consistent with
4402                PyObject_GetItem and _PyObject_GetAttrId used below. */
4403             Py_INCREF(reduce_func);
4404         }
4405     }
4406     else if (PyMapping_GetOptionalItem(self->dispatch_table, (PyObject *)type,
4407                                        &reduce_func) < 0)
4408     {
4409         goto error;
4410     }
4411 
4412     if (reduce_func != NULL) {
4413         reduce_value = _Pickle_FastCall(reduce_func, Py_NewRef(obj));
4414     }
4415     else if (PyType_IsSubtype(type, &PyType_Type)) {
4416         status = save_global(st, self, obj, NULL);
4417         goto done;
4418     }
4419     else {
4420         /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4421            automatically defined as __reduce__. While this is convenient, this
4422            make it impossible to know which method was actually called. Of
4423            course, this is not a big deal. But still, it would be nice to let
4424            the user know which method was called when something go
4425            wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4426            don't actually have to check for a __reduce__ method. */
4427 
4428         /* Check for a __reduce_ex__ method. */
4429         if (PyObject_GetOptionalAttr(obj, &_Py_ID(__reduce_ex__), &reduce_func) < 0) {
4430             goto error;
4431         }
4432         if (reduce_func != NULL) {
4433             PyObject *proto;
4434             proto = PyLong_FromLong(self->proto);
4435             if (proto != NULL) {
4436                 reduce_value = _Pickle_FastCall(reduce_func, proto);
4437             }
4438         }
4439         else {
4440             /* Check for a __reduce__ method. */
4441             if (PyObject_GetOptionalAttr(obj, &_Py_ID(__reduce__), &reduce_func) < 0) {
4442                 goto error;
4443             }
4444             if (reduce_func != NULL) {
4445                 reduce_value = PyObject_CallNoArgs(reduce_func);
4446             }
4447             else {
4448                 PyErr_Format(st->PicklingError,
4449                              "can't pickle '%.200s' object: %R",
4450                              type->tp_name, obj);
4451                 goto error;
4452             }
4453         }
4454     }
4455 
4456     if (reduce_value == NULL)
4457         goto error;
4458 
4459   reduce:
4460     if (PyUnicode_Check(reduce_value)) {
4461         status = save_global(st, self, obj, reduce_value);
4462         goto done;
4463     }
4464 
4465     if (!PyTuple_Check(reduce_value)) {
4466         PyErr_SetString(st->PicklingError,
4467                         "__reduce__ must return a string or tuple");
4468         goto error;
4469     }
4470 
4471     status = save_reduce(st, self, reduce_value, obj);
4472 
4473     if (0) {
4474   error:
4475         status = -1;
4476     }
4477   done:
4478 
4479     _Py_LeaveRecursiveCall();
4480     Py_XDECREF(reduce_func);
4481     Py_XDECREF(reduce_value);
4482 
4483     return status;
4484 }
4485 
4486 static PyObject *
persistent_id(PyObject * self,PyObject * obj)4487 persistent_id(PyObject *self, PyObject *obj)
4488 {
4489     Py_RETURN_NONE;
4490 }
4491 
4492 static int
dump(PickleState * state,PicklerObject * self,PyObject * obj)4493 dump(PickleState *state, PicklerObject *self, PyObject *obj)
4494 {
4495     const char stop_op = STOP;
4496     int status = -1;
4497     PyObject *tmp;
4498 
4499     /* Cache the persistent_id method. */
4500     tmp = PyObject_GetAttr((PyObject *)self, &_Py_ID(persistent_id));
4501     if (tmp == NULL) {
4502         goto error;
4503     }
4504     if (PyCFunction_Check(tmp) &&
4505         PyCFunction_GET_SELF(tmp) == (PyObject *)self &&
4506         PyCFunction_GET_FUNCTION(tmp) == persistent_id)
4507     {
4508         Py_CLEAR(tmp);
4509     }
4510     Py_XSETREF(self->persistent_id, tmp);
4511 
4512     /* Cache the reducer_override method, if it exists. */
4513     if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(reducer_override),
4514                              &tmp) < 0) {
4515         goto error;
4516     }
4517     Py_XSETREF(self->reducer_override, tmp);
4518 
4519     if (self->proto >= 2) {
4520         char header[2];
4521 
4522         header[0] = PROTO;
4523         assert(self->proto >= 0 && self->proto < 256);
4524         header[1] = (unsigned char)self->proto;
4525         if (_Pickler_Write(self, header, 2) < 0)
4526             goto error;
4527         if (self->proto >= 4)
4528             self->framing = 1;
4529     }
4530 
4531     if (save(state, self, obj, 0) < 0 ||
4532         _Pickler_Write(self, &stop_op, 1) < 0 ||
4533         _Pickler_CommitFrame(self) < 0)
4534         goto error;
4535 
4536     // Success
4537     status = 0;
4538 
4539   error:
4540     self->framing = 0;
4541 
4542     /* Break the reference cycle we generated at the beginning this function
4543      * call when setting the persistent_id and the reducer_override attributes
4544      * of the Pickler instance to a bound method of the same instance.
4545      * This is important as the Pickler instance holds a reference to each
4546      * object it has pickled (through its memo): thus, these objects won't
4547      * be garbage-collected as long as the Pickler itself is not collected. */
4548     Py_CLEAR(self->persistent_id);
4549     Py_CLEAR(self->reducer_override);
4550     return status;
4551 }
4552 
4553 /*[clinic input]
4554 
4555 _pickle.Pickler.clear_memo
4556 
4557 Clears the pickler's "memo".
4558 
4559 The memo is the data structure that remembers which objects the
4560 pickler has already seen, so that shared or recursive objects are
4561 pickled by reference and not by value.  This method is useful when
4562 re-using picklers.
4563 [clinic start generated code]*/
4564 
4565 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4566 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4567 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4568 {
4569     if (self->memo)
4570         PyMemoTable_Clear(self->memo);
4571 
4572     Py_RETURN_NONE;
4573 }
4574 
4575 /*[clinic input]
4576 
4577 _pickle.Pickler.dump
4578 
4579   cls: defining_class
4580   obj: object
4581   /
4582 
4583 Write a pickled representation of the given object to the open file.
4584 [clinic start generated code]*/
4585 
4586 static PyObject *
_pickle_Pickler_dump_impl(PicklerObject * self,PyTypeObject * cls,PyObject * obj)4587 _pickle_Pickler_dump_impl(PicklerObject *self, PyTypeObject *cls,
4588                           PyObject *obj)
4589 /*[clinic end generated code: output=952cf7f68b1445bb input=f949d84151983594]*/
4590 {
4591     PickleState *st = _Pickle_GetStateByClass(cls);
4592     /* Check whether the Pickler was initialized correctly (issue3664).
4593        Developers often forget to call __init__() in their subclasses, which
4594        would trigger a segfault without this check. */
4595     if (self->write == NULL) {
4596         PyErr_Format(st->PicklingError,
4597                      "Pickler.__init__() was not called by %s.__init__()",
4598                      Py_TYPE(self)->tp_name);
4599         return NULL;
4600     }
4601 
4602     if (_Pickler_ClearBuffer(self) < 0)
4603         return NULL;
4604 
4605     if (dump(st, self, obj) < 0)
4606         return NULL;
4607 
4608     if (_Pickler_FlushToFile(self) < 0)
4609         return NULL;
4610 
4611     Py_RETURN_NONE;
4612 }
4613 
4614 /*[clinic input]
4615 
4616 _pickle.Pickler.__sizeof__ -> size_t
4617 
4618 Returns size in memory, in bytes.
4619 [clinic start generated code]*/
4620 
4621 static size_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4622 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4623 /*[clinic end generated code: output=23ad75658d3b59ff input=d8127c8e7012ebd7]*/
4624 {
4625     size_t res = _PyObject_SIZE(Py_TYPE(self));
4626     if (self->memo != NULL) {
4627         res += sizeof(PyMemoTable);
4628         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4629     }
4630     if (self->output_buffer != NULL) {
4631         size_t s = _PySys_GetSizeOf(self->output_buffer);
4632         if (s == (size_t)-1) {
4633             return -1;
4634         }
4635         res += s;
4636     }
4637     return res;
4638 }
4639 
4640 static struct PyMethodDef Pickler_methods[] = {
4641     {"persistent_id", persistent_id, METH_O,
4642         PyDoc_STR("persistent_id($self, obj, /)\n--\n\n")},
4643     _PICKLE_PICKLER_DUMP_METHODDEF
4644     _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4645     _PICKLE_PICKLER___SIZEOF___METHODDEF
4646     {NULL, NULL}                /* sentinel */
4647 };
4648 
4649 static int
Pickler_clear(PicklerObject * self)4650 Pickler_clear(PicklerObject *self)
4651 {
4652     Py_CLEAR(self->output_buffer);
4653     Py_CLEAR(self->write);
4654     Py_CLEAR(self->persistent_id);
4655     Py_CLEAR(self->dispatch_table);
4656     Py_CLEAR(self->fast_memo);
4657     Py_CLEAR(self->reducer_override);
4658     Py_CLEAR(self->buffer_callback);
4659 
4660     if (self->memo != NULL) {
4661         PyMemoTable *memo = self->memo;
4662         self->memo = NULL;
4663         PyMemoTable_Del(memo);
4664     }
4665     return 0;
4666 }
4667 
4668 static void
Pickler_dealloc(PicklerObject * self)4669 Pickler_dealloc(PicklerObject *self)
4670 {
4671     PyTypeObject *tp = Py_TYPE(self);
4672     PyObject_GC_UnTrack(self);
4673     (void)Pickler_clear(self);
4674     tp->tp_free((PyObject *)self);
4675     Py_DECREF(tp);
4676 }
4677 
4678 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4679 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4680 {
4681     Py_VISIT(Py_TYPE(self));
4682     Py_VISIT(self->write);
4683     Py_VISIT(self->persistent_id);
4684     Py_VISIT(self->dispatch_table);
4685     Py_VISIT(self->fast_memo);
4686     Py_VISIT(self->reducer_override);
4687     Py_VISIT(self->buffer_callback);
4688     PyMemoTable *memo = self->memo;
4689     if (memo && memo->mt_table) {
4690         Py_ssize_t i = memo->mt_allocated;
4691         while (--i >= 0) {
4692             Py_VISIT(memo->mt_table[i].me_key);
4693         }
4694     }
4695 
4696     return 0;
4697 }
4698 
4699 
4700 /*[clinic input]
4701 
4702 _pickle.Pickler.__init__
4703 
4704   file: object
4705   protocol: object = None
4706   fix_imports: bool = True
4707   buffer_callback: object = None
4708 
4709 This takes a binary file for writing a pickle data stream.
4710 
4711 The optional *protocol* argument tells the pickler to use the given
4712 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
4713 protocol is 4. It was introduced in Python 3.4, and is incompatible
4714 with previous versions.
4715 
4716 Specifying a negative protocol version selects the highest protocol
4717 version supported.  The higher the protocol used, the more recent the
4718 version of Python needed to read the pickle produced.
4719 
4720 The *file* argument must have a write() method that accepts a single
4721 bytes argument. It can thus be a file object opened for binary
4722 writing, an io.BytesIO instance, or any other custom object that meets
4723 this interface.
4724 
4725 If *fix_imports* is True and protocol is less than 3, pickle will try
4726 to map the new Python 3 names to the old module names used in Python
4727 2, so that the pickle data stream is readable with Python 2.
4728 
4729 If *buffer_callback* is None (the default), buffer views are
4730 serialized into *file* as part of the pickle stream.
4731 
4732 If *buffer_callback* is not None, then it can be called any number
4733 of times with a buffer view.  If the callback returns a false value
4734 (such as None), the given buffer is out-of-band; otherwise the
4735 buffer is serialized in-band, i.e. inside the pickle stream.
4736 
4737 It is an error if *buffer_callback* is not None and *protocol*
4738 is None or smaller than 5.
4739 
4740 [clinic start generated code]*/
4741 
4742 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4743 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4744                               PyObject *protocol, int fix_imports,
4745                               PyObject *buffer_callback)
4746 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4747 {
4748     /* In case of multiple __init__() calls, clear previous content. */
4749     if (self->write != NULL)
4750         (void)Pickler_clear(self);
4751 
4752     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4753         return -1;
4754 
4755     if (_Pickler_SetOutputStream(self, file) < 0)
4756         return -1;
4757 
4758     if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4759         return -1;
4760 
4761     /* memo and output_buffer may have already been created in _Pickler_New */
4762     if (self->memo == NULL) {
4763         self->memo = PyMemoTable_New();
4764         if (self->memo == NULL)
4765             return -1;
4766     }
4767     self->output_len = 0;
4768     if (self->output_buffer == NULL) {
4769         self->max_output_len = WRITE_BUF_SIZE;
4770         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4771                                                         self->max_output_len);
4772         if (self->output_buffer == NULL)
4773             return -1;
4774     }
4775 
4776     self->fast = 0;
4777     self->fast_nesting = 0;
4778     self->fast_memo = NULL;
4779 
4780     if (self->dispatch_table != NULL) {
4781         return 0;
4782     }
4783     if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(dispatch_table),
4784                              &self->dispatch_table) < 0) {
4785         return -1;
4786     }
4787 
4788     return 0;
4789 }
4790 
4791 
4792 /* Define a proxy object for the Pickler's internal memo object. This is to
4793  * avoid breaking code like:
4794  *  pickler.memo.clear()
4795  * and
4796  *  pickler.memo = saved_memo
4797  * Is this a good idea? Not really, but we don't want to break code that uses
4798  * it. Note that we don't implement the entire mapping API here. This is
4799  * intentional, as these should be treated as black-box implementation details.
4800  */
4801 
4802 /*[clinic input]
4803 _pickle.PicklerMemoProxy.clear
4804 
4805 Remove all items from memo.
4806 [clinic start generated code]*/
4807 
4808 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4809 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4810 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4811 {
4812     if (self->pickler->memo)
4813         PyMemoTable_Clear(self->pickler->memo);
4814     Py_RETURN_NONE;
4815 }
4816 
4817 /*[clinic input]
4818 _pickle.PicklerMemoProxy.copy
4819 
4820 Copy the memo to a new object.
4821 [clinic start generated code]*/
4822 
4823 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4824 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4825 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4826 {
4827     PyMemoTable *memo;
4828     PyObject *new_memo = PyDict_New();
4829     if (new_memo == NULL)
4830         return NULL;
4831 
4832     memo = self->pickler->memo;
4833     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4834         PyMemoEntry entry = memo->mt_table[i];
4835         if (entry.me_key != NULL) {
4836             int status;
4837             PyObject *key, *value;
4838 
4839             key = PyLong_FromVoidPtr(entry.me_key);
4840             if (key == NULL) {
4841                 goto error;
4842             }
4843             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4844             if (value == NULL) {
4845                 Py_DECREF(key);
4846                 goto error;
4847             }
4848             status = PyDict_SetItem(new_memo, key, value);
4849             Py_DECREF(key);
4850             Py_DECREF(value);
4851             if (status < 0)
4852                 goto error;
4853         }
4854     }
4855     return new_memo;
4856 
4857   error:
4858     Py_XDECREF(new_memo);
4859     return NULL;
4860 }
4861 
4862 /*[clinic input]
4863 _pickle.PicklerMemoProxy.__reduce__
4864 
4865 Implement pickle support.
4866 [clinic start generated code]*/
4867 
4868 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4869 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4870 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4871 {
4872     PyObject *reduce_value, *dict_args;
4873     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4874     if (contents == NULL)
4875         return NULL;
4876 
4877     reduce_value = PyTuple_New(2);
4878     if (reduce_value == NULL) {
4879         Py_DECREF(contents);
4880         return NULL;
4881     }
4882     dict_args = PyTuple_New(1);
4883     if (dict_args == NULL) {
4884         Py_DECREF(contents);
4885         Py_DECREF(reduce_value);
4886         return NULL;
4887     }
4888     PyTuple_SET_ITEM(dict_args, 0, contents);
4889     PyTuple_SET_ITEM(reduce_value, 0, Py_NewRef(&PyDict_Type));
4890     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4891     return reduce_value;
4892 }
4893 
4894 static PyMethodDef picklerproxy_methods[] = {
4895     _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4896     _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4897     _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4898     {NULL, NULL} /* sentinel */
4899 };
4900 
4901 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4902 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4903 {
4904     PyTypeObject *tp = Py_TYPE(self);
4905     PyObject_GC_UnTrack(self);
4906     Py_CLEAR(self->pickler);
4907     tp->tp_free((PyObject *)self);
4908     Py_DECREF(tp);
4909 }
4910 
4911 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4912 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4913                           visitproc visit, void *arg)
4914 {
4915     Py_VISIT(Py_TYPE(self));
4916     Py_VISIT(self->pickler);
4917     return 0;
4918 }
4919 
4920 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4921 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4922 {
4923     Py_CLEAR(self->pickler);
4924     return 0;
4925 }
4926 
4927 static PyType_Slot memoproxy_slots[] = {
4928     {Py_tp_dealloc, PicklerMemoProxy_dealloc},
4929     {Py_tp_traverse, PicklerMemoProxy_traverse},
4930     {Py_tp_clear, PicklerMemoProxy_clear},
4931     {Py_tp_methods, picklerproxy_methods},
4932     {Py_tp_hash, PyObject_HashNotImplemented},
4933     {0, NULL},
4934 };
4935 
4936 static PyType_Spec memoproxy_spec = {
4937     .name = "_pickle.PicklerMemoProxy",
4938     .basicsize = sizeof(PicklerMemoProxyObject),
4939     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
4940               Py_TPFLAGS_IMMUTABLETYPE),
4941     .slots = memoproxy_slots,
4942 };
4943 
4944 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4945 PicklerMemoProxy_New(PicklerObject *pickler)
4946 {
4947     PicklerMemoProxyObject *self;
4948     PickleState *st = _Pickle_FindStateByType(Py_TYPE(pickler));
4949     self = PyObject_GC_New(PicklerMemoProxyObject, st->PicklerMemoProxyType);
4950     if (self == NULL)
4951         return NULL;
4952     self->pickler = (PicklerObject*)Py_NewRef(pickler);
4953     PyObject_GC_Track(self);
4954     return (PyObject *)self;
4955 }
4956 
4957 /*****************************************************************************/
4958 
4959 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4960 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4961 {
4962     return PicklerMemoProxy_New(self);
4963 }
4964 
4965 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4966 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4967 {
4968     PyMemoTable *new_memo = NULL;
4969 
4970     if (obj == NULL) {
4971         PyErr_SetString(PyExc_TypeError,
4972                         "attribute deletion is not supported");
4973         return -1;
4974     }
4975 
4976     PickleState *st = _Pickle_FindStateByType(Py_TYPE(self));
4977     if (Py_IS_TYPE(obj, st->PicklerMemoProxyType)) {
4978         PicklerObject *pickler =
4979             ((PicklerMemoProxyObject *)obj)->pickler;
4980 
4981         new_memo = PyMemoTable_Copy(pickler->memo);
4982         if (new_memo == NULL)
4983             return -1;
4984     }
4985     else if (PyDict_Check(obj)) {
4986         Py_ssize_t i = 0;
4987         PyObject *key, *value;
4988 
4989         new_memo = PyMemoTable_New();
4990         if (new_memo == NULL)
4991             return -1;
4992 
4993         while (PyDict_Next(obj, &i, &key, &value)) {
4994             Py_ssize_t memo_id;
4995             PyObject *memo_obj;
4996 
4997             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4998                 PyErr_SetString(PyExc_TypeError,
4999                                 "'memo' values must be 2-item tuples");
5000                 goto error;
5001             }
5002             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5003             if (memo_id == -1 && PyErr_Occurred())
5004                 goto error;
5005             memo_obj = PyTuple_GET_ITEM(value, 1);
5006             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007                 goto error;
5008         }
5009     }
5010     else {
5011         PyErr_Format(PyExc_TypeError,
5012                      "'memo' attribute must be a PicklerMemoProxy object "
5013                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5014         return -1;
5015     }
5016 
5017     PyMemoTable_Del(self->memo);
5018     self->memo = new_memo;
5019 
5020     return 0;
5021 
5022   error:
5023     if (new_memo)
5024         PyMemoTable_Del(new_memo);
5025     return -1;
5026 }
5027 
5028 static PyObject *
Pickler_getattr(PyObject * self,PyObject * name)5029 Pickler_getattr(PyObject *self, PyObject *name)
5030 {
5031     if (PyUnicode_Check(name)
5032         && PyUnicode_EqualToUTF8(name, "persistent_id")
5033         && ((PicklerObject *)self)->persistent_id_attr)
5034     {
5035         return Py_NewRef(((PicklerObject *)self)->persistent_id_attr);
5036     }
5037 
5038     return PyObject_GenericGetAttr(self, name);
5039 }
5040 
5041 static int
Pickler_setattr(PyObject * self,PyObject * name,PyObject * value)5042 Pickler_setattr(PyObject *self, PyObject *name, PyObject *value)
5043 {
5044     if (PyUnicode_Check(name)
5045         && PyUnicode_EqualToUTF8(name, "persistent_id"))
5046     {
5047         Py_XINCREF(value);
5048         Py_XSETREF(((PicklerObject *)self)->persistent_id_attr, value);
5049         return 0;
5050     }
5051 
5052     return PyObject_GenericSetAttr(self, name, value);
5053 }
5054 
5055 static PyMemberDef Pickler_members[] = {
5056     {"bin", Py_T_INT, offsetof(PicklerObject, bin)},
5057     {"fast", Py_T_INT, offsetof(PicklerObject, fast)},
5058     {"dispatch_table", Py_T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5059     {NULL}
5060 };
5061 
5062 static PyGetSetDef Pickler_getsets[] = {
5063     {"memo",          (getter)Pickler_get_memo,
5064                       (setter)Pickler_set_memo},
5065     {NULL}
5066 };
5067 
5068 static PyType_Slot pickler_type_slots[] = {
5069     {Py_tp_dealloc, Pickler_dealloc},
5070     {Py_tp_getattro, Pickler_getattr},
5071     {Py_tp_setattro, Pickler_setattr},
5072     {Py_tp_methods, Pickler_methods},
5073     {Py_tp_members, Pickler_members},
5074     {Py_tp_getset, Pickler_getsets},
5075     {Py_tp_clear, Pickler_clear},
5076     {Py_tp_doc, (char*)_pickle_Pickler___init____doc__},
5077     {Py_tp_traverse, Pickler_traverse},
5078     {Py_tp_init, _pickle_Pickler___init__},
5079     {Py_tp_new, PyType_GenericNew},
5080     {Py_tp_alloc, PyType_GenericAlloc},
5081     {Py_tp_free, PyObject_GC_Del},
5082     {0, NULL},
5083 };
5084 
5085 static PyType_Spec pickler_type_spec = {
5086     .name = "_pickle.Pickler",
5087     .basicsize = sizeof(PicklerObject),
5088     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
5089               Py_TPFLAGS_IMMUTABLETYPE),
5090     .slots = pickler_type_slots,
5091 };
5092 
5093 /* Temporary helper for calling self.find_class().
5094 
5095    XXX: It would be nice to able to avoid Python function call overhead, by
5096    using directly the C version of find_class(), when find_class() is not
5097    overridden by a subclass. Although, this could become rather hackish. A
5098    simpler optimization would be to call the C function when self is not a
5099    subclass instance. */
5100 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5101 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5102 {
5103     return PyObject_CallMethodObjArgs((PyObject *)self, &_Py_ID(find_class),
5104                                       module_name, global_name, NULL);
5105 }
5106 
5107 static Py_ssize_t
marker(PickleState * st,UnpicklerObject * self)5108 marker(PickleState *st, UnpicklerObject *self)
5109 {
5110     if (self->num_marks < 1) {
5111         PyErr_SetString(st->UnpicklingError, "could not find MARK");
5112         return -1;
5113     }
5114 
5115     Py_ssize_t mark = self->marks[--self->num_marks];
5116     self->stack->mark_set = self->num_marks != 0;
5117     self->stack->fence = self->num_marks ?
5118             self->marks[self->num_marks - 1] : 0;
5119     return mark;
5120 }
5121 
5122 static int
load_none(PickleState * state,UnpicklerObject * self)5123 load_none(PickleState *state, UnpicklerObject *self)
5124 {
5125     PDATA_APPEND(self->stack, Py_None, -1);
5126     return 0;
5127 }
5128 
5129 static int
load_int(PickleState * state,UnpicklerObject * self)5130 load_int(PickleState *state, UnpicklerObject *self)
5131 {
5132     PyObject *value;
5133     char *endptr, *s;
5134     Py_ssize_t len;
5135     long x;
5136 
5137     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5138         return -1;
5139     if (len < 2)
5140         return bad_readline(state);
5141 
5142     errno = 0;
5143     /* XXX: Should the base argument of strtol() be explicitly set to 10?
5144        XXX(avassalotti): Should this uses PyOS_strtol()? */
5145     x = strtol(s, &endptr, 0);
5146 
5147     if (errno || (*endptr != '\n' && *endptr != '\0')) {
5148         /* Hm, maybe we've got something long.  Let's try reading
5149          * it as a Python int object. */
5150         errno = 0;
5151         /* XXX: Same thing about the base here. */
5152         value = PyLong_FromString(s, NULL, 0);
5153         if (value == NULL) {
5154             PyErr_SetString(PyExc_ValueError,
5155                             "could not convert string to int");
5156             return -1;
5157         }
5158     }
5159     else {
5160         if (len == 3 && (x == 0 || x == 1)) {
5161             if ((value = PyBool_FromLong(x)) == NULL)
5162                 return -1;
5163         }
5164         else {
5165             if ((value = PyLong_FromLong(x)) == NULL)
5166                 return -1;
5167         }
5168     }
5169 
5170     PDATA_PUSH(self->stack, value, -1);
5171     return 0;
5172 }
5173 
5174 static int
load_bool(PickleState * state,UnpicklerObject * self,PyObject * boolean)5175 load_bool(PickleState *state, UnpicklerObject *self, PyObject *boolean)
5176 {
5177     assert(boolean == Py_True || boolean == Py_False);
5178     PDATA_APPEND(self->stack, boolean, -1);
5179     return 0;
5180 }
5181 
5182 /* s contains x bytes of an unsigned little-endian integer.  Return its value
5183  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5184  */
5185 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5186 calc_binsize(char *bytes, int nbytes)
5187 {
5188     unsigned char *s = (unsigned char *)bytes;
5189     int i;
5190     size_t x = 0;
5191 
5192     if (nbytes > (int)sizeof(size_t)) {
5193         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5194          * have 64-bit size that can't be represented on 32-bit platform.
5195          */
5196         for (i = (int)sizeof(size_t); i < nbytes; i++) {
5197             if (s[i])
5198                 return -1;
5199         }
5200         nbytes = (int)sizeof(size_t);
5201     }
5202     for (i = 0; i < nbytes; i++) {
5203         x |= (size_t) s[i] << (8 * i);
5204     }
5205 
5206     if (x > PY_SSIZE_T_MAX)
5207         return -1;
5208     else
5209         return (Py_ssize_t) x;
5210 }
5211 
/* `bytes` holds `nbytes` bytes of a little-endian integer; `nbytes` is
 * expected to be 1, 2 or 4.  Return its value as a C long.  Obscure: when
 * `nbytes` is 1 or 2, this is an unsigned little-endian int, but when it is
 * 4 it's a signed one.  This is a historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that shifting a high
 * byte >= 0x80 into bit 31 is well defined even where long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;

    for (int i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: a set bit 31 denotes the two's-complement value
     * x - 2**32.  It is computed as -(0xFFFFFFFF - x) - 1 so that every
     * intermediate result fits in a long of any width.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
5238 
5239 static int
load_binintx(UnpicklerObject * self,char * s,int size)5240 load_binintx(UnpicklerObject *self, char *s, int size)
5241 {
5242     PyObject *value;
5243     long x;
5244 
5245     x = calc_binint(s, size);
5246 
5247     if ((value = PyLong_FromLong(x)) == NULL)
5248         return -1;
5249 
5250     PDATA_PUSH(self->stack, value, -1);
5251     return 0;
5252 }
5253 
5254 static int
load_binint(PickleState * state,UnpicklerObject * self)5255 load_binint(PickleState *state, UnpicklerObject *self)
5256 {
5257     char *s;
5258     if (_Unpickler_Read(self, state, &s, 4) < 0)
5259         return -1;
5260 
5261     return load_binintx(self, s, 4);
5262 }
5263 
5264 static int
load_binint1(PickleState * state,UnpicklerObject * self)5265 load_binint1(PickleState *state, UnpicklerObject *self)
5266 {
5267     char *s;
5268     if (_Unpickler_Read(self, state, &s, 1) < 0)
5269         return -1;
5270 
5271     return load_binintx(self, s, 1);
5272 }
5273 
5274 static int
load_binint2(PickleState * state,UnpicklerObject * self)5275 load_binint2(PickleState *state, UnpicklerObject *self)
5276 {
5277     char *s;
5278     if (_Unpickler_Read(self, state, &s, 2) < 0)
5279         return -1;
5280 
5281     return load_binintx(self, s, 2);
5282 }
5283 
5284 static int
load_long(PickleState * state,UnpicklerObject * self)5285 load_long(PickleState *state, UnpicklerObject *self)
5286 {
5287     PyObject *value;
5288     char *s = NULL;
5289     Py_ssize_t len;
5290 
5291     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5292         return -1;
5293     if (len < 2)
5294         return bad_readline(state);
5295 
5296     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5297        the 'L' before calling PyLong_FromString.  In order to maintain
5298        compatibility with Python 3.0.0, we don't actually *require*
5299        the 'L' to be present. */
5300     if (s[len-2] == 'L')
5301         s[len-2] = '\0';
5302     /* XXX: Should the base argument explicitly set to 10? */
5303     value = PyLong_FromString(s, NULL, 0);
5304     if (value == NULL)
5305         return -1;
5306 
5307     PDATA_PUSH(self->stack, value, -1);
5308     return 0;
5309 }
5310 
5311 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5312  * data following.
5313  */
5314 static int
load_counted_long(PickleState * st,UnpicklerObject * self,int size)5315 load_counted_long(PickleState *st, UnpicklerObject *self, int size)
5316 {
5317     PyObject *value;
5318     char *nbytes;
5319     char *pdata;
5320 
5321     assert(size == 1 || size == 4);
5322     if (_Unpickler_Read(self, st, &nbytes, size) < 0)
5323         return -1;
5324 
5325     size = calc_binint(nbytes, size);
5326     if (size < 0) {
5327         /* Corrupt or hostile pickle -- we never write one like this */
5328         PyErr_SetString(st->UnpicklingError,
5329                         "LONG pickle has negative byte count");
5330         return -1;
5331     }
5332 
5333     if (size == 0)
5334         value = PyLong_FromLong(0L);
5335     else {
5336         /* Read the raw little-endian bytes and convert. */
5337         if (_Unpickler_Read(self, st, &pdata, size) < 0)
5338             return -1;
5339         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5340                                       1 /* little endian */ , 1 /* signed */ );
5341     }
5342     if (value == NULL)
5343         return -1;
5344     PDATA_PUSH(self->stack, value, -1);
5345     return 0;
5346 }
5347 
5348 static int
load_float(PickleState * state,UnpicklerObject * self)5349 load_float(PickleState *state, UnpicklerObject *self)
5350 {
5351     PyObject *value;
5352     char *endptr, *s;
5353     Py_ssize_t len;
5354     double d;
5355 
5356     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5357         return -1;
5358     if (len < 2)
5359         return bad_readline(state);
5360 
5361     errno = 0;
5362     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5363     if (d == -1.0 && PyErr_Occurred())
5364         return -1;
5365     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5366         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5367         return -1;
5368     }
5369     value = PyFloat_FromDouble(d);
5370     if (value == NULL)
5371         return -1;
5372 
5373     PDATA_PUSH(self->stack, value, -1);
5374     return 0;
5375 }
5376 
5377 static int
load_binfloat(PickleState * state,UnpicklerObject * self)5378 load_binfloat(PickleState *state, UnpicklerObject *self)
5379 {
5380     PyObject *value;
5381     double x;
5382     char *s;
5383 
5384     if (_Unpickler_Read(self, state, &s, 8) < 0)
5385         return -1;
5386 
5387     x = PyFloat_Unpack8(s, 0);
5388     if (x == -1.0 && PyErr_Occurred())
5389         return -1;
5390 
5391     if ((value = PyFloat_FromDouble(x)) == NULL)
5392         return -1;
5393 
5394     PDATA_PUSH(self->stack, value, -1);
5395     return 0;
5396 }
5397 
5398 static int
load_string(PickleState * st,UnpicklerObject * self)5399 load_string(PickleState *st, UnpicklerObject *self)
5400 {
5401     PyObject *bytes;
5402     PyObject *obj;
5403     Py_ssize_t len;
5404     char *s, *p;
5405 
5406     if ((len = _Unpickler_Readline(st, self, &s)) < 0)
5407         return -1;
5408     /* Strip the newline */
5409     len--;
5410     /* Strip outermost quotes */
5411     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5412         p = s + 1;
5413         len -= 2;
5414     }
5415     else {
5416         PyErr_SetString(st->UnpicklingError,
5417                         "the STRING opcode argument must be quoted");
5418         return -1;
5419     }
5420     assert(len >= 0);
5421 
5422     /* Use the PyBytes API to decode the string, since that is what is used
5423        to encode, and then coerce the result to Unicode. */
5424     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5425     if (bytes == NULL)
5426         return -1;
5427 
5428     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5429        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5430     if (strcmp(self->encoding, "bytes") == 0) {
5431         obj = bytes;
5432     }
5433     else {
5434         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5435         Py_DECREF(bytes);
5436         if (obj == NULL) {
5437             return -1;
5438         }
5439     }
5440 
5441     PDATA_PUSH(self->stack, obj, -1);
5442     return 0;
5443 }
5444 
5445 static int
load_counted_binstring(PickleState * st,UnpicklerObject * self,int nbytes)5446 load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes)
5447 {
5448     PyObject *obj;
5449     Py_ssize_t size;
5450     char *s;
5451 
5452     if (_Unpickler_Read(self, st, &s, nbytes) < 0)
5453         return -1;
5454 
5455     size = calc_binsize(s, nbytes);
5456     if (size < 0) {
5457         PyErr_Format(st->UnpicklingError,
5458                      "BINSTRING exceeds system's maximum size of %zd bytes",
5459                      PY_SSIZE_T_MAX);
5460         return -1;
5461     }
5462 
5463     if (_Unpickler_Read(self, st, &s, size) < 0)
5464         return -1;
5465 
5466     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5467        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5468     if (strcmp(self->encoding, "bytes") == 0) {
5469         obj = PyBytes_FromStringAndSize(s, size);
5470     }
5471     else {
5472         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5473     }
5474     if (obj == NULL) {
5475         return -1;
5476     }
5477 
5478     PDATA_PUSH(self->stack, obj, -1);
5479     return 0;
5480 }
5481 
5482 static int
load_counted_binbytes(PickleState * state,UnpicklerObject * self,int nbytes)5483 load_counted_binbytes(PickleState *state, UnpicklerObject *self, int nbytes)
5484 {
5485     PyObject *bytes;
5486     Py_ssize_t size;
5487     char *s;
5488 
5489     if (_Unpickler_Read(self, state, &s, nbytes) < 0)
5490         return -1;
5491 
5492     size = calc_binsize(s, nbytes);
5493     if (size < 0) {
5494         PyErr_Format(PyExc_OverflowError,
5495                      "BINBYTES exceeds system's maximum size of %zd bytes",
5496                      PY_SSIZE_T_MAX);
5497         return -1;
5498     }
5499 
5500     bytes = PyBytes_FromStringAndSize(NULL, size);
5501     if (bytes == NULL)
5502         return -1;
5503     if (_Unpickler_ReadInto(state, self, PyBytes_AS_STRING(bytes), size) < 0) {
5504         Py_DECREF(bytes);
5505         return -1;
5506     }
5507 
5508     PDATA_PUSH(self->stack, bytes, -1);
5509     return 0;
5510 }
5511 
5512 static int
load_counted_bytearray(PickleState * state,UnpicklerObject * self)5513 load_counted_bytearray(PickleState *state, UnpicklerObject *self)
5514 {
5515     PyObject *bytearray;
5516     Py_ssize_t size;
5517     char *s;
5518 
5519     if (_Unpickler_Read(self, state, &s, 8) < 0) {
5520         return -1;
5521     }
5522 
5523     size = calc_binsize(s, 8);
5524     if (size < 0) {
5525         PyErr_Format(PyExc_OverflowError,
5526                      "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5527                      PY_SSIZE_T_MAX);
5528         return -1;
5529     }
5530 
5531     bytearray = PyByteArray_FromStringAndSize(NULL, size);
5532     if (bytearray == NULL) {
5533         return -1;
5534     }
5535     char *str = PyByteArray_AS_STRING(bytearray);
5536     if (_Unpickler_ReadInto(state, self, str, size) < 0) {
5537         Py_DECREF(bytearray);
5538         return -1;
5539     }
5540 
5541     PDATA_PUSH(self->stack, bytearray, -1);
5542     return 0;
5543 }
5544 
5545 static int
load_next_buffer(PickleState * st,UnpicklerObject * self)5546 load_next_buffer(PickleState *st, UnpicklerObject *self)
5547 {
5548     if (self->buffers == NULL) {
5549         PyErr_SetString(st->UnpicklingError,
5550                         "pickle stream refers to out-of-band data "
5551                         "but no *buffers* argument was given");
5552         return -1;
5553     }
5554     PyObject *buf = PyIter_Next(self->buffers);
5555     if (buf == NULL) {
5556         if (!PyErr_Occurred()) {
5557             PyErr_SetString(st->UnpicklingError,
5558                             "not enough out-of-band buffers");
5559         }
5560         return -1;
5561     }
5562 
5563     PDATA_PUSH(self->stack, buf, -1);
5564     return 0;
5565 }
5566 
5567 static int
load_readonly_buffer(PickleState * state,UnpicklerObject * self)5568 load_readonly_buffer(PickleState *state, UnpicklerObject *self)
5569 {
5570     Py_ssize_t len = Py_SIZE(self->stack);
5571     if (len <= self->stack->fence) {
5572         return Pdata_stack_underflow(state, self->stack);
5573     }
5574 
5575     PyObject *obj = self->stack->data[len - 1];
5576     PyObject *view = PyMemoryView_FromObject(obj);
5577     if (view == NULL) {
5578         return -1;
5579     }
5580     if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5581         /* Original object is writable */
5582         PyMemoryView_GET_BUFFER(view)->readonly = 1;
5583         self->stack->data[len - 1] = view;
5584         Py_DECREF(obj);
5585     }
5586     else {
5587         /* Original object is read-only, no need to replace it */
5588         Py_DECREF(view);
5589     }
5590     return 0;
5591 }
5592 
5593 static int
load_unicode(PickleState * state,UnpicklerObject * self)5594 load_unicode(PickleState *state, UnpicklerObject *self)
5595 {
5596     PyObject *str;
5597     Py_ssize_t len;
5598     char *s = NULL;
5599 
5600     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5601         return -1;
5602     if (len < 1)
5603         return bad_readline(state);
5604 
5605     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5606     if (str == NULL)
5607         return -1;
5608 
5609     PDATA_PUSH(self->stack, str, -1);
5610     return 0;
5611 }
5612 
5613 static int
load_counted_binunicode(PickleState * state,UnpicklerObject * self,int nbytes)5614 load_counted_binunicode(PickleState *state, UnpicklerObject *self, int nbytes)
5615 {
5616     PyObject *str;
5617     Py_ssize_t size;
5618     char *s;
5619 
5620     if (_Unpickler_Read(self, state, &s, nbytes) < 0)
5621         return -1;
5622 
5623     size = calc_binsize(s, nbytes);
5624     if (size < 0) {
5625         PyErr_Format(PyExc_OverflowError,
5626                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5627                      PY_SSIZE_T_MAX);
5628         return -1;
5629     }
5630 
5631     if (_Unpickler_Read(self, state, &s, size) < 0)
5632         return -1;
5633 
5634     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5635     if (str == NULL)
5636         return -1;
5637 
5638     PDATA_PUSH(self->stack, str, -1);
5639     return 0;
5640 }
5641 
5642 static int
load_counted_tuple(PickleState * state,UnpicklerObject * self,Py_ssize_t len)5643 load_counted_tuple(PickleState *state, UnpicklerObject *self, Py_ssize_t len)
5644 {
5645     PyObject *tuple;
5646 
5647     if (Py_SIZE(self->stack) < len)
5648         return Pdata_stack_underflow(state, self->stack);
5649 
5650     tuple = Pdata_poptuple(state, self->stack, Py_SIZE(self->stack) - len);
5651     if (tuple == NULL)
5652         return -1;
5653     PDATA_PUSH(self->stack, tuple, -1);
5654     return 0;
5655 }
5656 
5657 static int
load_tuple(PickleState * state,UnpicklerObject * self)5658 load_tuple(PickleState *state, UnpicklerObject *self)
5659 {
5660     Py_ssize_t i;
5661 
5662     if ((i = marker(state, self)) < 0)
5663         return -1;
5664 
5665     return load_counted_tuple(state, self, Py_SIZE(self->stack) - i);
5666 }
5667 
5668 static int
load_empty_list(PickleState * state,UnpicklerObject * self)5669 load_empty_list(PickleState *state, UnpicklerObject *self)
5670 {
5671     PyObject *list;
5672 
5673     if ((list = PyList_New(0)) == NULL)
5674         return -1;
5675     PDATA_PUSH(self->stack, list, -1);
5676     return 0;
5677 }
5678 
5679 static int
load_empty_dict(PickleState * state,UnpicklerObject * self)5680 load_empty_dict(PickleState *state, UnpicklerObject *self)
5681 {
5682     PyObject *dict;
5683 
5684     if ((dict = PyDict_New()) == NULL)
5685         return -1;
5686     PDATA_PUSH(self->stack, dict, -1);
5687     return 0;
5688 }
5689 
5690 static int
load_empty_set(PickleState * state,UnpicklerObject * self)5691 load_empty_set(PickleState *state, UnpicklerObject *self)
5692 {
5693     PyObject *set;
5694 
5695     if ((set = PySet_New(NULL)) == NULL)
5696         return -1;
5697     PDATA_PUSH(self->stack, set, -1);
5698     return 0;
5699 }
5700 
5701 static int
load_list(PickleState * state,UnpicklerObject * self)5702 load_list(PickleState *state, UnpicklerObject *self)
5703 {
5704     PyObject *list;
5705     Py_ssize_t i;
5706 
5707     if ((i = marker(state, self)) < 0)
5708         return -1;
5709 
5710     list = Pdata_poplist(self->stack, i);
5711     if (list == NULL)
5712         return -1;
5713     PDATA_PUSH(self->stack, list, -1);
5714     return 0;
5715 }
5716 
5717 static int
load_dict(PickleState * st,UnpicklerObject * self)5718 load_dict(PickleState *st, UnpicklerObject *self)
5719 {
5720     PyObject *dict, *key, *value;
5721     Py_ssize_t i, j, k;
5722 
5723     if ((i = marker(st, self)) < 0)
5724         return -1;
5725     j = Py_SIZE(self->stack);
5726 
5727     if ((dict = PyDict_New()) == NULL)
5728         return -1;
5729 
5730     if ((j - i) % 2 != 0) {
5731         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5732         Py_DECREF(dict);
5733         return -1;
5734     }
5735 
5736     for (k = i + 1; k < j; k += 2) {
5737         key = self->stack->data[k - 1];
5738         value = self->stack->data[k];
5739         if (PyDict_SetItem(dict, key, value) < 0) {
5740             Py_DECREF(dict);
5741             return -1;
5742         }
5743     }
5744     Pdata_clear(self->stack, i);
5745     PDATA_PUSH(self->stack, dict, -1);
5746     return 0;
5747 }
5748 
5749 static int
load_frozenset(PickleState * state,UnpicklerObject * self)5750 load_frozenset(PickleState *state, UnpicklerObject *self)
5751 {
5752     PyObject *items;
5753     PyObject *frozenset;
5754     Py_ssize_t i;
5755 
5756     if ((i = marker(state, self)) < 0)
5757         return -1;
5758 
5759     items = Pdata_poptuple(state, self->stack, i);
5760     if (items == NULL)
5761         return -1;
5762 
5763     frozenset = PyFrozenSet_New(items);
5764     Py_DECREF(items);
5765     if (frozenset == NULL)
5766         return -1;
5767 
5768     PDATA_PUSH(self->stack, frozenset, -1);
5769     return 0;
5770 }
5771 
5772 static PyObject *
instantiate(PyObject * cls,PyObject * args)5773 instantiate(PyObject *cls, PyObject *args)
5774 {
5775     /* Caller must assure args are a tuple.  Normally, args come from
5776        Pdata_poptuple which packs objects from the top of the stack
5777        into a newly created tuple. */
5778     assert(PyTuple_Check(args));
5779     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5780         int rc = PyObject_HasAttrWithError(cls, &_Py_ID(__getinitargs__));
5781         if (rc < 0) {
5782             return NULL;
5783         }
5784         if (!rc) {
5785             return PyObject_CallMethodOneArg(cls, &_Py_ID(__new__), cls);
5786         }
5787     }
5788     return PyObject_CallObject(cls, args);
5789 }
5790 
5791 static int
load_obj(PickleState * state,UnpicklerObject * self)5792 load_obj(PickleState *state, UnpicklerObject *self)
5793 {
5794     PyObject *cls, *args, *obj = NULL;
5795     Py_ssize_t i;
5796 
5797     if ((i = marker(state, self)) < 0)
5798         return -1;
5799 
5800     if (Py_SIZE(self->stack) - i < 1)
5801         return Pdata_stack_underflow(state, self->stack);
5802 
5803     args = Pdata_poptuple(state, self->stack, i + 1);
5804     if (args == NULL)
5805         return -1;
5806 
5807     PDATA_POP(state, self->stack, cls);
5808     if (cls) {
5809         obj = instantiate(cls, args);
5810         Py_DECREF(cls);
5811     }
5812     Py_DECREF(args);
5813     if (obj == NULL)
5814         return -1;
5815 
5816     PDATA_PUSH(self->stack, obj, -1);
5817     return 0;
5818 }
5819 
5820 static int
load_inst(PickleState * state,UnpicklerObject * self)5821 load_inst(PickleState *state, UnpicklerObject *self)
5822 {
5823     PyObject *cls = NULL;
5824     PyObject *args = NULL;
5825     PyObject *obj = NULL;
5826     PyObject *module_name;
5827     PyObject *class_name;
5828     Py_ssize_t len;
5829     Py_ssize_t i;
5830     char *s;
5831 
5832     if ((i = marker(state, self)) < 0)
5833         return -1;
5834     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5835         return -1;
5836     if (len < 2)
5837         return bad_readline(state);
5838 
5839     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5840        identifiers are permitted in Python 3.0, since the INST opcode is only
5841        supported by older protocols on Python 2.x. */
5842     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5843     if (module_name == NULL)
5844         return -1;
5845 
5846     if ((len = _Unpickler_Readline(state, self, &s)) >= 0) {
5847         if (len < 2) {
5848             Py_DECREF(module_name);
5849             return bad_readline(state);
5850         }
5851         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5852         if (class_name != NULL) {
5853             cls = find_class(self, module_name, class_name);
5854             Py_DECREF(class_name);
5855         }
5856     }
5857     Py_DECREF(module_name);
5858 
5859     if (cls == NULL)
5860         return -1;
5861 
5862     if ((args = Pdata_poptuple(state, self->stack, i)) != NULL) {
5863         obj = instantiate(cls, args);
5864         Py_DECREF(args);
5865     }
5866     Py_DECREF(cls);
5867 
5868     if (obj == NULL)
5869         return -1;
5870 
5871     PDATA_PUSH(self->stack, obj, -1);
5872     return 0;
5873 }
5874 
5875 static void
newobj_unpickling_error(PickleState * st,const char * msg,int use_kwargs,PyObject * arg)5876 newobj_unpickling_error(PickleState *st, const char *msg, int use_kwargs,
5877                         PyObject *arg)
5878 {
5879     PyErr_Format(st->UnpicklingError, msg,
5880                  use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5881                  Py_TYPE(arg)->tp_name);
5882 }
5883 
5884 static int
load_newobj(PickleState * state,UnpicklerObject * self,int use_kwargs)5885 load_newobj(PickleState *state, UnpicklerObject *self, int use_kwargs)
5886 {
5887     PyObject *cls, *args, *kwargs = NULL;
5888     PyObject *obj;
5889 
5890     /* Stack is ... cls args [kwargs], and we want to call
5891      * cls.__new__(cls, *args, **kwargs).
5892      */
5893     if (use_kwargs) {
5894         PDATA_POP(state, self->stack, kwargs);
5895         if (kwargs == NULL) {
5896             return -1;
5897         }
5898     }
5899     PDATA_POP(state, self->stack, args);
5900     if (args == NULL) {
5901         Py_XDECREF(kwargs);
5902         return -1;
5903     }
5904     PDATA_POP(state, self->stack, cls);
5905     if (cls == NULL) {
5906         Py_XDECREF(kwargs);
5907         Py_DECREF(args);
5908         return -1;
5909     }
5910 
5911     if (!PyType_Check(cls)) {
5912         newobj_unpickling_error(state,
5913                                 "%s class argument must be a type, not %.200s",
5914                                 use_kwargs, cls);
5915         goto error;
5916     }
5917     if (((PyTypeObject *)cls)->tp_new == NULL) {
5918         newobj_unpickling_error(state,
5919                                 "%s class argument '%.200s' doesn't have __new__",
5920                                 use_kwargs, cls);
5921         goto error;
5922     }
5923     if (!PyTuple_Check(args)) {
5924         newobj_unpickling_error(state,
5925                                 "%s args argument must be a tuple, not %.200s",
5926                                 use_kwargs, args);
5927         goto error;
5928     }
5929     if (use_kwargs && !PyDict_Check(kwargs)) {
5930         newobj_unpickling_error(state,
5931                                 "%s kwargs argument must be a dict, not %.200s",
5932                                 use_kwargs, kwargs);
5933         goto error;
5934     }
5935 
5936     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5937     if (obj == NULL) {
5938         goto error;
5939     }
5940     Py_XDECREF(kwargs);
5941     Py_DECREF(args);
5942     Py_DECREF(cls);
5943     PDATA_PUSH(self->stack, obj, -1);
5944     return 0;
5945 
5946 error:
5947     Py_XDECREF(kwargs);
5948     Py_DECREF(args);
5949     Py_DECREF(cls);
5950     return -1;
5951 }
5952 
5953 static int
load_global(PickleState * state,UnpicklerObject * self)5954 load_global(PickleState *state, UnpicklerObject *self)
5955 {
5956     PyObject *global = NULL;
5957     PyObject *module_name;
5958     PyObject *global_name;
5959     Py_ssize_t len;
5960     char *s;
5961 
5962     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
5963         return -1;
5964     if (len < 2)
5965         return bad_readline(state);
5966     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5967     if (!module_name)
5968         return -1;
5969 
5970     if ((len = _Unpickler_Readline(state, self, &s)) >= 0) {
5971         if (len < 2) {
5972             Py_DECREF(module_name);
5973             return bad_readline(state);
5974         }
5975         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5976         if (global_name) {
5977             global = find_class(self, module_name, global_name);
5978             Py_DECREF(global_name);
5979         }
5980     }
5981     Py_DECREF(module_name);
5982 
5983     if (global == NULL)
5984         return -1;
5985     PDATA_PUSH(self->stack, global, -1);
5986     return 0;
5987 }
5988 
5989 static int
load_stack_global(PickleState * st,UnpicklerObject * self)5990 load_stack_global(PickleState *st, UnpicklerObject *self)
5991 {
5992     PyObject *global;
5993     PyObject *module_name;
5994     PyObject *global_name;
5995 
5996     PDATA_POP(st, self->stack, global_name);
5997     if (global_name == NULL) {
5998         return -1;
5999     }
6000     PDATA_POP(st, self->stack, module_name);
6001     if (module_name == NULL) {
6002         Py_DECREF(global_name);
6003         return -1;
6004     }
6005     if (!PyUnicode_CheckExact(module_name) ||
6006         !PyUnicode_CheckExact(global_name))
6007     {
6008         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6009         Py_DECREF(global_name);
6010         Py_DECREF(module_name);
6011         return -1;
6012     }
6013     global = find_class(self, module_name, global_name);
6014     Py_DECREF(global_name);
6015     Py_DECREF(module_name);
6016     if (global == NULL)
6017         return -1;
6018     PDATA_PUSH(self->stack, global, -1);
6019     return 0;
6020 }
6021 
6022 static int
load_persid(PickleState * st,UnpicklerObject * self)6023 load_persid(PickleState *st, UnpicklerObject *self)
6024 {
6025     PyObject *pid, *obj;
6026     Py_ssize_t len;
6027     char *s;
6028 
6029     if ((len = _Unpickler_Readline(st, self, &s)) < 0)
6030         return -1;
6031     if (len < 1)
6032         return bad_readline(st);
6033 
6034     pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6035     if (pid == NULL) {
6036         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6037             PyErr_SetString(st->UnpicklingError,
6038                             "persistent IDs in protocol 0 must be "
6039                             "ASCII strings");
6040         }
6041         return -1;
6042     }
6043 
6044     obj = PyObject_CallOneArg(self->persistent_load, pid);
6045     Py_DECREF(pid);
6046     if (obj == NULL)
6047         return -1;
6048 
6049     PDATA_PUSH(self->stack, obj, -1);
6050     return 0;
6051 }
6052 
6053 static int
load_binpersid(PickleState * st,UnpicklerObject * self)6054 load_binpersid(PickleState *st, UnpicklerObject *self)
6055 {
6056     PyObject *pid, *obj;
6057 
6058     PDATA_POP(st, self->stack, pid);
6059     if (pid == NULL)
6060         return -1;
6061 
6062     obj = PyObject_CallOneArg(self->persistent_load, pid);
6063     Py_DECREF(pid);
6064     if (obj == NULL)
6065         return -1;
6066 
6067     PDATA_PUSH(self->stack, obj, -1);
6068     return 0;
6069 }
6070 
6071 static int
load_pop(PickleState * state,UnpicklerObject * self)6072 load_pop(PickleState *state, UnpicklerObject *self)
6073 {
6074     Py_ssize_t len = Py_SIZE(self->stack);
6075 
6076     /* Note that we split the (pickle.py) stack into two stacks,
6077      * an object stack and a mark stack. We have to be clever and
6078      * pop the right one. We do this by looking at the top of the
6079      * mark stack first, and only signalling a stack underflow if
6080      * the object stack is empty and the mark stack doesn't match
6081      * our expectations.
6082      */
6083     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6084         self->num_marks--;
6085         self->stack->mark_set = self->num_marks != 0;
6086         self->stack->fence = self->num_marks ?
6087                 self->marks[self->num_marks - 1] : 0;
6088     } else if (len <= self->stack->fence)
6089         return Pdata_stack_underflow(state, self->stack);
6090     else {
6091         len--;
6092         Py_DECREF(self->stack->data[len]);
6093         Py_SET_SIZE(self->stack, len);
6094     }
6095     return 0;
6096 }
6097 
6098 static int
load_pop_mark(PickleState * state,UnpicklerObject * self)6099 load_pop_mark(PickleState *state, UnpicklerObject *self)
6100 {
6101     Py_ssize_t i;
6102     if ((i = marker(state, self)) < 0)
6103         return -1;
6104 
6105     Pdata_clear(self->stack, i);
6106 
6107     return 0;
6108 }
6109 
6110 static int
load_dup(PickleState * state,UnpicklerObject * self)6111 load_dup(PickleState *state, UnpicklerObject *self)
6112 {
6113     PyObject *last;
6114     Py_ssize_t len = Py_SIZE(self->stack);
6115 
6116     if (len <= self->stack->fence)
6117         return Pdata_stack_underflow(state, self->stack);
6118     last = self->stack->data[len - 1];
6119     PDATA_APPEND(self->stack, last, -1);
6120     return 0;
6121 }
6122 
6123 static int
load_get(PickleState * st,UnpicklerObject * self)6124 load_get(PickleState *st, UnpicklerObject *self)
6125 {
6126     PyObject *key, *value;
6127     Py_ssize_t idx;
6128     Py_ssize_t len;
6129     char *s;
6130 
6131     if ((len = _Unpickler_Readline(st, self, &s)) < 0)
6132         return -1;
6133     if (len < 2)
6134         return bad_readline(st);
6135 
6136     key = PyLong_FromString(s, NULL, 10);
6137     if (key == NULL)
6138         return -1;
6139     idx = PyLong_AsSsize_t(key);
6140     if (idx == -1 && PyErr_Occurred()) {
6141         Py_DECREF(key);
6142         return -1;
6143     }
6144 
6145     value = _Unpickler_MemoGet(self, idx);
6146     if (value == NULL) {
6147         if (!PyErr_Occurred()) {
6148            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6149         }
6150         Py_DECREF(key);
6151         return -1;
6152     }
6153     Py_DECREF(key);
6154 
6155     PDATA_APPEND(self->stack, value, -1);
6156     return 0;
6157 }
6158 
6159 static int
load_binget(PickleState * st,UnpicklerObject * self)6160 load_binget(PickleState *st, UnpicklerObject *self)
6161 {
6162     PyObject *value;
6163     Py_ssize_t idx;
6164     char *s;
6165 
6166     if (_Unpickler_Read(self, st, &s, 1) < 0)
6167         return -1;
6168 
6169     idx = Py_CHARMASK(s[0]);
6170 
6171     value = _Unpickler_MemoGet(self, idx);
6172     if (value == NULL) {
6173         PyObject *key = PyLong_FromSsize_t(idx);
6174         if (key != NULL) {
6175             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6176             Py_DECREF(key);
6177         }
6178         return -1;
6179     }
6180 
6181     PDATA_APPEND(self->stack, value, -1);
6182     return 0;
6183 }
6184 
6185 static int
load_long_binget(PickleState * st,UnpicklerObject * self)6186 load_long_binget(PickleState *st, UnpicklerObject *self)
6187 {
6188     PyObject *value;
6189     Py_ssize_t idx;
6190     char *s;
6191 
6192     if (_Unpickler_Read(self, st, &s, 4) < 0)
6193         return -1;
6194 
6195     idx = calc_binsize(s, 4);
6196 
6197     value = _Unpickler_MemoGet(self, idx);
6198     if (value == NULL) {
6199         PyObject *key = PyLong_FromSsize_t(idx);
6200         if (key != NULL) {
6201             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6202             Py_DECREF(key);
6203         }
6204         return -1;
6205     }
6206 
6207     PDATA_APPEND(self->stack, value, -1);
6208     return 0;
6209 }
6210 
6211 /* Push an object from the extension registry (EXT[124]).  nbytes is
6212  * the number of bytes following the opcode, holding the index (code) value.
6213  */
6214 static int
load_extension(PickleState * st,UnpicklerObject * self,int nbytes)6215 load_extension(PickleState *st, UnpicklerObject *self, int nbytes)
6216 {
6217     char *codebytes;            /* the nbytes bytes after the opcode */
6218     long code;                  /* calc_binint returns long */
6219     PyObject *py_code;          /* code as a Python int */
6220     PyObject *obj;              /* the object to push */
6221     PyObject *pair;             /* (module_name, class_name) */
6222     PyObject *module_name, *class_name;
6223 
6224     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6225     if (_Unpickler_Read(self, st, &codebytes, nbytes) < 0)
6226         return -1;
6227     code = calc_binint(codebytes, nbytes);
6228     if (code <= 0) {            /* note that 0 is forbidden */
6229         /* Corrupt or hostile pickle. */
6230         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6231         return -1;
6232     }
6233 
6234     /* Look for the code in the cache. */
6235     py_code = PyLong_FromLong(code);
6236     if (py_code == NULL)
6237         return -1;
6238     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6239     if (obj != NULL) {
6240         /* Bingo. */
6241         Py_DECREF(py_code);
6242         PDATA_APPEND(self->stack, obj, -1);
6243         return 0;
6244     }
6245     if (PyErr_Occurred()) {
6246         Py_DECREF(py_code);
6247         return -1;
6248     }
6249 
6250     /* Look up the (module_name, class_name) pair. */
6251     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6252     if (pair == NULL) {
6253         Py_DECREF(py_code);
6254         if (!PyErr_Occurred()) {
6255             PyErr_Format(PyExc_ValueError, "unregistered extension "
6256                          "code %ld", code);
6257         }
6258         return -1;
6259     }
6260     /* Since the extension registry is manipulable via Python code,
6261      * confirm that pair is really a 2-tuple of strings.
6262      */
6263     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6264         goto error;
6265     }
6266 
6267     module_name = PyTuple_GET_ITEM(pair, 0);
6268     if (!PyUnicode_Check(module_name)) {
6269         goto error;
6270     }
6271 
6272     class_name = PyTuple_GET_ITEM(pair, 1);
6273     if (!PyUnicode_Check(class_name)) {
6274         goto error;
6275     }
6276 
6277     /* Load the object. */
6278     obj = find_class(self, module_name, class_name);
6279     if (obj == NULL) {
6280         Py_DECREF(py_code);
6281         return -1;
6282     }
6283     /* Cache code -> obj. */
6284     code = PyDict_SetItem(st->extension_cache, py_code, obj);
6285     Py_DECREF(py_code);
6286     if (code < 0) {
6287         Py_DECREF(obj);
6288         return -1;
6289     }
6290     PDATA_PUSH(self->stack, obj, -1);
6291     return 0;
6292 
6293 error:
6294     Py_DECREF(py_code);
6295     PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6296                  "isn't a 2-tuple of strings", code);
6297     return -1;
6298 }
6299 
6300 static int
load_put(PickleState * state,UnpicklerObject * self)6301 load_put(PickleState *state, UnpicklerObject *self)
6302 {
6303     PyObject *key, *value;
6304     Py_ssize_t idx;
6305     Py_ssize_t len;
6306     char *s = NULL;
6307 
6308     if ((len = _Unpickler_Readline(state, self, &s)) < 0)
6309         return -1;
6310     if (len < 2)
6311         return bad_readline(state);
6312     if (Py_SIZE(self->stack) <= self->stack->fence)
6313         return Pdata_stack_underflow(state, self->stack);
6314     value = self->stack->data[Py_SIZE(self->stack) - 1];
6315 
6316     key = PyLong_FromString(s, NULL, 10);
6317     if (key == NULL)
6318         return -1;
6319     idx = PyLong_AsSsize_t(key);
6320     Py_DECREF(key);
6321     if (idx < 0) {
6322         if (!PyErr_Occurred())
6323             PyErr_SetString(PyExc_ValueError,
6324                             "negative PUT argument");
6325         return -1;
6326     }
6327 
6328     return _Unpickler_MemoPut(self, idx, value);
6329 }
6330 
6331 static int
load_binput(PickleState * state,UnpicklerObject * self)6332 load_binput(PickleState *state, UnpicklerObject *self)
6333 {
6334     PyObject *value;
6335     Py_ssize_t idx;
6336     char *s;
6337 
6338     if (_Unpickler_Read(self, state, &s, 1) < 0)
6339         return -1;
6340 
6341     if (Py_SIZE(self->stack) <= self->stack->fence)
6342         return Pdata_stack_underflow(state, self->stack);
6343     value = self->stack->data[Py_SIZE(self->stack) - 1];
6344 
6345     idx = Py_CHARMASK(s[0]);
6346 
6347     return _Unpickler_MemoPut(self, idx, value);
6348 }
6349 
6350 static int
load_long_binput(PickleState * state,UnpicklerObject * self)6351 load_long_binput(PickleState *state, UnpicklerObject *self)
6352 {
6353     PyObject *value;
6354     Py_ssize_t idx;
6355     char *s;
6356 
6357     if (_Unpickler_Read(self, state, &s, 4) < 0)
6358         return -1;
6359 
6360     if (Py_SIZE(self->stack) <= self->stack->fence)
6361         return Pdata_stack_underflow(state, self->stack);
6362     value = self->stack->data[Py_SIZE(self->stack) - 1];
6363 
6364     idx = calc_binsize(s, 4);
6365     if (idx < 0) {
6366         PyErr_SetString(PyExc_ValueError,
6367                         "negative LONG_BINPUT argument");
6368         return -1;
6369     }
6370 
6371     return _Unpickler_MemoPut(self, idx, value);
6372 }
6373 
6374 static int
load_memoize(PickleState * state,UnpicklerObject * self)6375 load_memoize(PickleState *state, UnpicklerObject *self)
6376 {
6377     PyObject *value;
6378 
6379     if (Py_SIZE(self->stack) <= self->stack->fence)
6380         return Pdata_stack_underflow(state, self->stack);
6381     value = self->stack->data[Py_SIZE(self->stack) - 1];
6382 
6383     return _Unpickler_MemoPut(self, self->memo_len, value);
6384 }
6385 
6386 static int
do_append(PickleState * state,UnpicklerObject * self,Py_ssize_t x)6387 do_append(PickleState *state, UnpicklerObject *self, Py_ssize_t x)
6388 {
6389     PyObject *value;
6390     PyObject *slice;
6391     PyObject *list;
6392     PyObject *result;
6393     Py_ssize_t len, i;
6394 
6395     len = Py_SIZE(self->stack);
6396     if (x > len || x <= self->stack->fence)
6397         return Pdata_stack_underflow(state, self->stack);
6398     if (len == x)  /* nothing to do */
6399         return 0;
6400 
6401     list = self->stack->data[x - 1];
6402 
6403     if (PyList_CheckExact(list)) {
6404         Py_ssize_t list_len;
6405         int ret;
6406 
6407         slice = Pdata_poplist(self->stack, x);
6408         if (!slice)
6409             return -1;
6410         list_len = PyList_GET_SIZE(list);
6411         ret = PyList_SetSlice(list, list_len, list_len, slice);
6412         Py_DECREF(slice);
6413         return ret;
6414     }
6415     else {
6416         PyObject *extend_func;
6417 
6418         if (PyObject_GetOptionalAttr(list, &_Py_ID(extend), &extend_func) < 0) {
6419             return -1;
6420         }
6421         if (extend_func != NULL) {
6422             slice = Pdata_poplist(self->stack, x);
6423             if (!slice) {
6424                 Py_DECREF(extend_func);
6425                 return -1;
6426             }
6427             result = _Pickle_FastCall(extend_func, slice);
6428             Py_DECREF(extend_func);
6429             if (result == NULL)
6430                 return -1;
6431             Py_DECREF(result);
6432         }
6433         else {
6434             PyObject *append_func;
6435 
6436             /* Even if the PEP 307 requires extend() and append() methods,
6437                fall back on append() if the object has no extend() method
6438                for backward compatibility. */
6439             append_func = PyObject_GetAttr(list, &_Py_ID(append));
6440             if (append_func == NULL)
6441                 return -1;
6442             for (i = x; i < len; i++) {
6443                 value = self->stack->data[i];
6444                 result = _Pickle_FastCall(append_func, value);
6445                 if (result == NULL) {
6446                     Pdata_clear(self->stack, i + 1);
6447                     Py_SET_SIZE(self->stack, x);
6448                     Py_DECREF(append_func);
6449                     return -1;
6450                 }
6451                 Py_DECREF(result);
6452             }
6453             Py_SET_SIZE(self->stack, x);
6454             Py_DECREF(append_func);
6455         }
6456     }
6457 
6458     return 0;
6459 }
6460 
6461 static int
load_append(PickleState * state,UnpicklerObject * self)6462 load_append(PickleState *state, UnpicklerObject *self)
6463 {
6464     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6465         return Pdata_stack_underflow(state, self->stack);
6466     return do_append(state, self, Py_SIZE(self->stack) - 1);
6467 }
6468 
6469 static int
load_appends(PickleState * state,UnpicklerObject * self)6470 load_appends(PickleState *state, UnpicklerObject *self)
6471 {
6472     Py_ssize_t i = marker(state, self);
6473     if (i < 0)
6474         return -1;
6475     return do_append(state, self, i);
6476 }
6477 
6478 static int
do_setitems(PickleState * st,UnpicklerObject * self,Py_ssize_t x)6479 do_setitems(PickleState *st, UnpicklerObject *self, Py_ssize_t x)
6480 {
6481     PyObject *value, *key;
6482     PyObject *dict;
6483     Py_ssize_t len, i;
6484     int status = 0;
6485 
6486     len = Py_SIZE(self->stack);
6487     if (x > len || x <= self->stack->fence)
6488         return Pdata_stack_underflow(st, self->stack);
6489     if (len == x)  /* nothing to do */
6490         return 0;
6491     if ((len - x) % 2 != 0) {
6492         /* Corrupt or hostile pickle -- we never write one like this. */
6493         PyErr_SetString(st->UnpicklingError,
6494                         "odd number of items for SETITEMS");
6495         return -1;
6496     }
6497 
6498     /* Here, dict does not actually need to be a PyDict; it could be anything
6499        that supports the __setitem__ attribute. */
6500     dict = self->stack->data[x - 1];
6501 
6502     for (i = x + 1; i < len; i += 2) {
6503         key = self->stack->data[i - 1];
6504         value = self->stack->data[i];
6505         if (PyObject_SetItem(dict, key, value) < 0) {
6506             status = -1;
6507             break;
6508         }
6509     }
6510 
6511     Pdata_clear(self->stack, x);
6512     return status;
6513 }
6514 
6515 static int
load_setitem(PickleState * state,UnpicklerObject * self)6516 load_setitem(PickleState *state, UnpicklerObject *self)
6517 {
6518     return do_setitems(state, self, Py_SIZE(self->stack) - 2);
6519 }
6520 
6521 static int
load_setitems(PickleState * state,UnpicklerObject * self)6522 load_setitems(PickleState *state, UnpicklerObject *self)
6523 {
6524     Py_ssize_t i = marker(state, self);
6525     if (i < 0)
6526         return -1;
6527     return do_setitems(state, self, i);
6528 }
6529 
6530 static int
load_additems(PickleState * state,UnpicklerObject * self)6531 load_additems(PickleState *state, UnpicklerObject *self)
6532 {
6533     PyObject *set;
6534     Py_ssize_t mark, len, i;
6535 
6536     mark =  marker(state, self);
6537     if (mark < 0)
6538         return -1;
6539     len = Py_SIZE(self->stack);
6540     if (mark > len || mark <= self->stack->fence)
6541         return Pdata_stack_underflow(state, self->stack);
6542     if (len == mark)  /* nothing to do */
6543         return 0;
6544 
6545     set = self->stack->data[mark - 1];
6546 
6547     if (PySet_Check(set)) {
6548         PyObject *items;
6549         int status;
6550 
6551         items = Pdata_poptuple(state, self->stack, mark);
6552         if (items == NULL)
6553             return -1;
6554 
6555         status = _PySet_Update(set, items);
6556         Py_DECREF(items);
6557         return status;
6558     }
6559     else {
6560         PyObject *add_func;
6561 
6562         add_func = PyObject_GetAttr(set, &_Py_ID(add));
6563         if (add_func == NULL)
6564             return -1;
6565         for (i = mark; i < len; i++) {
6566             PyObject *result;
6567             PyObject *item;
6568 
6569             item = self->stack->data[i];
6570             result = _Pickle_FastCall(add_func, item);
6571             if (result == NULL) {
6572                 Pdata_clear(self->stack, i + 1);
6573                 Py_SET_SIZE(self->stack, mark);
6574                 Py_DECREF(add_func);
6575                 return -1;
6576             }
6577             Py_DECREF(result);
6578         }
6579         Py_SET_SIZE(self->stack, mark);
6580         Py_DECREF(add_func);
6581     }
6582 
6583     return 0;
6584 }
6585 
6586 static int
load_build(PickleState * st,UnpicklerObject * self)6587 load_build(PickleState *st, UnpicklerObject *self)
6588 {
6589     PyObject *inst, *slotstate;
6590     PyObject *setstate;
6591     int status = 0;
6592 
6593     /* Stack is ... instance, state.  We want to leave instance at
6594      * the stack top, possibly mutated via instance.__setstate__(state).
6595      */
6596     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6597         return Pdata_stack_underflow(st, self->stack);
6598 
6599     PyObject *state;
6600     PDATA_POP(st, self->stack, state);
6601     if (state == NULL)
6602         return -1;
6603 
6604     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6605 
6606     if (PyObject_GetOptionalAttr(inst, &_Py_ID(__setstate__), &setstate) < 0) {
6607         Py_DECREF(state);
6608         return -1;
6609     }
6610     if (setstate != NULL) {
6611         PyObject *result;
6612 
6613         /* The explicit __setstate__ is responsible for everything. */
6614         result = _Pickle_FastCall(setstate, state);
6615         Py_DECREF(setstate);
6616         if (result == NULL)
6617             return -1;
6618         Py_DECREF(result);
6619         return 0;
6620     }
6621 
6622     /* A default __setstate__.  First see whether state embeds a
6623      * slot state dict too (a proto 2 addition).
6624      */
6625     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6626         PyObject *tmp = state;
6627 
6628         state = PyTuple_GET_ITEM(tmp, 0);
6629         slotstate = PyTuple_GET_ITEM(tmp, 1);
6630         Py_INCREF(state);
6631         Py_INCREF(slotstate);
6632         Py_DECREF(tmp);
6633     }
6634     else
6635         slotstate = NULL;
6636 
6637     /* Set inst.__dict__ from the state dict (if any). */
6638     if (state != Py_None) {
6639         PyObject *dict;
6640         PyObject *d_key, *d_value;
6641         Py_ssize_t i;
6642 
6643         if (!PyDict_Check(state)) {
6644             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6645             goto error;
6646         }
6647         dict = PyObject_GetAttr(inst, &_Py_ID(__dict__));
6648         if (dict == NULL)
6649             goto error;
6650 
6651         i = 0;
6652         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6653             /* normally the keys for instance attributes are
6654                interned.  we should try to do that here. */
6655             Py_INCREF(d_key);
6656             if (PyUnicode_CheckExact(d_key)) {
6657                 PyInterpreterState *interp = _PyInterpreterState_GET();
6658                 _PyUnicode_InternMortal(interp, &d_key);
6659             }
6660             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6661                 Py_DECREF(d_key);
6662                 Py_DECREF(dict);
6663                 goto error;
6664             }
6665             Py_DECREF(d_key);
6666         }
6667         Py_DECREF(dict);
6668     }
6669 
6670     /* Also set instance attributes from the slotstate dict (if any). */
6671     if (slotstate != NULL) {
6672         PyObject *d_key, *d_value;
6673         Py_ssize_t i;
6674 
6675         if (!PyDict_Check(slotstate)) {
6676             PyErr_SetString(st->UnpicklingError,
6677                             "slot state is not a dictionary");
6678             goto error;
6679         }
6680         i = 0;
6681         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6682             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6683                 goto error;
6684         }
6685     }
6686 
6687     if (0) {
6688   error:
6689         status = -1;
6690     }
6691 
6692     Py_DECREF(state);
6693     Py_XDECREF(slotstate);
6694     return status;
6695 }
6696 
6697 static int
load_mark(PickleState * state,UnpicklerObject * self)6698 load_mark(PickleState *state, UnpicklerObject *self)
6699 {
6700 
6701     /* Note that we split the (pickle.py) stack into two stacks, an
6702      * object stack and a mark stack. Here we push a mark onto the
6703      * mark stack.
6704      */
6705 
6706     if (self->num_marks >= self->marks_size) {
6707         size_t alloc = ((size_t)self->num_marks << 1) + 20;
6708         Py_ssize_t *marks_new = self->marks;
6709         PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6710         if (marks_new == NULL) {
6711             PyErr_NoMemory();
6712             return -1;
6713         }
6714         self->marks = marks_new;
6715         self->marks_size = (Py_ssize_t)alloc;
6716     }
6717 
6718     self->stack->mark_set = 1;
6719     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6720 
6721     return 0;
6722 }
6723 
6724 static int
load_reduce(PickleState * state,UnpicklerObject * self)6725 load_reduce(PickleState *state, UnpicklerObject *self)
6726 {
6727     PyObject *callable = NULL;
6728     PyObject *argtup = NULL;
6729     PyObject *obj = NULL;
6730 
6731     PDATA_POP(state, self->stack, argtup);
6732     if (argtup == NULL)
6733         return -1;
6734     PDATA_POP(state, self->stack, callable);
6735     if (callable) {
6736         obj = PyObject_CallObject(callable, argtup);
6737         Py_DECREF(callable);
6738     }
6739     Py_DECREF(argtup);
6740 
6741     if (obj == NULL)
6742         return -1;
6743 
6744     PDATA_PUSH(self->stack, obj, -1);
6745     return 0;
6746 }
6747 
6748 /* Just raises an error if we don't know the protocol specified.  PROTO
6749  * is the first opcode for protocols >= 2.
6750  */
6751 static int
load_proto(PickleState * state,UnpicklerObject * self)6752 load_proto(PickleState *state, UnpicklerObject *self)
6753 {
6754     char *s;
6755     int i;
6756 
6757     if (_Unpickler_Read(self, state, &s, 1) < 0)
6758         return -1;
6759 
6760     i = (unsigned char)s[0];
6761     if (i <= HIGHEST_PROTOCOL) {
6762         self->proto = i;
6763         return 0;
6764     }
6765 
6766     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6767     return -1;
6768 }
6769 
6770 static int
load_frame(PickleState * state,UnpicklerObject * self)6771 load_frame(PickleState *state, UnpicklerObject *self)
6772 {
6773     char *s;
6774     Py_ssize_t frame_len;
6775 
6776     if (_Unpickler_Read(self, state, &s, 8) < 0)
6777         return -1;
6778 
6779     frame_len = calc_binsize(s, 8);
6780     if (frame_len < 0) {
6781         PyErr_Format(PyExc_OverflowError,
6782                      "FRAME length exceeds system's maximum of %zd bytes",
6783                      PY_SSIZE_T_MAX);
6784         return -1;
6785     }
6786 
6787     if (_Unpickler_Read(self, state, &s, frame_len) < 0)
6788         return -1;
6789 
6790     /* Rewind to start of frame */
6791     self->next_read_idx -= frame_len;
6792     return 0;
6793 }
6794 
6795 static PyObject *
load(PickleState * st,UnpicklerObject * self)6796 load(PickleState *st, UnpicklerObject *self)
6797 {
6798     PyObject *value = NULL;
6799     PyObject *tmp;
6800     char *s = NULL;
6801 
6802     self->num_marks = 0;
6803     self->stack->mark_set = 0;
6804     self->stack->fence = 0;
6805     self->proto = 0;
6806     if (Py_SIZE(self->stack))
6807         Pdata_clear(self->stack, 0);
6808 
6809     /* Cache the persistent_load method. */
6810     tmp = PyObject_GetAttr((PyObject *)self, &_Py_ID(persistent_load));
6811     if (tmp == NULL) {
6812         goto error;
6813     }
6814     Py_XSETREF(self->persistent_load, tmp);
6815 
6816     /* Convenient macros for the dispatch while-switch loop just below. */
6817 #define OP(opcode, load_func) \
6818     case opcode: if (load_func(st, self) < 0) break; continue;
6819 
6820 #define OP_ARG(opcode, load_func, arg) \
6821     case opcode: if (load_func(st, self, (arg)) < 0) break; continue;
6822 
6823     while (1) {
6824         if (_Unpickler_Read(self, st, &s, 1) < 0) {
6825             if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6826                 PyErr_Format(PyExc_EOFError, "Ran out of input");
6827             }
6828             goto error;
6829         }
6830 
6831         switch ((enum opcode)s[0]) {
6832         OP(NONE, load_none)
6833         OP(BININT, load_binint)
6834         OP(BININT1, load_binint1)
6835         OP(BININT2, load_binint2)
6836         OP(INT, load_int)
6837         OP(LONG, load_long)
6838         OP_ARG(LONG1, load_counted_long, 1)
6839         OP_ARG(LONG4, load_counted_long, 4)
6840         OP(FLOAT, load_float)
6841         OP(BINFLOAT, load_binfloat)
6842         OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6843         OP_ARG(BINBYTES, load_counted_binbytes, 4)
6844         OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6845         OP(BYTEARRAY8, load_counted_bytearray)
6846         OP(NEXT_BUFFER, load_next_buffer)
6847         OP(READONLY_BUFFER, load_readonly_buffer)
6848         OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6849         OP_ARG(BINSTRING, load_counted_binstring, 4)
6850         OP(STRING, load_string)
6851         OP(UNICODE, load_unicode)
6852         OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6853         OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6854         OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6855         OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6856         OP_ARG(TUPLE1, load_counted_tuple, 1)
6857         OP_ARG(TUPLE2, load_counted_tuple, 2)
6858         OP_ARG(TUPLE3, load_counted_tuple, 3)
6859         OP(TUPLE, load_tuple)
6860         OP(EMPTY_LIST, load_empty_list)
6861         OP(LIST, load_list)
6862         OP(EMPTY_DICT, load_empty_dict)
6863         OP(DICT, load_dict)
6864         OP(EMPTY_SET, load_empty_set)
6865         OP(ADDITEMS, load_additems)
6866         OP(FROZENSET, load_frozenset)
6867         OP(OBJ, load_obj)
6868         OP(INST, load_inst)
6869         OP_ARG(NEWOBJ, load_newobj, 0)
6870         OP_ARG(NEWOBJ_EX, load_newobj, 1)
6871         OP(GLOBAL, load_global)
6872         OP(STACK_GLOBAL, load_stack_global)
6873         OP(APPEND, load_append)
6874         OP(APPENDS, load_appends)
6875         OP(BUILD, load_build)
6876         OP(DUP, load_dup)
6877         OP(BINGET, load_binget)
6878         OP(LONG_BINGET, load_long_binget)
6879         OP(GET, load_get)
6880         OP(MARK, load_mark)
6881         OP(BINPUT, load_binput)
6882         OP(LONG_BINPUT, load_long_binput)
6883         OP(PUT, load_put)
6884         OP(MEMOIZE, load_memoize)
6885         OP(POP, load_pop)
6886         OP(POP_MARK, load_pop_mark)
6887         OP(SETITEM, load_setitem)
6888         OP(SETITEMS, load_setitems)
6889         OP(PERSID, load_persid)
6890         OP(BINPERSID, load_binpersid)
6891         OP(REDUCE, load_reduce)
6892         OP(PROTO, load_proto)
6893         OP(FRAME, load_frame)
6894         OP_ARG(EXT1, load_extension, 1)
6895         OP_ARG(EXT2, load_extension, 2)
6896         OP_ARG(EXT4, load_extension, 4)
6897         OP_ARG(NEWTRUE, load_bool, Py_True)
6898         OP_ARG(NEWFALSE, load_bool, Py_False)
6899 
6900         case STOP:
6901             break;
6902 
6903         default:
6904             {
6905                 unsigned char c = (unsigned char) *s;
6906                 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6907                     PyErr_Format(st->UnpicklingError,
6908                                  "invalid load key, '%c'.", c);
6909                 }
6910                 else {
6911                     PyErr_Format(st->UnpicklingError,
6912                                  "invalid load key, '\\x%02x'.", c);
6913                 }
6914                 goto error;
6915             }
6916         }
6917 
6918         break;                  /* and we are done! */
6919     }
6920 
6921     if (PyErr_Occurred()) {
6922         goto error;
6923     }
6924 
6925     if (_Unpickler_SkipConsumed(self) < 0)
6926         goto error;
6927 
6928     Py_CLEAR(self->persistent_load);
6929     PDATA_POP(st, self->stack, value);
6930     return value;
6931 
6932 error:
6933     Py_CLEAR(self->persistent_load);
6934     return NULL;
6935 }
6936 
6937 /*[clinic input]
6938 
6939 _pickle.Unpickler.persistent_load
6940 
6941     cls: defining_class
6942     pid: object
6943     /
6944 
6945 [clinic start generated code]*/
6946 
6947 static PyObject *
_pickle_Unpickler_persistent_load_impl(UnpicklerObject * self,PyTypeObject * cls,PyObject * pid)6948 _pickle_Unpickler_persistent_load_impl(UnpicklerObject *self,
6949                                        PyTypeObject *cls, PyObject *pid)
6950 /*[clinic end generated code: output=9f4706f1330cb14d input=2f9554fae051276e]*/
6951 {
6952     PickleState *st = _Pickle_GetStateByClass(cls);
6953     PyErr_SetString(st->UnpicklingError,
6954                     "A load persistent id instruction was encountered, "
6955                     "but no persistent_load function was specified.");
6956     return NULL;
6957 }
6958 
6959 /*[clinic input]
6960 
6961 _pickle.Unpickler.load
6962 
6963     cls: defining_class
6964 
6965 Load a pickle.
6966 
6967 Read a pickled object representation from the open file object given
6968 in the constructor, and return the reconstituted object hierarchy
6969 specified therein.
6970 [clinic start generated code]*/
6971 
6972 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self,PyTypeObject * cls)6973 _pickle_Unpickler_load_impl(UnpicklerObject *self, PyTypeObject *cls)
6974 /*[clinic end generated code: output=cc88168f608e3007 input=f5d2f87e61d5f07f]*/
6975 {
6976     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6977 
6978     PickleState *st = _Pickle_GetStateByClass(cls);
6979 
6980     /* Check whether the Unpickler was initialized correctly. This prevents
6981        segfaulting if a subclass overridden __init__ with a function that does
6982        not call Unpickler.__init__(). Here, we simply ensure that self->read
6983        is not NULL. */
6984     if (unpickler->read == NULL) {
6985         PyErr_Format(st->UnpicklingError,
6986                      "Unpickler.__init__() was not called by %s.__init__()",
6987                      Py_TYPE(unpickler)->tp_name);
6988         return NULL;
6989     }
6990 
6991     return load(st, unpickler);
6992 }
6993 
6994 /* The name of find_class() is misleading. In newer pickle protocols, this
6995    function is used for loading any global (i.e., functions), not just
6996    classes. The name is kept only for backward compatibility. */
6997 
6998 /*[clinic input]
6999 
7000 _pickle.Unpickler.find_class
7001 
7002   cls: defining_class
7003   module_name: object
7004   global_name: object
7005   /
7006 
7007 Return an object from a specified module.
7008 
7009 If necessary, the module will be imported. Subclasses may override
7010 this method (e.g. to restrict unpickling of arbitrary classes and
7011 functions).
7012 
7013 This method is called whenever a class or a function object is
7014 needed.  Both arguments passed are str objects.
7015 [clinic start generated code]*/
7016 
7017 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyTypeObject * cls,PyObject * module_name,PyObject * global_name)7018 _pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyTypeObject *cls,
7019                                   PyObject *module_name,
7020                                   PyObject *global_name)
7021 /*[clinic end generated code: output=99577948abb0be81 input=9577745719219fc7]*/
7022 {
7023     PyObject *global;
7024     PyObject *module;
7025 
7026     if (PySys_Audit("pickle.find_class", "OO",
7027                     module_name, global_name) < 0) {
7028         return NULL;
7029     }
7030 
7031     /* Try to map the old names used in Python 2.x to the new ones used in
7032        Python 3.x.  We do this only with old pickle protocols and when the
7033        user has not disabled the feature. */
7034     if (self->proto < 3 && self->fix_imports) {
7035         PyObject *key;
7036         PyObject *item;
7037         PickleState *st = _Pickle_GetStateByClass(cls);
7038 
7039         /* Check if the global (i.e., a function or a class) was renamed
7040            or moved to another module. */
7041         key = PyTuple_Pack(2, module_name, global_name);
7042         if (key == NULL)
7043             return NULL;
7044         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7045         Py_DECREF(key);
7046         if (item) {
7047             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7048                 PyErr_Format(PyExc_RuntimeError,
7049                              "_compat_pickle.NAME_MAPPING values should be "
7050                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7051                 return NULL;
7052             }
7053             module_name = PyTuple_GET_ITEM(item, 0);
7054             global_name = PyTuple_GET_ITEM(item, 1);
7055             if (!PyUnicode_Check(module_name) ||
7056                 !PyUnicode_Check(global_name)) {
7057                 PyErr_Format(PyExc_RuntimeError,
7058                              "_compat_pickle.NAME_MAPPING values should be "
7059                              "pairs of str, not (%.200s, %.200s)",
7060                              Py_TYPE(module_name)->tp_name,
7061                              Py_TYPE(global_name)->tp_name);
7062                 return NULL;
7063             }
7064         }
7065         else if (PyErr_Occurred()) {
7066             return NULL;
7067         }
7068         else {
7069             /* Check if the module was renamed. */
7070             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7071             if (item) {
7072                 if (!PyUnicode_Check(item)) {
7073                     PyErr_Format(PyExc_RuntimeError,
7074                                 "_compat_pickle.IMPORT_MAPPING values should be "
7075                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
7076                     return NULL;
7077                 }
7078                 module_name = item;
7079             }
7080             else if (PyErr_Occurred()) {
7081                 return NULL;
7082             }
7083         }
7084     }
7085 
7086     /*
7087      * we don't use PyImport_GetModule here, because it can return partially-
7088      * initialised modules, which then cause the getattribute to fail.
7089      */
7090     module = PyImport_Import(module_name);
7091     if (module == NULL) {
7092         return NULL;
7093     }
7094     global = getattribute(module, global_name, self->proto >= 4);
7095     Py_DECREF(module);
7096     return global;
7097 }
7098 
7099 /*[clinic input]
7100 
7101 _pickle.Unpickler.__sizeof__ -> size_t
7102 
7103 Returns size in memory, in bytes.
7104 [clinic start generated code]*/
7105 
7106 static size_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7107 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7108 /*[clinic end generated code: output=4648d84c228196df input=27180b2b6b524012]*/
7109 {
7110     size_t res = _PyObject_SIZE(Py_TYPE(self));
7111     if (self->memo != NULL)
7112         res += self->memo_size * sizeof(PyObject *);
7113     if (self->marks != NULL)
7114         res += (size_t)self->marks_size * sizeof(Py_ssize_t);
7115     if (self->input_line != NULL)
7116         res += strlen(self->input_line) + 1;
7117     if (self->encoding != NULL)
7118         res += strlen(self->encoding) + 1;
7119     if (self->errors != NULL)
7120         res += strlen(self->errors) + 1;
7121     return res;
7122 }
7123 
7124 static struct PyMethodDef Unpickler_methods[] = {
7125     _PICKLE_UNPICKLER_PERSISTENT_LOAD_METHODDEF
7126     _PICKLE_UNPICKLER_LOAD_METHODDEF
7127     _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
7128     _PICKLE_UNPICKLER___SIZEOF___METHODDEF
7129     {NULL, NULL}                /* sentinel */
7130 };
7131 
7132 static int
Unpickler_clear(UnpicklerObject * self)7133 Unpickler_clear(UnpicklerObject *self)
7134 {
7135     Py_CLEAR(self->readline);
7136     Py_CLEAR(self->readinto);
7137     Py_CLEAR(self->read);
7138     Py_CLEAR(self->peek);
7139     Py_CLEAR(self->stack);
7140     Py_CLEAR(self->persistent_load);
7141     Py_CLEAR(self->buffers);
7142     if (self->buffer.buf != NULL) {
7143         PyBuffer_Release(&self->buffer);
7144         self->buffer.buf = NULL;
7145     }
7146 
7147     _Unpickler_MemoCleanup(self);
7148     PyMem_Free(self->marks);
7149     self->marks = NULL;
7150     PyMem_Free(self->input_line);
7151     self->input_line = NULL;
7152     PyMem_Free(self->encoding);
7153     self->encoding = NULL;
7154     PyMem_Free(self->errors);
7155     self->errors = NULL;
7156 
7157     return 0;
7158 }
7159 
7160 static void
Unpickler_dealloc(UnpicklerObject * self)7161 Unpickler_dealloc(UnpicklerObject *self)
7162 {
7163     PyTypeObject *tp = Py_TYPE(self);
7164     PyObject_GC_UnTrack((PyObject *)self);
7165     (void)Unpickler_clear(self);
7166     tp->tp_free((PyObject *)self);
7167     Py_DECREF(tp);
7168 }
7169 
7170 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7171 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7172 {
7173     Py_VISIT(Py_TYPE(self));
7174     Py_VISIT(self->readline);
7175     Py_VISIT(self->readinto);
7176     Py_VISIT(self->read);
7177     Py_VISIT(self->peek);
7178     Py_VISIT(self->stack);
7179     Py_VISIT(self->persistent_load);
7180     Py_VISIT(self->buffers);
7181     PyObject **memo = self->memo;
7182     if (memo) {
7183         Py_ssize_t i = self->memo_size;
7184         while (--i >= 0) {
7185             Py_VISIT(memo[i]);
7186         }
7187     }
7188     return 0;
7189 }
7190 
7191 /*[clinic input]
7192 
7193 _pickle.Unpickler.__init__
7194 
7195   file: object
7196   *
7197   fix_imports: bool = True
7198   encoding: str = 'ASCII'
7199   errors: str = 'strict'
7200   buffers: object(c_default="NULL") = ()
7201 
7202 This takes a binary file for reading a pickle data stream.
7203 
7204 The protocol version of the pickle is detected automatically, so no
7205 protocol argument is needed.  Bytes past the pickled object's
7206 representation are ignored.
7207 
7208 The argument *file* must have two methods, a read() method that takes
7209 an integer argument, and a readline() method that requires no
7210 arguments.  Both methods should return bytes.  Thus *file* can be a
7211 binary file object opened for reading, an io.BytesIO object, or any
7212 other custom object that meets this interface.
7213 
7214 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7215 which are used to control compatibility support for pickle stream
7216 generated by Python 2.  If *fix_imports* is True, pickle will try to
7217 map the old Python 2 names to the new names used in Python 3.  The
7218 *encoding* and *errors* tell pickle how to decode 8-bit string
7219 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7220 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7221 string instances as bytes objects.
7222 [clinic start generated code]*/
7223 
7224 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7225 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7226                                 int fix_imports, const char *encoding,
7227                                 const char *errors, PyObject *buffers)
7228 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7229 {
7230     /* In case of multiple __init__() calls, clear previous content. */
7231     if (self->read != NULL)
7232         (void)Unpickler_clear(self);
7233 
7234     if (_Unpickler_SetInputStream(self, file) < 0)
7235         return -1;
7236 
7237     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7238         return -1;
7239 
7240     if (_Unpickler_SetBuffers(self, buffers) < 0)
7241         return -1;
7242 
7243     self->fix_imports = fix_imports;
7244 
7245     PyTypeObject *tp = Py_TYPE(self);
7246     PickleState *state = _Pickle_FindStateByType(tp);
7247     self->stack = (Pdata *)Pdata_New(state);
7248     if (self->stack == NULL)
7249         return -1;
7250 
7251     self->memo_size = 32;
7252     self->memo = _Unpickler_NewMemo(self->memo_size);
7253     if (self->memo == NULL)
7254         return -1;
7255 
7256     self->proto = 0;
7257 
7258     return 0;
7259 }
7260 
7261 
7262 /* Define a proxy object for the Unpickler's internal memo object. This is to
7263  * avoid breaking code like:
7264  *  unpickler.memo.clear()
7265  * and
7266  *  unpickler.memo = saved_memo
7267  * Is this a good idea? Not really, but we don't want to break code that uses
7268  * it. Note that we don't implement the entire mapping API here. This is
7269  * intentional, as these should be treated as black-box implementation details.
7270  *
7271  * We do, however, have to implement pickling/unpickling support because of
7272  * real-world code like cvs2svn.
7273  */
7274 
7275 /*[clinic input]
7276 _pickle.UnpicklerMemoProxy.clear
7277 
7278 Remove all items from memo.
7279 [clinic start generated code]*/
7280 
7281 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7282 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7283 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7284 {
7285     _Unpickler_MemoCleanup(self->unpickler);
7286     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7287     if (self->unpickler->memo == NULL)
7288         return NULL;
7289     Py_RETURN_NONE;
7290 }
7291 
7292 /*[clinic input]
7293 _pickle.UnpicklerMemoProxy.copy
7294 
7295 Copy the memo to a new object.
7296 [clinic start generated code]*/
7297 
7298 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7299 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7300 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7301 {
7302     size_t i;
7303     PyObject *new_memo = PyDict_New();
7304     if (new_memo == NULL)
7305         return NULL;
7306 
7307     for (i = 0; i < self->unpickler->memo_size; i++) {
7308         int status;
7309         PyObject *key, *value;
7310 
7311         value = self->unpickler->memo[i];
7312         if (value == NULL)
7313             continue;
7314 
7315         key = PyLong_FromSsize_t(i);
7316         if (key == NULL)
7317             goto error;
7318         status = PyDict_SetItem(new_memo, key, value);
7319         Py_DECREF(key);
7320         if (status < 0)
7321             goto error;
7322     }
7323     return new_memo;
7324 
7325 error:
7326     Py_DECREF(new_memo);
7327     return NULL;
7328 }
7329 
7330 /*[clinic input]
7331 _pickle.UnpicklerMemoProxy.__reduce__
7332 
7333 Implement pickling support.
7334 [clinic start generated code]*/
7335 
7336 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7337 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7338 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7339 {
7340     PyObject *reduce_value;
7341     PyObject *constructor_args;
7342     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7343     if (contents == NULL)
7344         return NULL;
7345 
7346     reduce_value = PyTuple_New(2);
7347     if (reduce_value == NULL) {
7348         Py_DECREF(contents);
7349         return NULL;
7350     }
7351     constructor_args = PyTuple_New(1);
7352     if (constructor_args == NULL) {
7353         Py_DECREF(contents);
7354         Py_DECREF(reduce_value);
7355         return NULL;
7356     }
7357     PyTuple_SET_ITEM(constructor_args, 0, contents);
7358     PyTuple_SET_ITEM(reduce_value, 0, Py_NewRef(&PyDict_Type));
7359     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7360     return reduce_value;
7361 }
7362 
7363 static PyMethodDef unpicklerproxy_methods[] = {
7364     _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7365     _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7366     _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7367     {NULL, NULL}    /* sentinel */
7368 };
7369 
7370 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7371 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7372 {
7373     PyTypeObject *tp = Py_TYPE(self);
7374     PyObject_GC_UnTrack(self);
7375     Py_CLEAR(self->unpickler);
7376     tp->tp_free((PyObject *)self);
7377     Py_DECREF(tp);
7378 }
7379 
7380 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7381 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7382                             visitproc visit, void *arg)
7383 {
7384     Py_VISIT(Py_TYPE(self));
7385     Py_VISIT(self->unpickler);
7386     return 0;
7387 }
7388 
7389 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7390 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7391 {
7392     Py_CLEAR(self->unpickler);
7393     return 0;
7394 }
7395 
7396 static PyType_Slot unpickler_memoproxy_slots[] = {
7397     {Py_tp_dealloc, UnpicklerMemoProxy_dealloc},
7398     {Py_tp_traverse, UnpicklerMemoProxy_traverse},
7399     {Py_tp_clear, UnpicklerMemoProxy_clear},
7400     {Py_tp_methods, unpicklerproxy_methods},
7401     {Py_tp_hash, PyObject_HashNotImplemented},
7402     {0, NULL},
7403 };
7404 
7405 static PyType_Spec unpickler_memoproxy_spec = {
7406     .name = "_pickle.UnpicklerMemoProxy",
7407     .basicsize = sizeof(UnpicklerMemoProxyObject),
7408     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
7409               Py_TPFLAGS_IMMUTABLETYPE),
7410     .slots = unpickler_memoproxy_slots,
7411 };
7412 
7413 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7414 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7415 {
7416     PickleState *state = _Pickle_FindStateByType(Py_TYPE(unpickler));
7417     UnpicklerMemoProxyObject *self;
7418     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7419                            state->UnpicklerMemoProxyType);
7420     if (self == NULL)
7421         return NULL;
7422     self->unpickler = (UnpicklerObject*)Py_NewRef(unpickler);
7423     PyObject_GC_Track(self);
7424     return (PyObject *)self;
7425 }
7426 
7427 /*****************************************************************************/
7428 
7429 
7430 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7431 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7432 {
7433     return UnpicklerMemoProxy_New(self);
7434 }
7435 
7436 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7437 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7438 {
7439     PyObject **new_memo;
7440     size_t new_memo_size = 0;
7441 
7442     if (obj == NULL) {
7443         PyErr_SetString(PyExc_TypeError,
7444                         "attribute deletion is not supported");
7445         return -1;
7446     }
7447 
7448     PickleState *state = _Pickle_FindStateByType(Py_TYPE(self));
7449     if (Py_IS_TYPE(obj, state->UnpicklerMemoProxyType)) {
7450         UnpicklerObject *unpickler =
7451             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7452 
7453         new_memo_size = unpickler->memo_size;
7454         new_memo = _Unpickler_NewMemo(new_memo_size);
7455         if (new_memo == NULL)
7456             return -1;
7457 
7458         for (size_t i = 0; i < new_memo_size; i++) {
7459             new_memo[i] = Py_XNewRef(unpickler->memo[i]);
7460         }
7461     }
7462     else if (PyDict_Check(obj)) {
7463         Py_ssize_t i = 0;
7464         PyObject *key, *value;
7465 
7466         new_memo_size = PyDict_GET_SIZE(obj);
7467         new_memo = _Unpickler_NewMemo(new_memo_size);
7468         if (new_memo == NULL)
7469             return -1;
7470 
7471         while (PyDict_Next(obj, &i, &key, &value)) {
7472             Py_ssize_t idx;
7473             if (!PyLong_Check(key)) {
7474                 PyErr_SetString(PyExc_TypeError,
7475                                 "memo key must be integers");
7476                 goto error;
7477             }
7478             idx = PyLong_AsSsize_t(key);
7479             if (idx == -1 && PyErr_Occurred())
7480                 goto error;
7481             if (idx < 0) {
7482                 PyErr_SetString(PyExc_ValueError,
7483                                 "memo key must be positive integers.");
7484                 goto error;
7485             }
7486             if (_Unpickler_MemoPut(self, idx, value) < 0)
7487                 goto error;
7488         }
7489     }
7490     else {
7491         PyErr_Format(PyExc_TypeError,
7492                      "'memo' attribute must be an UnpicklerMemoProxy object "
7493                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7494         return -1;
7495     }
7496 
7497     _Unpickler_MemoCleanup(self);
7498     self->memo_size = new_memo_size;
7499     self->memo = new_memo;
7500 
7501     return 0;
7502 
7503   error:
7504     if (new_memo_size) {
7505         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7506             Py_XDECREF(new_memo[i]);
7507         }
7508         PyMem_Free(new_memo);
7509     }
7510     return -1;
7511 }
7512 
7513 static PyObject *
Unpickler_getattr(PyObject * self,PyObject * name)7514 Unpickler_getattr(PyObject *self, PyObject *name)
7515 {
7516     if (PyUnicode_Check(name)
7517         && PyUnicode_EqualToUTF8(name, "persistent_load")
7518         && ((UnpicklerObject *)self)->persistent_load_attr)
7519     {
7520         return Py_NewRef(((UnpicklerObject *)self)->persistent_load_attr);
7521     }
7522 
7523     return PyObject_GenericGetAttr(self, name);
7524 }
7525 
7526 static int
Unpickler_setattr(PyObject * self,PyObject * name,PyObject * value)7527 Unpickler_setattr(PyObject *self, PyObject *name, PyObject *value)
7528 {
7529     if (PyUnicode_Check(name)
7530         && PyUnicode_EqualToUTF8(name, "persistent_load"))
7531     {
7532         Py_XINCREF(value);
7533         Py_XSETREF(((UnpicklerObject *)self)->persistent_load_attr, value);
7534         return 0;
7535     }
7536 
7537     return PyObject_GenericSetAttr(self, name, value);
7538 }
7539 
7540 static PyGetSetDef Unpickler_getsets[] = {
7541     {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7542     {NULL}
7543 };
7544 
7545 static PyType_Slot unpickler_type_slots[] = {
7546     {Py_tp_dealloc, Unpickler_dealloc},
7547     {Py_tp_doc, (char *)_pickle_Unpickler___init____doc__},
7548     {Py_tp_getattro, Unpickler_getattr},
7549     {Py_tp_setattro, Unpickler_setattr},
7550     {Py_tp_traverse, Unpickler_traverse},
7551     {Py_tp_clear, Unpickler_clear},
7552     {Py_tp_methods, Unpickler_methods},
7553     {Py_tp_getset, Unpickler_getsets},
7554     {Py_tp_init, _pickle_Unpickler___init__},
7555     {Py_tp_alloc, PyType_GenericAlloc},
7556     {Py_tp_new, PyType_GenericNew},
7557     {Py_tp_free, PyObject_GC_Del},
7558     {0, NULL},
7559 };
7560 
7561 static PyType_Spec unpickler_type_spec = {
7562     .name = "_pickle.Unpickler",
7563     .basicsize = sizeof(UnpicklerObject),
7564     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
7565               Py_TPFLAGS_IMMUTABLETYPE),
7566     .slots = unpickler_type_slots,
7567 };
7568 
7569 /*[clinic input]
7570 
7571 _pickle.dump
7572 
7573   obj: object
7574   file: object
7575   protocol: object = None
7576   *
7577   fix_imports: bool = True
7578   buffer_callback: object = None
7579 
7580 Write a pickled representation of obj to the open file object file.
7581 
7582 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7583 be more efficient.
7584 
7585 The optional *protocol* argument tells the pickler to use the given
7586 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7587 protocol is 4. It was introduced in Python 3.4, and is incompatible
7588 with previous versions.
7589 
7590 Specifying a negative protocol version selects the highest protocol
7591 version supported.  The higher the protocol used, the more recent the
7592 version of Python needed to read the pickle produced.
7593 
7594 The *file* argument must have a write() method that accepts a single
7595 bytes argument.  It can thus be a file object opened for binary
7596 writing, an io.BytesIO instance, or any other custom object that meets
7597 this interface.
7598 
7599 If *fix_imports* is True and protocol is less than 3, pickle will try
7600 to map the new Python 3 names to the old module names used in Python
7601 2, so that the pickle data stream is readable with Python 2.
7602 
7603 If *buffer_callback* is None (the default), buffer views are serialized
7604 into *file* as part of the pickle stream.  It is an error if
7605 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7606 
7607 [clinic start generated code]*/
7608 
7609 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7610 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7611                   PyObject *protocol, int fix_imports,
7612                   PyObject *buffer_callback)
7613 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7614 {
7615     PickleState *state = _Pickle_GetState(module);
7616     PicklerObject *pickler = _Pickler_New(state);
7617 
7618     if (pickler == NULL)
7619         return NULL;
7620 
7621     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7622         goto error;
7623 
7624     if (_Pickler_SetOutputStream(pickler, file) < 0)
7625         goto error;
7626 
7627     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7628         goto error;
7629 
7630     if (dump(state, pickler, obj) < 0)
7631         goto error;
7632 
7633     if (_Pickler_FlushToFile(pickler) < 0)
7634         goto error;
7635 
7636     Py_DECREF(pickler);
7637     Py_RETURN_NONE;
7638 
7639   error:
7640     Py_XDECREF(pickler);
7641     return NULL;
7642 }
7643 
7644 /*[clinic input]
7645 
7646 _pickle.dumps
7647 
7648   obj: object
7649   protocol: object = None
7650   *
7651   fix_imports: bool = True
7652   buffer_callback: object = None
7653 
7654 Return the pickled representation of the object as a bytes object.
7655 
7656 The optional *protocol* argument tells the pickler to use the given
7657 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7658 protocol is 4. It was introduced in Python 3.4, and is incompatible
7659 with previous versions.
7660 
7661 Specifying a negative protocol version selects the highest protocol
7662 version supported.  The higher the protocol used, the more recent the
7663 version of Python needed to read the pickle produced.
7664 
7665 If *fix_imports* is True and *protocol* is less than 3, pickle will
7666 try to map the new Python 3 names to the old module names used in
7667 Python 2, so that the pickle data stream is readable with Python 2.
7668 
7669 If *buffer_callback* is None (the default), buffer views are serialized
7670 into *file* as part of the pickle stream.  It is an error if
7671 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7672 
7673 [clinic start generated code]*/
7674 
7675 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7676 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7677                    int fix_imports, PyObject *buffer_callback)
7678 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7679 {
7680     PyObject *result;
7681     PickleState *state = _Pickle_GetState(module);
7682     PicklerObject *pickler = _Pickler_New(state);
7683 
7684     if (pickler == NULL)
7685         return NULL;
7686 
7687     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7688         goto error;
7689 
7690     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7691         goto error;
7692 
7693     if (dump(state, pickler, obj) < 0)
7694         goto error;
7695 
7696     result = _Pickler_GetString(pickler);
7697     Py_DECREF(pickler);
7698     return result;
7699 
7700   error:
7701     Py_XDECREF(pickler);
7702     return NULL;
7703 }
7704 
7705 /*[clinic input]
7706 
7707 _pickle.load
7708 
7709   file: object
7710   *
7711   fix_imports: bool = True
7712   encoding: str = 'ASCII'
7713   errors: str = 'strict'
7714   buffers: object(c_default="NULL") = ()
7715 
7716 Read and return an object from the pickle data stored in a file.
7717 
7718 This is equivalent to ``Unpickler(file).load()``, but may be more
7719 efficient.
7720 
7721 The protocol version of the pickle is detected automatically, so no
7722 protocol argument is needed.  Bytes past the pickled object's
7723 representation are ignored.
7724 
7725 The argument *file* must have two methods, a read() method that takes
7726 an integer argument, and a readline() method that requires no
7727 arguments.  Both methods should return bytes.  Thus *file* can be a
7728 binary file object opened for reading, an io.BytesIO object, or any
7729 other custom object that meets this interface.
7730 
7731 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7732 which are used to control compatibility support for pickle stream
7733 generated by Python 2.  If *fix_imports* is True, pickle will try to
7734 map the old Python 2 names to the new names used in Python 3.  The
7735 *encoding* and *errors* tell pickle how to decode 8-bit string
7736 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7737 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7738 string instances as bytes objects.
7739 [clinic start generated code]*/
7740 
7741 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7742 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7743                   const char *encoding, const char *errors,
7744                   PyObject *buffers)
7745 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7746 {
7747     PyObject *result;
7748     UnpicklerObject *unpickler = _Unpickler_New(module);
7749 
7750     if (unpickler == NULL)
7751         return NULL;
7752 
7753     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7754         goto error;
7755 
7756     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7757         goto error;
7758 
7759     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7760         goto error;
7761 
7762     unpickler->fix_imports = fix_imports;
7763 
7764     PickleState *state = _Pickle_GetState(module);
7765     result = load(state, unpickler);
7766     Py_DECREF(unpickler);
7767     return result;
7768 
7769   error:
7770     Py_XDECREF(unpickler);
7771     return NULL;
7772 }
7773 
7774 /*[clinic input]
7775 
7776 _pickle.loads
7777 
7778   data: object
7779   /
7780   *
7781   fix_imports: bool = True
7782   encoding: str = 'ASCII'
7783   errors: str = 'strict'
7784   buffers: object(c_default="NULL") = ()
7785 
7786 Read and return an object from the given pickle data.
7787 
7788 The protocol version of the pickle is detected automatically, so no
7789 protocol argument is needed.  Bytes past the pickled object's
7790 representation are ignored.
7791 
7792 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7793 which are used to control compatibility support for pickle stream
7794 generated by Python 2.  If *fix_imports* is True, pickle will try to
7795 map the old Python 2 names to the new names used in Python 3.  The
7796 *encoding* and *errors* tell pickle how to decode 8-bit string
7797 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7798 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7799 string instances as bytes objects.
7800 [clinic start generated code]*/
7801 
7802 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7803 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7804                    const char *encoding, const char *errors,
7805                    PyObject *buffers)
7806 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7807 {
7808     PyObject *result;
7809     UnpicklerObject *unpickler = _Unpickler_New(module);
7810 
7811     if (unpickler == NULL)
7812         return NULL;
7813 
7814     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7815         goto error;
7816 
7817     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7818         goto error;
7819 
7820     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7821         goto error;
7822 
7823     unpickler->fix_imports = fix_imports;
7824 
7825     PickleState *state = _Pickle_GetState(module);
7826     result = load(state, unpickler);
7827     Py_DECREF(unpickler);
7828     return result;
7829 
7830   error:
7831     Py_XDECREF(unpickler);
7832     return NULL;
7833 }
7834 
7835 static struct PyMethodDef pickle_methods[] = {
7836     _PICKLE_DUMP_METHODDEF
7837     _PICKLE_DUMPS_METHODDEF
7838     _PICKLE_LOAD_METHODDEF
7839     _PICKLE_LOADS_METHODDEF
7840     {NULL, NULL} /* sentinel */
7841 };
7842 
7843 static int
pickle_clear(PyObject * m)7844 pickle_clear(PyObject *m)
7845 {
7846     _Pickle_ClearState(_Pickle_GetState(m));
7847     return 0;
7848 }
7849 
7850 static void
pickle_free(PyObject * m)7851 pickle_free(PyObject *m)
7852 {
7853     _Pickle_ClearState(_Pickle_GetState(m));
7854 }
7855 
7856 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7857 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7858 {
7859     PickleState *st = _Pickle_GetState(m);
7860     Py_VISIT(st->PickleError);
7861     Py_VISIT(st->PicklingError);
7862     Py_VISIT(st->UnpicklingError);
7863     Py_VISIT(st->dispatch_table);
7864     Py_VISIT(st->extension_registry);
7865     Py_VISIT(st->extension_cache);
7866     Py_VISIT(st->inverted_registry);
7867     Py_VISIT(st->name_mapping_2to3);
7868     Py_VISIT(st->import_mapping_2to3);
7869     Py_VISIT(st->name_mapping_3to2);
7870     Py_VISIT(st->import_mapping_3to2);
7871     Py_VISIT(st->codecs_encode);
7872     Py_VISIT(st->getattr);
7873     Py_VISIT(st->partial);
7874     Py_VISIT(st->Pickler_Type);
7875     Py_VISIT(st->Unpickler_Type);
7876     Py_VISIT(st->Pdata_Type);
7877     Py_VISIT(st->PicklerMemoProxyType);
7878     Py_VISIT(st->UnpicklerMemoProxyType);
7879     return 0;
7880 }
7881 
7882 static int
_pickle_exec(PyObject * m)7883 _pickle_exec(PyObject *m)
7884 {
7885     PickleState *st = _Pickle_GetState(m);
7886 
7887 #define CREATE_TYPE(mod, type, spec)                                        \
7888     do {                                                                    \
7889         type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \
7890         if (type == NULL) {                                                 \
7891             return -1;                                                      \
7892         }                                                                   \
7893     } while (0)
7894 
7895     CREATE_TYPE(m, st->Pdata_Type, &pdata_spec);
7896     CREATE_TYPE(m, st->PicklerMemoProxyType, &memoproxy_spec);
7897     CREATE_TYPE(m, st->UnpicklerMemoProxyType, &unpickler_memoproxy_spec);
7898     CREATE_TYPE(m, st->Pickler_Type, &pickler_type_spec);
7899     CREATE_TYPE(m, st->Unpickler_Type, &unpickler_type_spec);
7900 
7901 #undef CREATE_TYPE
7902 
7903     /* Add types */
7904     if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7905         return -1;
7906     }
7907     if (PyModule_AddType(m, st->Pickler_Type) < 0) {
7908         return -1;
7909     }
7910     if (PyModule_AddType(m, st->Unpickler_Type) < 0) {
7911         return -1;
7912     }
7913 
7914     /* Initialize the exceptions. */
7915     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7916     if (st->PickleError == NULL)
7917         return -1;
7918     st->PicklingError = \
7919         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7920     if (st->PicklingError == NULL)
7921         return -1;
7922     st->UnpicklingError = \
7923         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7924     if (st->UnpicklingError == NULL)
7925         return -1;
7926 
7927     if (PyModule_AddObjectRef(m, "PickleError", st->PickleError) < 0) {
7928         return -1;
7929     }
7930     if (PyModule_AddObjectRef(m, "PicklingError", st->PicklingError) < 0) {
7931         return -1;
7932     }
7933     if (PyModule_AddObjectRef(m, "UnpicklingError", st->UnpicklingError) < 0) {
7934         return -1;
7935     }
7936 
7937     if (_Pickle_InitState(st) < 0)
7938         return -1;
7939 
7940     return 0;
7941 }
7942 
7943 static PyModuleDef_Slot pickle_slots[] = {
7944     {Py_mod_exec, _pickle_exec},
7945     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
7946     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
7947     {0, NULL},
7948 };
7949 
7950 static struct PyModuleDef _picklemodule = {
7951     PyModuleDef_HEAD_INIT,
7952     .m_name = "_pickle",
7953     .m_doc = pickle_module_doc,
7954     .m_size = sizeof(PickleState),
7955     .m_methods = pickle_methods,
7956     .m_slots = pickle_slots,
7957     .m_traverse = pickle_traverse,
7958     .m_clear = pickle_clear,
7959     .m_free = (freefunc)pickle_free,
7960 };
7961 
7962 PyMODINIT_FUNC
PyInit__pickle(void)7963 PyInit__pickle(void)
7964 {
7965     return PyModuleDef_Init(&_picklemodule);
7966 }
7967