• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6 
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 #  error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10 
11 #include "Python.h"
12 #include "pycore_moduleobject.h"  // _PyModule_GetState()
13 #include "structmember.h"         // PyMemberDef
14 
15 PyDoc_STRVAR(pickle_module_doc,
16 "Optimized C implementation for the Python pickle module.");
17 
18 /*[clinic input]
19 module _pickle
20 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
21 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
22 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
23 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
24 [clinic start generated code]*/
25 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
26 
/* Pickle protocol version limits.
 *
 * HIGHEST_PROTOCOL: bump whenever new opcodes are added to the pickle
 *                   protocol.
 * DEFAULT_PROTOCOL: bump only once the oldest still supported version of
 *                   Python already includes it.
 */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
34 
/* Pickle opcodes, one byte each.
 *
 * This list must be kept in step with pickle.py; pickletools.py carries the
 * extensive documentation for every opcode.
 */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 additions. */
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8 = '\x8d',
    BINBYTES8 = '\x8e',
    EMPTY_SET = '\x8f',
    ADDITEMS = '\x90',
    FROZENSET = '\x91',
    NEWOBJ_EX = '\x92',
    STACK_GLOBAL = '\x93',
    MEMOIZE = '\x94',
    FRAME = '\x95',

    /* Protocol 5. */
    BYTEARRAY8 = '\x96',
    NEXT_BUFFER = '\x97',
    READONLY_BUFFER = '\x98'
};
115 
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE;
       nothing will break if it gets out of synch with pickle.py, but it is
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16,

    /* Framing parameters. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
137 
138 /*************************************************************************/
139 
140 /* State of the pickle module, per PEP 3121. */
141 typedef struct {
142     /* Exception classes for pickle. */
143     PyObject *PickleError;
144     PyObject *PicklingError;
145     PyObject *UnpicklingError;
146 
147     /* copyreg.dispatch_table, {type_object: pickling_function} */
148     PyObject *dispatch_table;
149 
150     /* For the extension opcodes EXT1, EXT2 and EXT4. */
151 
152     /* copyreg._extension_registry, {(module_name, function_name): code} */
153     PyObject *extension_registry;
154     /* copyreg._extension_cache, {code: object} */
155     PyObject *extension_cache;
156     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
157     PyObject *inverted_registry;
158 
159     /* Import mappings for compatibility with Python 2.x */
160 
161     /* _compat_pickle.NAME_MAPPING,
162        {(oldmodule, oldname): (newmodule, newname)} */
163     PyObject *name_mapping_2to3;
164     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
165     PyObject *import_mapping_2to3;
166     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
167     PyObject *name_mapping_3to2;
168     PyObject *import_mapping_3to2;
169 
170     /* codecs.encode, used for saving bytes in older protocols */
171     PyObject *codecs_encode;
172     /* builtins.getattr, used for saving nested names with protocol < 4 */
173     PyObject *getattr;
174     /* functools.partial, used for implementing __newobj_ex__ with protocols
175        2 and 3 */
176     PyObject *partial;
177 } PickleState;
178 
179 /* Forward declaration of the _pickle module definition. */
180 static struct PyModuleDef _picklemodule;
181 
182 /* Given a module object, get its per-module state. */
183 static PickleState *
_Pickle_GetState(PyObject * module)184 _Pickle_GetState(PyObject *module)
185 {
186     return (PickleState *)_PyModule_GetState(module);
187 }
188 
189 /* Find the module instance imported in the currently running sub-interpreter
190    and get its state. */
191 static PickleState *
_Pickle_GetGlobalState(void)192 _Pickle_GetGlobalState(void)
193 {
194     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
195 }
196 
197 /* Clear the given pickle module state. */
198 static void
_Pickle_ClearState(PickleState * st)199 _Pickle_ClearState(PickleState *st)
200 {
201     Py_CLEAR(st->PickleError);
202     Py_CLEAR(st->PicklingError);
203     Py_CLEAR(st->UnpicklingError);
204     Py_CLEAR(st->dispatch_table);
205     Py_CLEAR(st->extension_registry);
206     Py_CLEAR(st->extension_cache);
207     Py_CLEAR(st->inverted_registry);
208     Py_CLEAR(st->name_mapping_2to3);
209     Py_CLEAR(st->import_mapping_2to3);
210     Py_CLEAR(st->name_mapping_3to2);
211     Py_CLEAR(st->import_mapping_3to2);
212     Py_CLEAR(st->codecs_encode);
213     Py_CLEAR(st->getattr);
214     Py_CLEAR(st->partial);
215 }
216 
217 /* Initialize the given pickle module state. */
218 static int
_Pickle_InitState(PickleState * st)219 _Pickle_InitState(PickleState *st)
220 {
221     PyObject *copyreg = NULL;
222     PyObject *compat_pickle = NULL;
223     PyObject *codecs = NULL;
224     PyObject *functools = NULL;
225     _Py_IDENTIFIER(getattr);
226 
227     st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
228     if (st->getattr == NULL)
229         goto error;
230 
231     copyreg = PyImport_ImportModule("copyreg");
232     if (!copyreg)
233         goto error;
234     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
235     if (!st->dispatch_table)
236         goto error;
237     if (!PyDict_CheckExact(st->dispatch_table)) {
238         PyErr_Format(PyExc_RuntimeError,
239                      "copyreg.dispatch_table should be a dict, not %.200s",
240                      Py_TYPE(st->dispatch_table)->tp_name);
241         goto error;
242     }
243     st->extension_registry = \
244         PyObject_GetAttrString(copyreg, "_extension_registry");
245     if (!st->extension_registry)
246         goto error;
247     if (!PyDict_CheckExact(st->extension_registry)) {
248         PyErr_Format(PyExc_RuntimeError,
249                      "copyreg._extension_registry should be a dict, "
250                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
251         goto error;
252     }
253     st->inverted_registry = \
254         PyObject_GetAttrString(copyreg, "_inverted_registry");
255     if (!st->inverted_registry)
256         goto error;
257     if (!PyDict_CheckExact(st->inverted_registry)) {
258         PyErr_Format(PyExc_RuntimeError,
259                      "copyreg._inverted_registry should be a dict, "
260                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
261         goto error;
262     }
263     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
264     if (!st->extension_cache)
265         goto error;
266     if (!PyDict_CheckExact(st->extension_cache)) {
267         PyErr_Format(PyExc_RuntimeError,
268                      "copyreg._extension_cache should be a dict, "
269                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
270         goto error;
271     }
272     Py_CLEAR(copyreg);
273 
274     /* Load the 2.x -> 3.x stdlib module mapping tables */
275     compat_pickle = PyImport_ImportModule("_compat_pickle");
276     if (!compat_pickle)
277         goto error;
278     st->name_mapping_2to3 = \
279         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
280     if (!st->name_mapping_2to3)
281         goto error;
282     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
283         PyErr_Format(PyExc_RuntimeError,
284                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
285                      Py_TYPE(st->name_mapping_2to3)->tp_name);
286         goto error;
287     }
288     st->import_mapping_2to3 = \
289         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
290     if (!st->import_mapping_2to3)
291         goto error;
292     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
293         PyErr_Format(PyExc_RuntimeError,
294                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
295                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
296         goto error;
297     }
298     /* ... and the 3.x -> 2.x mapping tables */
299     st->name_mapping_3to2 = \
300         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
301     if (!st->name_mapping_3to2)
302         goto error;
303     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
304         PyErr_Format(PyExc_RuntimeError,
305                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
306                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
307         goto error;
308     }
309     st->import_mapping_3to2 = \
310         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
311     if (!st->import_mapping_3to2)
312         goto error;
313     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
314         PyErr_Format(PyExc_RuntimeError,
315                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
316                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
317         goto error;
318     }
319     Py_CLEAR(compat_pickle);
320 
321     codecs = PyImport_ImportModule("codecs");
322     if (codecs == NULL)
323         goto error;
324     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
325     if (st->codecs_encode == NULL) {
326         goto error;
327     }
328     if (!PyCallable_Check(st->codecs_encode)) {
329         PyErr_Format(PyExc_RuntimeError,
330                      "codecs.encode should be a callable, not %.200s",
331                      Py_TYPE(st->codecs_encode)->tp_name);
332         goto error;
333     }
334     Py_CLEAR(codecs);
335 
336     functools = PyImport_ImportModule("functools");
337     if (!functools)
338         goto error;
339     st->partial = PyObject_GetAttrString(functools, "partial");
340     if (!st->partial)
341         goto error;
342     Py_CLEAR(functools);
343 
344     return 0;
345 
346   error:
347     Py_CLEAR(copyreg);
348     Py_CLEAR(compat_pickle);
349     Py_CLEAR(codecs);
350     Py_CLEAR(functools);
351     _Pickle_ClearState(st);
352     return -1;
353 }
354 
355 /* Helper for calling a function with a single argument quickly.
356 
357    This function steals the reference of the given argument. */
358 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)359 _Pickle_FastCall(PyObject *func, PyObject *obj)
360 {
361     PyObject *result;
362 
363     result = PyObject_CallOneArg(func, obj);
364     Py_DECREF(obj);
365     return result;
366 }
367 
368 /*************************************************************************/
369 
370 /* Retrieve and deconstruct a method for avoiding a reference cycle
371    (pickler -> bound method of pickler -> pickler) */
372 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)373 init_method_ref(PyObject *self, _Py_Identifier *name,
374                 PyObject **method_func, PyObject **method_self)
375 {
376     PyObject *func, *func2;
377     int ret;
378 
379     /* *method_func and *method_self should be consistent.  All refcount decrements
380        should be occurred after setting *method_self and *method_func. */
381     ret = _PyObject_LookupAttrId(self, name, &func);
382     if (func == NULL) {
383         *method_self = NULL;
384         Py_CLEAR(*method_func);
385         return ret;
386     }
387 
388     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
389         /* Deconstruct a bound Python method */
390         func2 = PyMethod_GET_FUNCTION(func);
391         Py_INCREF(func2);
392         *method_self = self; /* borrowed */
393         Py_XSETREF(*method_func, func2);
394         Py_DECREF(func);
395         return 0;
396     }
397     else {
398         *method_self = NULL;
399         Py_XSETREF(*method_func, func);
400         return 0;
401     }
402 }
403 
404 /* Bind a method if it was deconstructed */
405 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)406 reconstruct_method(PyObject *func, PyObject *self)
407 {
408     if (self) {
409         return PyMethod_New(func, self);
410     }
411     else {
412         Py_INCREF(func);
413         return func;
414     }
415 }
416 
417 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)418 call_method(PyObject *func, PyObject *self, PyObject *obj)
419 {
420     if (self) {
421         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
422     }
423     else {
424         return PyObject_CallOneArg(func, obj);
425     }
426 }
427 
428 /*************************************************************************/
429 
430 /* Internal data type used as the unpickling stack. */
431 typedef struct {
432     PyObject_VAR_HEAD
433     PyObject **data;
434     int mark_set;          /* is MARK set? */
435     Py_ssize_t fence;      /* position of top MARK or 0 */
436     Py_ssize_t allocated;  /* number of slots in data allocated */
437 } Pdata;
438 
439 static void
Pdata_dealloc(Pdata * self)440 Pdata_dealloc(Pdata *self)
441 {
442     Py_ssize_t i = Py_SIZE(self);
443     while (--i >= 0) {
444         Py_DECREF(self->data[i]);
445     }
446     PyMem_Free(self->data);
447     PyObject_Free(self);
448 }
449 
450 static PyTypeObject Pdata_Type = {
451     PyVarObject_HEAD_INIT(NULL, 0)
452     "_pickle.Pdata",              /*tp_name*/
453     sizeof(Pdata),                /*tp_basicsize*/
454     sizeof(PyObject *),           /*tp_itemsize*/
455     (destructor)Pdata_dealloc,    /*tp_dealloc*/
456 };
457 
458 static PyObject *
Pdata_New(void)459 Pdata_New(void)
460 {
461     Pdata *self;
462 
463     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
464         return NULL;
465     Py_SET_SIZE(self, 0);
466     self->mark_set = 0;
467     self->fence = 0;
468     self->allocated = 8;
469     self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
470     if (self->data)
471         return (PyObject *)self;
472     Py_DECREF(self);
473     return PyErr_NoMemory();
474 }
475 
476 
477 /* Retain only the initial clearto items.  If clearto >= the current
478  * number of items, this is a (non-erroneous) NOP.
479  */
480 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)481 Pdata_clear(Pdata *self, Py_ssize_t clearto)
482 {
483     Py_ssize_t i = Py_SIZE(self);
484 
485     assert(clearto >= self->fence);
486     if (clearto >= i)
487         return 0;
488 
489     while (--i >= clearto) {
490         Py_CLEAR(self->data[i]);
491     }
492     Py_SET_SIZE(self, clearto);
493     return 0;
494 }
495 
496 static int
Pdata_grow(Pdata * self)497 Pdata_grow(Pdata *self)
498 {
499     PyObject **data = self->data;
500     size_t allocated = (size_t)self->allocated;
501     size_t new_allocated;
502 
503     new_allocated = (allocated >> 3) + 6;
504     /* check for integer overflow */
505     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
506         goto nomemory;
507     new_allocated += allocated;
508     PyMem_RESIZE(data, PyObject *, new_allocated);
509     if (data == NULL)
510         goto nomemory;
511 
512     self->data = data;
513     self->allocated = (Py_ssize_t)new_allocated;
514     return 0;
515 
516   nomemory:
517     PyErr_NoMemory();
518     return -1;
519 }
520 
521 static int
Pdata_stack_underflow(Pdata * self)522 Pdata_stack_underflow(Pdata *self)
523 {
524     PickleState *st = _Pickle_GetGlobalState();
525     PyErr_SetString(st->UnpicklingError,
526                     self->mark_set ?
527                     "unexpected MARK found" :
528                     "unpickling stack underflow");
529     return -1;
530 }
531 
532 /* D is a Pdata*.  Pop the topmost element and store it into V, which
533  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
534  * is raised and V is set to NULL.
535  */
536 static PyObject *
Pdata_pop(Pdata * self)537 Pdata_pop(Pdata *self)
538 {
539     if (Py_SIZE(self) <= self->fence) {
540         Pdata_stack_underflow(self);
541         return NULL;
542     }
543     Py_SET_SIZE(self, Py_SIZE(self) - 1);
544     return self->data[Py_SIZE(self)];
545 }
546 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
547 
548 static int
Pdata_push(Pdata * self,PyObject * obj)549 Pdata_push(Pdata *self, PyObject *obj)
550 {
551     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
552         return -1;
553     }
554     self->data[Py_SIZE(self)] = obj;
555     Py_SET_SIZE(self, Py_SIZE(self) + 1);
556     return 0;
557 }
558 
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER.
   NOTE(review): on that failure path O is neither stored nor released by
   this macro. */
#define PDATA_PUSH(D, O, ER)                    \
    do {                                        \
        if (Pdata_push((D), (O)) < 0) {         \
            return (ER);                        \
        }                                       \
    } while (0)
562 
/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference taken here is dropped again so that the
   failure path does not leak it, and the enclosing function returns ER.
   O is expanded more than once, so it must not have side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
567 
568 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)569 Pdata_poptuple(Pdata *self, Py_ssize_t start)
570 {
571     PyObject *tuple;
572     Py_ssize_t len, i, j;
573 
574     if (start < self->fence) {
575         Pdata_stack_underflow(self);
576         return NULL;
577     }
578     len = Py_SIZE(self) - start;
579     tuple = PyTuple_New(len);
580     if (tuple == NULL)
581         return NULL;
582     for (i = start, j = 0; j < len; i++, j++)
583         PyTuple_SET_ITEM(tuple, j, self->data[i]);
584 
585     Py_SET_SIZE(self, start);
586     return tuple;
587 }
588 
589 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)590 Pdata_poplist(Pdata *self, Py_ssize_t start)
591 {
592     PyObject *list;
593     Py_ssize_t len, i, j;
594 
595     len = Py_SIZE(self) - start;
596     list = PyList_New(len);
597     if (list == NULL)
598         return NULL;
599     for (i = start, j = 0; j < len; i++, j++)
600         PyList_SET_ITEM(list, j, self->data[i]);
601 
602     Py_SET_SIZE(self, start);
603     return list;
604 }
605 
606 typedef struct {
607     PyObject *me_key;
608     Py_ssize_t me_value;
609 } PyMemoEntry;
610 
611 typedef struct {
612     size_t mt_mask;
613     size_t mt_used;
614     size_t mt_allocated;
615     PyMemoEntry *mt_table;
616 } PyMemoTable;
617 
618 typedef struct PicklerObject {
619     PyObject_HEAD
620     PyMemoTable *memo;          /* Memo table, keep track of the seen
621                                    objects to support self-referential objects
622                                    pickling. */
623     PyObject *pers_func;        /* persistent_id() method, can be NULL */
624     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
625                                    is an unbound method, NULL otherwise */
626     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
627     PyObject *reducer_override; /* hook for invoking user-defined callbacks
628                                    instead of save_global when pickling
629                                    functions and classes*/
630 
631     PyObject *write;            /* write() method of the output stream. */
632     PyObject *output_buffer;    /* Write into a local bytearray buffer before
633                                    flushing to the stream. */
634     Py_ssize_t output_len;      /* Length of output_buffer. */
635     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
636     int proto;                  /* Pickle protocol number, >= 0 */
637     int bin;                    /* Boolean, true if proto > 0 */
638     int framing;                /* True when framing is enabled, proto >= 4 */
639     Py_ssize_t frame_start;     /* Position in output_buffer where the
640                                    current frame begins. -1 if there
641                                    is no frame currently open. */
642 
643     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
644     int fast;                   /* Enable fast mode if set to a true value.
645                                    The fast mode disable the usage of memo,
646                                    therefore speeding the pickling process by
647                                    not generating superfluous PUT opcodes. It
648                                    should not be used if with self-referential
649                                    objects. */
650     int fast_nesting;
651     int fix_imports;            /* Indicate whether Pickler should fix
652                                    the name of globals for Python 2.x. */
653     PyObject *fast_memo;
654     PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
655 } PicklerObject;
656 
657 typedef struct UnpicklerObject {
658     PyObject_HEAD
659     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
660 
661     /* The unpickler memo is just an array of PyObject *s. Using a dict
662        is unnecessary, since the keys are contiguous ints. */
663     PyObject **memo;
664     size_t memo_size;       /* Capacity of the memo array */
665     size_t memo_len;        /* Number of objects in the memo */
666 
667     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
668     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
669                                    is an unbound method, NULL otherwise */
670 
671     Py_buffer buffer;
672     char *input_buffer;
673     char *input_line;
674     Py_ssize_t input_len;
675     Py_ssize_t next_read_idx;
676     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
677 
678     PyObject *read;             /* read() method of the input stream. */
679     PyObject *readinto;         /* readinto() method of the input stream. */
680     PyObject *readline;         /* readline() method of the input stream. */
681     PyObject *peek;             /* peek() method of the input stream, or NULL */
682     PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
683 
684     char *encoding;             /* Name of the encoding to be used for
685                                    decoding strings pickled using Python
686                                    2.x. The default value is "ASCII" */
687     char *errors;               /* Name of errors handling scheme to used when
688                                    decoding strings. The default value is
689                                    "strict". */
690     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
691                                    objects. */
692     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
693     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
694     int proto;                  /* Protocol of the pickle loaded. */
695     int fix_imports;            /* Indicate whether Unpickler should fix
696                                    the name of globals pickled by Python 2.x. */
697 } UnpicklerObject;
698 
699 typedef struct {
700     PyObject_HEAD
701     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
702 }  PicklerMemoProxyObject;
703 
704 typedef struct {
705     PyObject_HEAD
706     UnpicklerObject *unpickler;
707 } UnpicklerMemoProxyObject;
708 
709 /* Forward declarations */
710 static int save(PicklerObject *, PyObject *, int);
711 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
712 static PyTypeObject Pickler_Type;
713 static PyTypeObject Unpickler_Type;
714 
715 #include "clinic/_pickle.c.h"
716 
717 /*************************************************************************
718  A custom hashtable mapping void* to Python ints. This is used by the pickler
719  for memoization. Using a custom hashtable rather than PyDict allows us to skip
720  a bunch of unnecessary object creation. This makes a huge performance
721  difference. */
722 
/* Number of slots in a freshly created memo table, and the smallest size a
   resize will pick. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by after each
   probe of a lookup. */
#define PERTURB_SHIFT 5
725 
726 
727 static PyMemoTable *
PyMemoTable_New(void)728 PyMemoTable_New(void)
729 {
730     PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
731     if (memo == NULL) {
732         PyErr_NoMemory();
733         return NULL;
734     }
735 
736     memo->mt_used = 0;
737     memo->mt_allocated = MT_MINSIZE;
738     memo->mt_mask = MT_MINSIZE - 1;
739     memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
740     if (memo->mt_table == NULL) {
741         PyMem_Free(memo);
742         PyErr_NoMemory();
743         return NULL;
744     }
745     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
746 
747     return memo;
748 }
749 
750 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)751 PyMemoTable_Copy(PyMemoTable *self)
752 {
753     PyMemoTable *new = PyMemoTable_New();
754     if (new == NULL)
755         return NULL;
756 
757     new->mt_used = self->mt_used;
758     new->mt_allocated = self->mt_allocated;
759     new->mt_mask = self->mt_mask;
760     /* The table we get from _New() is probably smaller than we wanted.
761        Free it and allocate one that's the right size. */
762     PyMem_Free(new->mt_table);
763     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
764     if (new->mt_table == NULL) {
765         PyMem_Free(new);
766         PyErr_NoMemory();
767         return NULL;
768     }
769     for (size_t i = 0; i < self->mt_allocated; i++) {
770         Py_XINCREF(self->mt_table[i].me_key);
771     }
772     memcpy(new->mt_table, self->mt_table,
773            sizeof(PyMemoEntry) * self->mt_allocated);
774 
775     return new;
776 }
777 
778 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)779 PyMemoTable_Size(PyMemoTable *self)
780 {
781     return self->mt_used;
782 }
783 
784 static int
PyMemoTable_Clear(PyMemoTable * self)785 PyMemoTable_Clear(PyMemoTable *self)
786 {
787     Py_ssize_t i = self->mt_allocated;
788 
789     while (--i >= 0) {
790         Py_XDECREF(self->mt_table[i].me_key);
791     }
792     self->mt_used = 0;
793     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
794     return 0;
795 }
796 
797 static void
PyMemoTable_Del(PyMemoTable * self)798 PyMemoTable_Del(PyMemoTable *self)
799 {
800     if (self == NULL)
801         return;
802     PyMemoTable_Clear(self);
803 
804     PyMem_Free(self->mt_table);
805     PyMem_Free(self);
806 }
807 
808 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
809    can be considerably simpler than dictobject.c's lookdict(). */
810 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)811 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
812 {
813     size_t i;
814     size_t perturb;
815     size_t mask = self->mt_mask;
816     PyMemoEntry *table = self->mt_table;
817     PyMemoEntry *entry;
818     Py_hash_t hash = (Py_hash_t)key >> 3;
819 
820     i = hash & mask;
821     entry = &table[i];
822     if (entry->me_key == NULL || entry->me_key == key)
823         return entry;
824 
825     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
826         i = (i << 2) + i + perturb + 1;
827         entry = &table[i & mask];
828         if (entry->me_key == NULL || entry->me_key == key)
829             return entry;
830     }
831     Py_UNREACHABLE();
832 }
833 
834 /* Returns -1 on failure, 0 on success. */
835 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)836 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
837 {
838     PyMemoEntry *oldtable = NULL;
839     PyMemoEntry *oldentry, *newentry;
840     size_t new_size = MT_MINSIZE;
841     size_t to_process;
842 
843     assert(min_size > 0);
844 
845     if (min_size > PY_SSIZE_T_MAX) {
846         PyErr_NoMemory();
847         return -1;
848     }
849 
850     /* Find the smallest valid table size >= min_size. */
851     while (new_size < min_size) {
852         new_size <<= 1;
853     }
854     /* new_size needs to be a power of two. */
855     assert((new_size & (new_size - 1)) == 0);
856 
857     /* Allocate new table. */
858     oldtable = self->mt_table;
859     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
860     if (self->mt_table == NULL) {
861         self->mt_table = oldtable;
862         PyErr_NoMemory();
863         return -1;
864     }
865     self->mt_allocated = new_size;
866     self->mt_mask = new_size - 1;
867     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
868 
869     /* Copy entries from the old table. */
870     to_process = self->mt_used;
871     for (oldentry = oldtable; to_process > 0; oldentry++) {
872         if (oldentry->me_key != NULL) {
873             to_process--;
874             /* newentry is a pointer to a chunk of the new
875                mt_table, so we're setting the key:value pair
876                in-place. */
877             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
878             newentry->me_key = oldentry->me_key;
879             newentry->me_value = oldentry->me_value;
880         }
881     }
882 
883     /* Deallocate the old table. */
884     PyMem_Free(oldtable);
885     return 0;
886 }
887 
888 /* Returns NULL on failure, a pointer to the value otherwise. */
889 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)890 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
891 {
892     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
893     if (entry->me_key == NULL)
894         return NULL;
895     return &entry->me_value;
896 }
897 
898 /* Returns -1 on failure, 0 on success. */
899 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)900 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
901 {
902     PyMemoEntry *entry;
903 
904     assert(key != NULL);
905 
906     entry = _PyMemoTable_Lookup(self, key);
907     if (entry->me_key != NULL) {
908         entry->me_value = value;
909         return 0;
910     }
911     Py_INCREF(key);
912     entry->me_key = key;
913     entry->me_value = value;
914     self->mt_used++;
915 
916     /* If we added a key, we can safely resize. Otherwise just return!
917      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
918      *
919      * Quadrupling the size improves average table sparseness
920      * (reducing collisions) at the cost of some memory. It also halves
921      * the number of expensive resize operations in a growing memo table.
922      *
923      * Very large memo tables (over 50K items) use doubling instead.
924      * This may help applications with severe memory constraints.
925      */
926     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
927         return 0;
928     }
929     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
930     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
931     return _PyMemoTable_ResizeTable(self, desired_size);
932 }
933 
934 #undef MT_MINSIZE
935 #undef PERTURB_SHIFT
936 
937 /*************************************************************************/
938 
939 
940 static int
_Pickler_ClearBuffer(PicklerObject * self)941 _Pickler_ClearBuffer(PicklerObject *self)
942 {
943     Py_XSETREF(self->output_buffer,
944               PyBytes_FromStringAndSize(NULL, self->max_output_len));
945     if (self->output_buffer == NULL)
946         return -1;
947     self->output_len = 0;
948     self->frame_start = -1;
949     return 0;
950 }
951 
/* Store `value` into the 8 bytes at `out` in little-endian order.  When
   size_t is narrower than 8 bytes, the remaining high-order bytes are
   zero-filled. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos = 0;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Emit the low-order byte first, then shift the next one into place. */
    while (pos < sizeof(size_t)) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
        pos++;
    }
    /* Pad up to the fixed 8-byte field width. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
966 
967 static int
_Pickler_CommitFrame(PicklerObject * self)968 _Pickler_CommitFrame(PicklerObject *self)
969 {
970     size_t frame_len;
971     char *qdata;
972 
973     if (!self->framing || self->frame_start == -1)
974         return 0;
975     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
976     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
977     if (frame_len >= FRAME_SIZE_MIN) {
978         qdata[0] = FRAME;
979         _write_size64(qdata + 1, frame_len);
980     }
981     else {
982         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
983         self->output_len -= FRAME_HEADER_SIZE;
984     }
985     self->frame_start = -1;
986     return 0;
987 }
988 
989 static PyObject *
_Pickler_GetString(PicklerObject * self)990 _Pickler_GetString(PicklerObject *self)
991 {
992     PyObject *output_buffer = self->output_buffer;
993 
994     assert(self->output_buffer != NULL);
995 
996     if (_Pickler_CommitFrame(self))
997         return NULL;
998 
999     self->output_buffer = NULL;
1000     /* Resize down to exact size */
1001     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1002         return NULL;
1003     return output_buffer;
1004 }
1005 
1006 static int
_Pickler_FlushToFile(PicklerObject * self)1007 _Pickler_FlushToFile(PicklerObject *self)
1008 {
1009     PyObject *output, *result;
1010 
1011     assert(self->write != NULL);
1012 
1013     /* This will commit the frame first */
1014     output = _Pickler_GetString(self);
1015     if (output == NULL)
1016         return -1;
1017 
1018     result = _Pickle_FastCall(self->write, output);
1019     Py_XDECREF(result);
1020     return (result == NULL) ? -1 : 0;
1021 }
1022 
1023 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1024 _Pickler_OpcodeBoundary(PicklerObject *self)
1025 {
1026     Py_ssize_t frame_len;
1027 
1028     if (!self->framing || self->frame_start == -1) {
1029         return 0;
1030     }
1031     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1032     if (frame_len >= FRAME_SIZE_TARGET) {
1033         if(_Pickler_CommitFrame(self)) {
1034             return -1;
1035         }
1036         /* Flush the content of the committed frame to the underlying
1037          * file and reuse the pickler buffer for the next frame so as
1038          * to limit memory usage when dumping large complex objects to
1039          * a file.
1040          *
1041          * self->write is NULL when called via dumps.
1042          */
1043         if (self->write != NULL) {
1044             if (_Pickler_FlushToFile(self) < 0) {
1045                 return -1;
1046             }
1047             if (_Pickler_ClearBuffer(self) < 0) {
1048                 return -1;
1049             }
1050         }
1051     }
1052     return 0;
1053 }
1054 
1055 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1056 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1057 {
1058     Py_ssize_t i, n, required;
1059     char *buffer;
1060     int need_new_frame;
1061 
1062     assert(s != NULL);
1063     need_new_frame = (self->framing && self->frame_start == -1);
1064 
1065     if (need_new_frame)
1066         n = data_len + FRAME_HEADER_SIZE;
1067     else
1068         n = data_len;
1069 
1070     required = self->output_len + n;
1071     if (required > self->max_output_len) {
1072         /* Make place in buffer for the pickle chunk */
1073         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1074             PyErr_NoMemory();
1075             return -1;
1076         }
1077         self->max_output_len = (self->output_len + n) / 2 * 3;
1078         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1079             return -1;
1080     }
1081     buffer = PyBytes_AS_STRING(self->output_buffer);
1082     if (need_new_frame) {
1083         /* Setup new frame */
1084         Py_ssize_t frame_start = self->output_len;
1085         self->frame_start = frame_start;
1086         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1087             /* Write an invalid value, for debugging */
1088             buffer[frame_start + i] = 0xFE;
1089         }
1090         self->output_len += FRAME_HEADER_SIZE;
1091     }
1092     if (data_len < 8) {
1093         /* This is faster than memcpy when the string is short. */
1094         for (i = 0; i < data_len; i++) {
1095             buffer[self->output_len + i] = s[i];
1096         }
1097     }
1098     else {
1099         memcpy(buffer + self->output_len, s, data_len);
1100     }
1101     self->output_len += data_len;
1102     return data_len;
1103 }
1104 
1105 static PicklerObject *
_Pickler_New(void)1106 _Pickler_New(void)
1107 {
1108     PicklerObject *self;
1109 
1110     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1111     if (self == NULL)
1112         return NULL;
1113 
1114     self->pers_func = NULL;
1115     self->dispatch_table = NULL;
1116     self->buffer_callback = NULL;
1117     self->write = NULL;
1118     self->proto = 0;
1119     self->bin = 0;
1120     self->framing = 0;
1121     self->frame_start = -1;
1122     self->fast = 0;
1123     self->fast_nesting = 0;
1124     self->fix_imports = 0;
1125     self->fast_memo = NULL;
1126     self->max_output_len = WRITE_BUF_SIZE;
1127     self->output_len = 0;
1128     self->reducer_override = NULL;
1129 
1130     self->memo = PyMemoTable_New();
1131     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1132                                                     self->max_output_len);
1133 
1134     if (self->memo == NULL || self->output_buffer == NULL) {
1135         Py_DECREF(self);
1136         return NULL;
1137     }
1138 
1139     PyObject_GC_Track(self);
1140     return self;
1141 }
1142 
1143 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1144 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1145 {
1146     long proto;
1147 
1148     if (protocol == Py_None) {
1149         proto = DEFAULT_PROTOCOL;
1150     }
1151     else {
1152         proto = PyLong_AsLong(protocol);
1153         if (proto < 0) {
1154             if (proto == -1 && PyErr_Occurred())
1155                 return -1;
1156             proto = HIGHEST_PROTOCOL;
1157         }
1158         else if (proto > HIGHEST_PROTOCOL) {
1159             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1160                          HIGHEST_PROTOCOL);
1161             return -1;
1162         }
1163     }
1164     self->proto = (int)proto;
1165     self->bin = proto > 0;
1166     self->fix_imports = fix_imports && proto < 3;
1167     return 0;
1168 }
1169 
1170 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1171    be called once on a freshly created Pickler. */
1172 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1173 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1174 {
1175     _Py_IDENTIFIER(write);
1176     assert(file != NULL);
1177     if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1178         return -1;
1179     }
1180     if (self->write == NULL) {
1181         PyErr_SetString(PyExc_TypeError,
1182                         "file must have a 'write' attribute");
1183         return -1;
1184     }
1185 
1186     return 0;
1187 }
1188 
1189 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1190 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1191 {
1192     if (buffer_callback == Py_None) {
1193         buffer_callback = NULL;
1194     }
1195     if (buffer_callback != NULL && self->proto < 5) {
1196         PyErr_SetString(PyExc_ValueError,
1197                         "buffer_callback needs protocol >= 5");
1198         return -1;
1199     }
1200 
1201     Py_XINCREF(buffer_callback);
1202     self->buffer_callback = buffer_callback;
1203     return 0;
1204 }
1205 
1206 /* Returns the size of the input on success, -1 on failure. This takes its
1207    own reference to `input`. */
1208 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1209 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1210 {
1211     if (self->buffer.buf != NULL)
1212         PyBuffer_Release(&self->buffer);
1213     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1214         return -1;
1215     self->input_buffer = self->buffer.buf;
1216     self->input_len = self->buffer.len;
1217     self->next_read_idx = 0;
1218     self->prefetched_idx = self->input_len;
1219     return self->input_len;
1220 }
1221 
1222 static int
bad_readline(void)1223 bad_readline(void)
1224 {
1225     PickleState *st = _Pickle_GetGlobalState();
1226     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1227     return -1;
1228 }
1229 
1230 /* Skip any consumed data that was only prefetched using peek() */
1231 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1232 _Unpickler_SkipConsumed(UnpicklerObject *self)
1233 {
1234     Py_ssize_t consumed;
1235     PyObject *r;
1236 
1237     consumed = self->next_read_idx - self->prefetched_idx;
1238     if (consumed <= 0)
1239         return 0;
1240 
1241     assert(self->peek);  /* otherwise we did something wrong */
1242     /* This makes a useless copy... */
1243     r = PyObject_CallFunction(self->read, "n", consumed);
1244     if (r == NULL)
1245         return -1;
1246     Py_DECREF(r);
1247 
1248     self->prefetched_idx = self->next_read_idx;
1249     return 0;
1250 }
1251 
1252 static const Py_ssize_t READ_WHOLE_LINE = -1;
1253 
1254 /* If reading from a file, we need to only pull the bytes we need, since there
1255    may be multiple pickle objects arranged contiguously in the same input
1256    buffer.
1257 
1258    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1259    bytes from the input stream/buffer.
1260 
1261    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1262    failure; on success, returns the number of bytes read from the file.
1263 
1264    On success, self->input_len will be 0; this is intentional so that when
1265    unpickling from a file, the "we've run out of data" code paths will trigger,
1266    causing the Unpickler to go back to the file for more data. Use the returned
1267    size to tell you how much data you can process. */
1268 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1269 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1270 {
1271     PyObject *data;
1272     Py_ssize_t read_size;
1273 
1274     assert(self->read != NULL);
1275 
1276     if (_Unpickler_SkipConsumed(self) < 0)
1277         return -1;
1278 
1279     if (n == READ_WHOLE_LINE) {
1280         data = PyObject_CallNoArgs(self->readline);
1281     }
1282     else {
1283         PyObject *len;
1284         /* Prefetch some data without advancing the file pointer, if possible */
1285         if (self->peek && n < PREFETCH) {
1286             len = PyLong_FromSsize_t(PREFETCH);
1287             if (len == NULL)
1288                 return -1;
1289             data = _Pickle_FastCall(self->peek, len);
1290             if (data == NULL) {
1291                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1292                     return -1;
1293                 /* peek() is probably not supported by the given file object */
1294                 PyErr_Clear();
1295                 Py_CLEAR(self->peek);
1296             }
1297             else {
1298                 read_size = _Unpickler_SetStringInput(self, data);
1299                 Py_DECREF(data);
1300                 self->prefetched_idx = 0;
1301                 if (n <= read_size)
1302                     return n;
1303             }
1304         }
1305         len = PyLong_FromSsize_t(n);
1306         if (len == NULL)
1307             return -1;
1308         data = _Pickle_FastCall(self->read, len);
1309     }
1310     if (data == NULL)
1311         return -1;
1312 
1313     read_size = _Unpickler_SetStringInput(self, data);
1314     Py_DECREF(data);
1315     return read_size;
1316 }
1317 
1318 /* Don't call it directly: use _Unpickler_Read() */
1319 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1320 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1321 {
1322     Py_ssize_t num_read;
1323 
1324     *s = NULL;
1325     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1326         PickleState *st = _Pickle_GetGlobalState();
1327         PyErr_SetString(st->UnpicklingError,
1328                         "read would overflow (invalid bytecode)");
1329         return -1;
1330     }
1331 
1332     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1333     assert(self->next_read_idx + n > self->input_len);
1334 
1335     if (!self->read)
1336         return bad_readline();
1337 
1338     /* Extend the buffer to satisfy desired size */
1339     num_read = _Unpickler_ReadFromFile(self, n);
1340     if (num_read < 0)
1341         return -1;
1342     if (num_read < n)
1343         return bad_readline();
1344     *s = self->input_buffer;
1345     self->next_read_idx = n;
1346     return n;
1347 }
1348 
1349 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1350  *
1351  * This should only be used for non-small data reads where potentially
1352  * avoiding a copy is beneficial.  This method does not try to prefetch
1353  * more data into the input buffer.
1354  *
1355  * _Unpickler_Read() is recommended in most cases.
1356  */
1357 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1358 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1359 {
1360     assert(n != READ_WHOLE_LINE);
1361 
1362     /* Read from available buffer data, if any */
1363     Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1364     if (in_buffer > 0) {
1365         Py_ssize_t to_read = Py_MIN(in_buffer, n);
1366         memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1367         self->next_read_idx += to_read;
1368         buf += to_read;
1369         n -= to_read;
1370         if (n == 0) {
1371             /* Entire read was satisfied from buffer */
1372             return n;
1373         }
1374     }
1375 
1376     /* Read from file */
1377     if (!self->read) {
1378         /* We're unpickling memory, this means the input is truncated */
1379         return bad_readline();
1380     }
1381     if (_Unpickler_SkipConsumed(self) < 0) {
1382         return -1;
1383     }
1384 
1385     if (!self->readinto) {
1386         /* readinto() not supported on file-like object, fall back to read()
1387          * and copy into destination buffer (bpo-39681) */
1388         PyObject* len = PyLong_FromSsize_t(n);
1389         if (len == NULL) {
1390             return -1;
1391         }
1392         PyObject* data = _Pickle_FastCall(self->read, len);
1393         if (data == NULL) {
1394             return -1;
1395         }
1396         if (!PyBytes_Check(data)) {
1397             PyErr_Format(PyExc_ValueError,
1398                          "read() returned non-bytes object (%R)",
1399                          Py_TYPE(data));
1400             Py_DECREF(data);
1401             return -1;
1402         }
1403         Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1404         if (read_size < n) {
1405             Py_DECREF(data);
1406             return bad_readline();
1407         }
1408         memcpy(buf, PyBytes_AS_STRING(data), n);
1409         Py_DECREF(data);
1410         return n;
1411     }
1412 
1413     /* Call readinto() into user buffer */
1414     PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1415     if (buf_obj == NULL) {
1416         return -1;
1417     }
1418     PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1419     if (read_size_obj == NULL) {
1420         return -1;
1421     }
1422     Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1423     Py_DECREF(read_size_obj);
1424 
1425     if (read_size < 0) {
1426         if (!PyErr_Occurred()) {
1427             PyErr_SetString(PyExc_ValueError,
1428                             "readinto() returned negative size");
1429         }
1430         return -1;
1431     }
1432     if (read_size < n) {
1433         return bad_readline();
1434     }
1435     return n;
1436 }
1437 
/* Read `n` bytes from the unpickler's data source, storing a pointer to
   them in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly, because it also copes with input
   streams.

   Fast path: when the input buffer still holds at least `n` unread bytes,
   `*s` is pointed at them and next_read_idx is advanced by `n`.
   Otherwise the work is delegated to _Unpickler_ReadImpl(), which refills
   the buffer from the file.

   This is a macro: `self` and `n` are evaluated more than once, so do not
   pass expressions with side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((self)->input_len - (self)->next_read_idx >= (n))      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1457 
1458 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1459 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1460                     char **result)
1461 {
1462     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1463     if (input_line == NULL) {
1464         PyErr_NoMemory();
1465         return -1;
1466     }
1467 
1468     memcpy(input_line, line, len);
1469     input_line[len] = '\0';
1470     self->input_line = input_line;
1471     *result = self->input_line;
1472     return len;
1473 }
1474 
1475 /* Read a line from the input stream/buffer. If we run off the end of the input
1476    before hitting \n, raise an error.
1477 
1478    Returns the number of chars read, or -1 on failure. */
1479 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1480 _Unpickler_Readline(UnpicklerObject *self, char **result)
1481 {
1482     Py_ssize_t i, num_read;
1483 
1484     for (i = self->next_read_idx; i < self->input_len; i++) {
1485         if (self->input_buffer[i] == '\n') {
1486             char *line_start = self->input_buffer + self->next_read_idx;
1487             num_read = i - self->next_read_idx + 1;
1488             self->next_read_idx = i + 1;
1489             return _Unpickler_CopyLine(self, line_start, num_read, result);
1490         }
1491     }
1492     if (!self->read)
1493         return bad_readline();
1494 
1495     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1496     if (num_read < 0)
1497         return -1;
1498     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1499         return bad_readline();
1500     self->next_read_idx = num_read;
1501     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1502 }
1503 
1504 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1505    will be modified in place. */
1506 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1507 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1508 {
1509     size_t i;
1510 
1511     assert(new_size > self->memo_size);
1512 
1513     PyObject **memo_new = self->memo;
1514     PyMem_RESIZE(memo_new, PyObject *, new_size);
1515     if (memo_new == NULL) {
1516         PyErr_NoMemory();
1517         return -1;
1518     }
1519     self->memo = memo_new;
1520     for (i = self->memo_size; i < new_size; i++)
1521         self->memo[i] = NULL;
1522     self->memo_size = new_size;
1523     return 0;
1524 }
1525 
1526 /* Returns NULL if idx is out of bounds. */
1527 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1528 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1529 {
1530     if (idx >= self->memo_size)
1531         return NULL;
1532 
1533     return self->memo[idx];
1534 }
1535 
1536 /* Returns -1 (with an exception set) on failure, 0 on success.
1537    This takes its own reference to `value`. */
1538 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1539 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1540 {
1541     PyObject *old_item;
1542 
1543     if (idx >= self->memo_size) {
1544         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1545             return -1;
1546         assert(idx < self->memo_size);
1547     }
1548     Py_INCREF(value);
1549     old_item = self->memo[idx];
1550     self->memo[idx] = value;
1551     if (old_item != NULL) {
1552         Py_DECREF(old_item);
1553     }
1554     else {
1555         self->memo_len++;
1556     }
1557     return 0;
1558 }
1559 
1560 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1561 _Unpickler_NewMemo(Py_ssize_t new_size)
1562 {
1563     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1564     if (memo == NULL) {
1565         PyErr_NoMemory();
1566         return NULL;
1567     }
1568     memset(memo, 0, new_size * sizeof(PyObject *));
1569     return memo;
1570 }
1571 
1572 /* Free the unpickler's memo, taking care to decref any items left in it. */
1573 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1574 _Unpickler_MemoCleanup(UnpicklerObject *self)
1575 {
1576     Py_ssize_t i;
1577     PyObject **memo = self->memo;
1578 
1579     if (self->memo == NULL)
1580         return;
1581     self->memo = NULL;
1582     i = self->memo_size;
1583     while (--i >= 0) {
1584         Py_XDECREF(memo[i]);
1585     }
1586     PyMem_Free(memo);
1587 }
1588 
1589 static UnpicklerObject *
_Unpickler_New(void)1590 _Unpickler_New(void)
1591 {
1592     UnpicklerObject *self;
1593 
1594     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1595     if (self == NULL)
1596         return NULL;
1597 
1598     self->pers_func = NULL;
1599     self->input_buffer = NULL;
1600     self->input_line = NULL;
1601     self->input_len = 0;
1602     self->next_read_idx = 0;
1603     self->prefetched_idx = 0;
1604     self->read = NULL;
1605     self->readinto = NULL;
1606     self->readline = NULL;
1607     self->peek = NULL;
1608     self->buffers = NULL;
1609     self->encoding = NULL;
1610     self->errors = NULL;
1611     self->marks = NULL;
1612     self->num_marks = 0;
1613     self->marks_size = 0;
1614     self->proto = 0;
1615     self->fix_imports = 0;
1616     memset(&self->buffer, 0, sizeof(Py_buffer));
1617     self->memo_size = 32;
1618     self->memo_len = 0;
1619     self->memo = _Unpickler_NewMemo(self->memo_size);
1620     self->stack = (Pdata *)Pdata_New();
1621 
1622     if (self->memo == NULL || self->stack == NULL) {
1623         Py_DECREF(self);
1624         return NULL;
1625     }
1626 
1627     PyObject_GC_Track(self);
1628     return self;
1629 }
1630 
1631 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1632    be called once on a freshly created Unpickler. */
1633 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1634 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1635 {
1636     _Py_IDENTIFIER(peek);
1637     _Py_IDENTIFIER(read);
1638     _Py_IDENTIFIER(readinto);
1639     _Py_IDENTIFIER(readline);
1640 
1641     /* Optional file methods */
1642     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1643         return -1;
1644     }
1645     if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1646         return -1;
1647     }
1648     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1649     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1650     if (!self->readline || !self->read) {
1651         if (!PyErr_Occurred()) {
1652             PyErr_SetString(PyExc_TypeError,
1653                             "file must have 'read' and 'readline' attributes");
1654         }
1655         Py_CLEAR(self->read);
1656         Py_CLEAR(self->readinto);
1657         Py_CLEAR(self->readline);
1658         Py_CLEAR(self->peek);
1659         return -1;
1660     }
1661     return 0;
1662 }
1663 
1664 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1665    be called once on a freshly created Unpickler. */
1666 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1667 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1668                             const char *encoding,
1669                             const char *errors)
1670 {
1671     if (encoding == NULL)
1672         encoding = "ASCII";
1673     if (errors == NULL)
1674         errors = "strict";
1675 
1676     self->encoding = _PyMem_Strdup(encoding);
1677     self->errors = _PyMem_Strdup(errors);
1678     if (self->encoding == NULL || self->errors == NULL) {
1679         PyErr_NoMemory();
1680         return -1;
1681     }
1682     return 0;
1683 }
1684 
1685 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1686    be called once on a freshly created Unpickler. */
1687 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1688 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1689 {
1690     if (buffers == NULL || buffers == Py_None) {
1691         self->buffers = NULL;
1692     }
1693     else {
1694         self->buffers = PyObject_GetIter(buffers);
1695         if (self->buffers == NULL) {
1696             return -1;
1697         }
1698     }
1699     return 0;
1700 }
1701 
1702 /* Generate a GET opcode for an object stored in the memo. */
1703 static int
memo_get(PicklerObject * self,PyObject * key)1704 memo_get(PicklerObject *self, PyObject *key)
1705 {
1706     Py_ssize_t *value;
1707     char pdata[30];
1708     Py_ssize_t len;
1709 
1710     value = PyMemoTable_Get(self->memo, key);
1711     if (value == NULL)  {
1712         PyErr_SetObject(PyExc_KeyError, key);
1713         return -1;
1714     }
1715 
1716     if (!self->bin) {
1717         pdata[0] = GET;
1718         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1719                       "%zd\n", *value);
1720         len = strlen(pdata);
1721     }
1722     else {
1723         if (*value < 256) {
1724             pdata[0] = BINGET;
1725             pdata[1] = (unsigned char)(*value & 0xff);
1726             len = 2;
1727         }
1728         else if ((size_t)*value <= 0xffffffffUL) {
1729             pdata[0] = LONG_BINGET;
1730             pdata[1] = (unsigned char)(*value & 0xff);
1731             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1732             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1733             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1734             len = 5;
1735         }
1736         else { /* unlikely */
1737             PickleState *st = _Pickle_GetGlobalState();
1738             PyErr_SetString(st->PicklingError,
1739                             "memo id too large for LONG_BINGET");
1740             return -1;
1741         }
1742     }
1743 
1744     if (_Pickler_Write(self, pdata, len) < 0)
1745         return -1;
1746 
1747     return 0;
1748 }
1749 
1750 /* Store an object in the memo, assign it a new unique ID based on the number
1751    of objects currently stored in the memo and generate a PUT opcode. */
1752 static int
memo_put(PicklerObject * self,PyObject * obj)1753 memo_put(PicklerObject *self, PyObject *obj)
1754 {
1755     char pdata[30];
1756     Py_ssize_t len;
1757     Py_ssize_t idx;
1758 
1759     const char memoize_op = MEMOIZE;
1760 
1761     if (self->fast)
1762         return 0;
1763 
1764     idx = PyMemoTable_Size(self->memo);
1765     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1766         return -1;
1767 
1768     if (self->proto >= 4) {
1769         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1770             return -1;
1771         return 0;
1772     }
1773     else if (!self->bin) {
1774         pdata[0] = PUT;
1775         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1776                       "%zd\n", idx);
1777         len = strlen(pdata);
1778     }
1779     else {
1780         if (idx < 256) {
1781             pdata[0] = BINPUT;
1782             pdata[1] = (unsigned char)idx;
1783             len = 2;
1784         }
1785         else if ((size_t)idx <= 0xffffffffUL) {
1786             pdata[0] = LONG_BINPUT;
1787             pdata[1] = (unsigned char)(idx & 0xff);
1788             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1789             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1790             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1791             len = 5;
1792         }
1793         else { /* unlikely */
1794             PickleState *st = _Pickle_GetGlobalState();
1795             PyErr_SetString(st->PicklingError,
1796                             "memo id too large for LONG_BINPUT");
1797             return -1;
1798         }
1799     }
1800     if (_Pickler_Write(self, pdata, len) < 0)
1801         return -1;
1802 
1803     return 0;
1804 }
1805 
1806 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1807 get_dotted_path(PyObject *obj, PyObject *name)
1808 {
1809     _Py_static_string(PyId_dot, ".");
1810     PyObject *dotted_path;
1811     Py_ssize_t i, n;
1812 
1813     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1814     if (dotted_path == NULL)
1815         return NULL;
1816     n = PyList_GET_SIZE(dotted_path);
1817     assert(n >= 1);
1818     for (i = 0; i < n; i++) {
1819         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1820         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1821             if (obj == NULL)
1822                 PyErr_Format(PyExc_AttributeError,
1823                              "Can't pickle local object %R", name);
1824             else
1825                 PyErr_Format(PyExc_AttributeError,
1826                              "Can't pickle local attribute %R on %R", name, obj);
1827             Py_DECREF(dotted_path);
1828             return NULL;
1829         }
1830     }
1831     return dotted_path;
1832 }
1833 
1834 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1835 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1836 {
1837     Py_ssize_t i, n;
1838     PyObject *parent = NULL;
1839 
1840     assert(PyList_CheckExact(names));
1841     Py_INCREF(obj);
1842     n = PyList_GET_SIZE(names);
1843     for (i = 0; i < n; i++) {
1844         PyObject *name = PyList_GET_ITEM(names, i);
1845         Py_XDECREF(parent);
1846         parent = obj;
1847         (void)_PyObject_LookupAttr(parent, name, &obj);
1848         if (obj == NULL) {
1849             Py_DECREF(parent);
1850             return NULL;
1851         }
1852     }
1853     if (pparent != NULL)
1854         *pparent = parent;
1855     else
1856         Py_XDECREF(parent);
1857     return obj;
1858 }
1859 
1860 
1861 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1862 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1863 {
1864     PyObject *dotted_path, *attr;
1865 
1866     if (allow_qualname) {
1867         dotted_path = get_dotted_path(obj, name);
1868         if (dotted_path == NULL)
1869             return NULL;
1870         attr = get_deep_attribute(obj, dotted_path, NULL);
1871         Py_DECREF(dotted_path);
1872     }
1873     else {
1874         (void)_PyObject_LookupAttr(obj, name, &attr);
1875     }
1876     if (attr == NULL && !PyErr_Occurred()) {
1877         PyErr_Format(PyExc_AttributeError,
1878                      "Can't get attribute %R on %R", name, obj);
1879     }
1880     return attr;
1881 }
1882 
1883 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1884 _checkmodule(PyObject *module_name, PyObject *module,
1885              PyObject *global, PyObject *dotted_path)
1886 {
1887     if (module == Py_None) {
1888         return -1;
1889     }
1890     if (PyUnicode_Check(module_name) &&
1891             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1892         return -1;
1893     }
1894 
1895     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1896     if (candidate == NULL) {
1897         return -1;
1898     }
1899     if (candidate != global) {
1900         Py_DECREF(candidate);
1901         return -1;
1902     }
1903     Py_DECREF(candidate);
1904     return 0;
1905 }
1906 
1907 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1908 whichmodule(PyObject *global, PyObject *dotted_path)
1909 {
1910     PyObject *module_name;
1911     PyObject *module = NULL;
1912     Py_ssize_t i;
1913     PyObject *modules;
1914     _Py_IDENTIFIER(__module__);
1915     _Py_IDENTIFIER(modules);
1916     _Py_IDENTIFIER(__main__);
1917 
1918     if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1919         return NULL;
1920     }
1921     if (module_name) {
1922         /* In some rare cases (e.g., bound methods of extension types),
1923            __module__ can be None. If it is so, then search sys.modules for
1924            the module of global. */
1925         if (module_name != Py_None)
1926             return module_name;
1927         Py_CLEAR(module_name);
1928     }
1929     assert(module_name == NULL);
1930 
1931     /* Fallback on walking sys.modules */
1932     modules = _PySys_GetObjectId(&PyId_modules);
1933     if (modules == NULL) {
1934         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1935         return NULL;
1936     }
1937     if (PyDict_CheckExact(modules)) {
1938         i = 0;
1939         while (PyDict_Next(modules, &i, &module_name, &module)) {
1940             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1941                 Py_INCREF(module_name);
1942                 return module_name;
1943             }
1944             if (PyErr_Occurred()) {
1945                 return NULL;
1946             }
1947         }
1948     }
1949     else {
1950         PyObject *iterator = PyObject_GetIter(modules);
1951         if (iterator == NULL) {
1952             return NULL;
1953         }
1954         while ((module_name = PyIter_Next(iterator))) {
1955             module = PyObject_GetItem(modules, module_name);
1956             if (module == NULL) {
1957                 Py_DECREF(module_name);
1958                 Py_DECREF(iterator);
1959                 return NULL;
1960             }
1961             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1962                 Py_DECREF(module);
1963                 Py_DECREF(iterator);
1964                 return module_name;
1965             }
1966             Py_DECREF(module);
1967             Py_DECREF(module_name);
1968             if (PyErr_Occurred()) {
1969                 Py_DECREF(iterator);
1970                 return NULL;
1971             }
1972         }
1973         Py_DECREF(iterator);
1974     }
1975 
1976     /* If no module is found, use __main__. */
1977     module_name = _PyUnicode_FromId(&PyId___main__);
1978     Py_XINCREF(module_name);
1979     return module_name;
1980 }
1981 
1982 /* fast_save_enter() and fast_save_leave() are guards against recursive
1983    objects when Pickler is used with the "fast mode" (i.e., with object
1984    memoization disabled). If the nesting of a list or dict object exceed
1985    FAST_NESTING_LIMIT, these guards will start keeping an internal
1986    reference to the seen list or dict objects and check whether these objects
1987    are recursive. These are not strictly necessary, since save() has a
1988    hard-coded recursion limit, but they give a nicer error message than the
1989    typical RuntimeError. */
1990 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1991 fast_save_enter(PicklerObject *self, PyObject *obj)
1992 {
1993     /* if fast_nesting < 0, we're doing an error exit. */
1994     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1995         PyObject *key = NULL;
1996         if (self->fast_memo == NULL) {
1997             self->fast_memo = PyDict_New();
1998             if (self->fast_memo == NULL) {
1999                 self->fast_nesting = -1;
2000                 return 0;
2001             }
2002         }
2003         key = PyLong_FromVoidPtr(obj);
2004         if (key == NULL) {
2005             self->fast_nesting = -1;
2006             return 0;
2007         }
2008         int r = PyDict_Contains(self->fast_memo, key);
2009         if (r > 0) {
2010             PyErr_Format(PyExc_ValueError,
2011                          "fast mode: can't pickle cyclic objects "
2012                          "including object type %.200s at %p",
2013                          Py_TYPE(obj)->tp_name, obj);
2014         }
2015         else if (r == 0) {
2016             r = PyDict_SetItem(self->fast_memo, key, Py_None);
2017         }
2018         Py_DECREF(key);
2019         if (r != 0) {
2020             self->fast_nesting = -1;
2021             return 0;
2022         }
2023     }
2024     return 1;
2025 }
2026 
2027 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2028 fast_save_leave(PicklerObject *self, PyObject *obj)
2029 {
2030     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2031         PyObject *key = PyLong_FromVoidPtr(obj);
2032         if (key == NULL)
2033             return 0;
2034         if (PyDict_DelItem(self->fast_memo, key) < 0) {
2035             Py_DECREF(key);
2036             return 0;
2037         }
2038         Py_DECREF(key);
2039     }
2040     return 1;
2041 }
2042 
2043 static int
save_none(PicklerObject * self,PyObject * obj)2044 save_none(PicklerObject *self, PyObject *obj)
2045 {
2046     const char none_op = NONE;
2047     if (_Pickler_Write(self, &none_op, 1) < 0)
2048         return -1;
2049 
2050     return 0;
2051 }
2052 
2053 static int
save_bool(PicklerObject * self,PyObject * obj)2054 save_bool(PicklerObject *self, PyObject *obj)
2055 {
2056     if (self->proto >= 2) {
2057         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2058         if (_Pickler_Write(self, &bool_op, 1) < 0)
2059             return -1;
2060     }
2061     else {
2062         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2063          * so that unpicklers written before bools were introduced unpickle them
2064          * as ints, but unpicklers after can recognize that bools were intended.
2065          * Note that protocol 2 added direct ways to pickle bools.
2066          */
2067         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2068         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2069             return -1;
2070     }
2071     return 0;
2072 }
2073 
2074 static int
save_long(PicklerObject * self,PyObject * obj)2075 save_long(PicklerObject *self, PyObject *obj)
2076 {
2077     PyObject *repr = NULL;
2078     Py_ssize_t size;
2079     long val;
2080     int overflow;
2081     int status = 0;
2082 
2083     val= PyLong_AsLongAndOverflow(obj, &overflow);
2084     if (!overflow && (sizeof(long) <= 4 ||
2085             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2086     {
2087         /* result fits in a signed 4-byte integer.
2088 
2089            Note: we can't use -0x80000000L in the above condition because some
2090            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2091            before applying the unary minus when sizeof(long) <= 4. The
2092            resulting value stays unsigned which is commonly not what we want,
2093            so MSVC happily warns us about it.  However, that result would have
2094            been fine because we guard for sizeof(long) <= 4 which turns the
2095            condition true in that particular case. */
2096         char pdata[32];
2097         Py_ssize_t len = 0;
2098 
2099         if (self->bin) {
2100             pdata[1] = (unsigned char)(val & 0xff);
2101             pdata[2] = (unsigned char)((val >> 8) & 0xff);
2102             pdata[3] = (unsigned char)((val >> 16) & 0xff);
2103             pdata[4] = (unsigned char)((val >> 24) & 0xff);
2104 
2105             if ((pdata[4] != 0) || (pdata[3] != 0)) {
2106                 pdata[0] = BININT;
2107                 len = 5;
2108             }
2109             else if (pdata[2] != 0) {
2110                 pdata[0] = BININT2;
2111                 len = 3;
2112             }
2113             else {
2114                 pdata[0] = BININT1;
2115                 len = 2;
2116             }
2117         }
2118         else {
2119             sprintf(pdata, "%c%ld\n", INT,  val);
2120             len = strlen(pdata);
2121         }
2122         if (_Pickler_Write(self, pdata, len) < 0)
2123             return -1;
2124 
2125         return 0;
2126     }
2127     assert(!PyErr_Occurred());
2128 
2129     if (self->proto >= 2) {
2130         /* Linear-time pickling. */
2131         size_t nbits;
2132         size_t nbytes;
2133         unsigned char *pdata;
2134         char header[5];
2135         int i;
2136         int sign = _PyLong_Sign(obj);
2137 
2138         if (sign == 0) {
2139             header[0] = LONG1;
2140             header[1] = 0;      /* It's 0 -- an empty bytestring. */
2141             if (_Pickler_Write(self, header, 2) < 0)
2142                 goto error;
2143             return 0;
2144         }
2145         nbits = _PyLong_NumBits(obj);
2146         if (nbits == (size_t)-1 && PyErr_Occurred())
2147             goto error;
2148         /* How many bytes do we need?  There are nbits >> 3 full
2149          * bytes of data, and nbits & 7 leftover bits.  If there
2150          * are any leftover bits, then we clearly need another
2151          * byte.  What's not so obvious is that we *probably*
2152          * need another byte even if there aren't any leftovers:
2153          * the most-significant bit of the most-significant byte
2154          * acts like a sign bit, and it's usually got a sense
2155          * opposite of the one we need.  The exception is ints
2156          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2157          * its own 256's-complement, so has the right sign bit
2158          * even without the extra byte.  That's a pain to check
2159          * for in advance, though, so we always grab an extra
2160          * byte at the start, and cut it back later if possible.
2161          */
2162         nbytes = (nbits >> 3) + 1;
2163         if (nbytes > 0x7fffffffL) {
2164             PyErr_SetString(PyExc_OverflowError,
2165                             "int too large to pickle");
2166             goto error;
2167         }
2168         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2169         if (repr == NULL)
2170             goto error;
2171         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2172         i = _PyLong_AsByteArray((PyLongObject *)obj,
2173                                 pdata, nbytes,
2174                                 1 /* little endian */ , 1 /* signed */ );
2175         if (i < 0)
2176             goto error;
2177         /* If the int is negative, this may be a byte more than
2178          * needed.  This is so iff the MSB is all redundant sign
2179          * bits.
2180          */
2181         if (sign < 0 &&
2182             nbytes > 1 &&
2183             pdata[nbytes - 1] == 0xff &&
2184             (pdata[nbytes - 2] & 0x80) != 0) {
2185             nbytes--;
2186         }
2187 
2188         if (nbytes < 256) {
2189             header[0] = LONG1;
2190             header[1] = (unsigned char)nbytes;
2191             size = 2;
2192         }
2193         else {
2194             header[0] = LONG4;
2195             size = (Py_ssize_t) nbytes;
2196             for (i = 1; i < 5; i++) {
2197                 header[i] = (unsigned char)(size & 0xff);
2198                 size >>= 8;
2199             }
2200             size = 5;
2201         }
2202         if (_Pickler_Write(self, header, size) < 0 ||
2203             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2204             goto error;
2205     }
2206     else {
2207         const char long_op = LONG;
2208         const char *string;
2209 
2210         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2211            the number of digits), in both directions.  We add a trailing 'L'
2212            to the repr, for compatibility with Python 2.x. */
2213 
2214         repr = PyObject_Repr(obj);
2215         if (repr == NULL)
2216             goto error;
2217 
2218         string = PyUnicode_AsUTF8AndSize(repr, &size);
2219         if (string == NULL)
2220             goto error;
2221 
2222         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2223             _Pickler_Write(self, string, size) < 0 ||
2224             _Pickler_Write(self, "L\n", 2) < 0)
2225             goto error;
2226     }
2227 
2228     if (0) {
2229   error:
2230       status = -1;
2231     }
2232     Py_XDECREF(repr);
2233 
2234     return status;
2235 }
2236 
2237 static int
save_float(PicklerObject * self,PyObject * obj)2238 save_float(PicklerObject *self, PyObject *obj)
2239 {
2240     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2241 
2242     if (self->bin) {
2243         char pdata[9];
2244         pdata[0] = BINFLOAT;
2245         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2246             return -1;
2247         if (_Pickler_Write(self, pdata, 9) < 0)
2248             return -1;
2249    }
2250     else {
2251         int result = -1;
2252         char *buf = NULL;
2253         char op = FLOAT;
2254 
2255         if (_Pickler_Write(self, &op, 1) < 0)
2256             goto done;
2257 
2258         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2259         if (!buf) {
2260             PyErr_NoMemory();
2261             goto done;
2262         }
2263 
2264         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2265             goto done;
2266 
2267         if (_Pickler_Write(self, "\n", 1) < 0)
2268             goto done;
2269 
2270         result = 0;
2271 done:
2272         PyMem_Free(buf);
2273         return result;
2274     }
2275 
2276     return 0;
2277 }
2278 
2279 /* Perform direct write of the header and payload of the binary object.
2280 
2281    The large contiguous data is written directly into the underlying file
2282    object, bypassing the output_buffer of the Pickler.  We intentionally
2283    do not insert a protocol 4 frame opcode to make it possible to optimize
2284    file.read calls in the loader.
2285  */
2286 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2287 _Pickler_write_bytes(PicklerObject *self,
2288                      const char *header, Py_ssize_t header_size,
2289                      const char *data, Py_ssize_t data_size,
2290                      PyObject *payload)
2291 {
2292     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2293     int framing = self->framing;
2294 
2295     if (bypass_buffer) {
2296         assert(self->output_buffer != NULL);
2297         /* Commit the previous frame. */
2298         if (_Pickler_CommitFrame(self)) {
2299             return -1;
2300         }
2301         /* Disable framing temporarily */
2302         self->framing = 0;
2303     }
2304 
2305     if (_Pickler_Write(self, header, header_size) < 0) {
2306         return -1;
2307     }
2308 
2309     if (bypass_buffer && self->write != NULL) {
2310         /* Bypass the in-memory buffer to directly stream large data
2311            into the underlying file object. */
2312         PyObject *result, *mem = NULL;
2313         /* Dump the output buffer to the file. */
2314         if (_Pickler_FlushToFile(self) < 0) {
2315             return -1;
2316         }
2317 
2318         /* Stream write the payload into the file without going through the
2319            output buffer. */
2320         if (payload == NULL) {
2321             /* TODO: It would be better to use a memoryview with a linked
2322                original string if this is possible. */
2323             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2324             if (payload == NULL) {
2325                 return -1;
2326             }
2327         }
2328         result = PyObject_CallOneArg(self->write, payload);
2329         Py_XDECREF(mem);
2330         if (result == NULL) {
2331             return -1;
2332         }
2333         Py_DECREF(result);
2334 
2335         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2336         if (_Pickler_ClearBuffer(self) < 0) {
2337             return -1;
2338         }
2339     }
2340     else {
2341         if (_Pickler_Write(self, data, data_size) < 0) {
2342             return -1;
2343         }
2344     }
2345 
2346     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2347     self->framing = framing;
2348 
2349     return 0;
2350 }
2351 
2352 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2353 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2354                  Py_ssize_t size)
2355 {
2356     assert(self->proto >= 3);
2357 
2358     char header[9];
2359     Py_ssize_t len;
2360 
2361     if (size < 0)
2362         return -1;
2363 
2364     if (size <= 0xff) {
2365         header[0] = SHORT_BINBYTES;
2366         header[1] = (unsigned char)size;
2367         len = 2;
2368     }
2369     else if ((size_t)size <= 0xffffffffUL) {
2370         header[0] = BINBYTES;
2371         header[1] = (unsigned char)(size & 0xff);
2372         header[2] = (unsigned char)((size >> 8) & 0xff);
2373         header[3] = (unsigned char)((size >> 16) & 0xff);
2374         header[4] = (unsigned char)((size >> 24) & 0xff);
2375         len = 5;
2376     }
2377     else if (self->proto >= 4) {
2378         header[0] = BINBYTES8;
2379         _write_size64(header + 1, size);
2380         len = 9;
2381     }
2382     else {
2383         PyErr_SetString(PyExc_OverflowError,
2384                         "serializing a bytes object larger than 4 GiB "
2385                         "requires pickle protocol 4 or higher");
2386         return -1;
2387     }
2388 
2389     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2390         return -1;
2391     }
2392 
2393     if (memo_put(self, obj) < 0) {
2394         return -1;
2395     }
2396 
2397     return 0;
2398 }
2399 
2400 static int
save_bytes(PicklerObject * self,PyObject * obj)2401 save_bytes(PicklerObject *self, PyObject *obj)
2402 {
2403     if (self->proto < 3) {
2404         /* Older pickle protocols do not have an opcode for pickling bytes
2405            objects. Therefore, we need to fake the copy protocol (i.e.,
2406            the __reduce__ method) to permit bytes object unpickling.
2407 
2408            Here we use a hack to be compatible with Python 2. Since in Python
2409            2 'bytes' is just an alias for 'str' (which has different
2410            parameters than the actual bytes object), we use codecs.encode
2411            to create the appropriate 'str' object when unpickled using
2412            Python 2 *and* the appropriate 'bytes' object when unpickled
2413            using Python 3. Again this is a hack and we don't need to do this
2414            with newer protocols. */
2415         PyObject *reduce_value;
2416         int status;
2417 
2418         if (PyBytes_GET_SIZE(obj) == 0) {
2419             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2420         }
2421         else {
2422             PickleState *st = _Pickle_GetGlobalState();
2423             PyObject *unicode_str =
2424                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2425                                        PyBytes_GET_SIZE(obj),
2426                                        "strict");
2427             _Py_IDENTIFIER(latin1);
2428 
2429             if (unicode_str == NULL)
2430                 return -1;
2431             reduce_value = Py_BuildValue("(O(OO))",
2432                                          st->codecs_encode, unicode_str,
2433                                          _PyUnicode_FromId(&PyId_latin1));
2434             Py_DECREF(unicode_str);
2435         }
2436 
2437         if (reduce_value == NULL)
2438             return -1;
2439 
2440         /* save_reduce() will memoize the object automatically. */
2441         status = save_reduce(self, reduce_value, obj);
2442         Py_DECREF(reduce_value);
2443         return status;
2444     }
2445     else {
2446         return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2447                                 PyBytes_GET_SIZE(obj));
2448     }
2449 }
2450 
2451 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2452 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2453                      Py_ssize_t size)
2454 {
2455     assert(self->proto >= 5);
2456 
2457     char header[9];
2458     Py_ssize_t len;
2459 
2460     if (size < 0)
2461         return -1;
2462 
2463     header[0] = BYTEARRAY8;
2464     _write_size64(header + 1, size);
2465     len = 9;
2466 
2467     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2468         return -1;
2469     }
2470 
2471     if (memo_put(self, obj) < 0) {
2472         return -1;
2473     }
2474 
2475     return 0;
2476 }
2477 
2478 static int
save_bytearray(PicklerObject * self,PyObject * obj)2479 save_bytearray(PicklerObject *self, PyObject *obj)
2480 {
2481     if (self->proto < 5) {
2482         /* Older pickle protocols do not have an opcode for pickling
2483          * bytearrays. */
2484         PyObject *reduce_value = NULL;
2485         int status;
2486 
2487         if (PyByteArray_GET_SIZE(obj) == 0) {
2488             reduce_value = Py_BuildValue("(O())",
2489                                          (PyObject *) &PyByteArray_Type);
2490         }
2491         else {
2492             PyObject *bytes_obj = PyBytes_FromObject(obj);
2493             if (bytes_obj != NULL) {
2494                 reduce_value = Py_BuildValue("(O(O))",
2495                                              (PyObject *) &PyByteArray_Type,
2496                                              bytes_obj);
2497                 Py_DECREF(bytes_obj);
2498             }
2499         }
2500         if (reduce_value == NULL)
2501             return -1;
2502 
2503         /* save_reduce() will memoize the object automatically. */
2504         status = save_reduce(self, reduce_value, obj);
2505         Py_DECREF(reduce_value);
2506         return status;
2507     }
2508     else {
2509         return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2510                                     PyByteArray_GET_SIZE(obj));
2511     }
2512 }
2513 
2514 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2515 save_picklebuffer(PicklerObject *self, PyObject *obj)
2516 {
2517     if (self->proto < 5) {
2518         PickleState *st = _Pickle_GetGlobalState();
2519         PyErr_SetString(st->PicklingError,
2520                         "PickleBuffer can only pickled with protocol >= 5");
2521         return -1;
2522     }
2523     const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2524     if (view == NULL) {
2525         return -1;
2526     }
2527     if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2528         PickleState *st = _Pickle_GetGlobalState();
2529         PyErr_SetString(st->PicklingError,
2530                         "PickleBuffer can not be pickled when "
2531                         "pointing to a non-contiguous buffer");
2532         return -1;
2533     }
2534     int in_band = 1;
2535     if (self->buffer_callback != NULL) {
2536         PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2537         if (ret == NULL) {
2538             return -1;
2539         }
2540         in_band = PyObject_IsTrue(ret);
2541         Py_DECREF(ret);
2542         if (in_band == -1) {
2543             return -1;
2544         }
2545     }
2546     if (in_band) {
2547         /* Write data in-band */
2548         if (view->readonly) {
2549             return _save_bytes_data(self, obj, (const char*) view->buf,
2550                                     view->len);
2551         }
2552         else {
2553             return _save_bytearray_data(self, obj, (const char*) view->buf,
2554                                         view->len);
2555         }
2556     }
2557     else {
2558         /* Write data out-of-band */
2559         const char next_buffer_op = NEXT_BUFFER;
2560         if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2561             return -1;
2562         }
2563         if (view->readonly) {
2564             const char readonly_buffer_op = READONLY_BUFFER;
2565             if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2566                 return -1;
2567             }
2568         }
2569     }
2570     return 0;
2571 }
2572 
2573 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2574    backslash and newline characters to \uXXXX escapes. */
2575 static PyObject *
raw_unicode_escape(PyObject * obj)2576 raw_unicode_escape(PyObject *obj)
2577 {
2578     char *p;
2579     Py_ssize_t i, size;
2580     const void *data;
2581     unsigned int kind;
2582     _PyBytesWriter writer;
2583 
2584     if (PyUnicode_READY(obj))
2585         return NULL;
2586 
2587     _PyBytesWriter_Init(&writer);
2588 
2589     size = PyUnicode_GET_LENGTH(obj);
2590     data = PyUnicode_DATA(obj);
2591     kind = PyUnicode_KIND(obj);
2592 
2593     p = _PyBytesWriter_Alloc(&writer, size);
2594     if (p == NULL)
2595         goto error;
2596     writer.overallocate = 1;
2597 
2598     for (i=0; i < size; i++) {
2599         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2600         /* Map 32-bit characters to '\Uxxxxxxxx' */
2601         if (ch >= 0x10000) {
2602             /* -1: subtract 1 preallocated byte */
2603             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2604             if (p == NULL)
2605                 goto error;
2606 
2607             *p++ = '\\';
2608             *p++ = 'U';
2609             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2610             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2611             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2612             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2613             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2614             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2615             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2616             *p++ = Py_hexdigits[ch & 15];
2617         }
2618         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2619         else if (ch >= 256 ||
2620                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2621                  ch == 0x1a)
2622         {
2623             /* -1: subtract 1 preallocated byte */
2624             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2625             if (p == NULL)
2626                 goto error;
2627 
2628             *p++ = '\\';
2629             *p++ = 'u';
2630             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2631             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2632             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2633             *p++ = Py_hexdigits[ch & 15];
2634         }
2635         /* Copy everything else as-is */
2636         else
2637             *p++ = (char) ch;
2638     }
2639 
2640     return _PyBytesWriter_Finish(&writer, p);
2641 
2642 error:
2643     _PyBytesWriter_Dealloc(&writer);
2644     return NULL;
2645 }
2646 
2647 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2648 write_unicode_binary(PicklerObject *self, PyObject *obj)
2649 {
2650     char header[9];
2651     Py_ssize_t len;
2652     PyObject *encoded = NULL;
2653     Py_ssize_t size;
2654     const char *data;
2655 
2656     if (PyUnicode_READY(obj))
2657         return -1;
2658 
2659     data = PyUnicode_AsUTF8AndSize(obj, &size);
2660     if (data == NULL) {
2661         /* Issue #8383: for strings with lone surrogates, fallback on the
2662            "surrogatepass" error handler. */
2663         PyErr_Clear();
2664         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2665         if (encoded == NULL)
2666             return -1;
2667 
2668         data = PyBytes_AS_STRING(encoded);
2669         size = PyBytes_GET_SIZE(encoded);
2670     }
2671 
2672     assert(size >= 0);
2673     if (size <= 0xff && self->proto >= 4) {
2674         header[0] = SHORT_BINUNICODE;
2675         header[1] = (unsigned char)(size & 0xff);
2676         len = 2;
2677     }
2678     else if ((size_t)size <= 0xffffffffUL) {
2679         header[0] = BINUNICODE;
2680         header[1] = (unsigned char)(size & 0xff);
2681         header[2] = (unsigned char)((size >> 8) & 0xff);
2682         header[3] = (unsigned char)((size >> 16) & 0xff);
2683         header[4] = (unsigned char)((size >> 24) & 0xff);
2684         len = 5;
2685     }
2686     else if (self->proto >= 4) {
2687         header[0] = BINUNICODE8;
2688         _write_size64(header + 1, size);
2689         len = 9;
2690     }
2691     else {
2692         PyErr_SetString(PyExc_OverflowError,
2693                         "serializing a string larger than 4 GiB "
2694                         "requires pickle protocol 4 or higher");
2695         Py_XDECREF(encoded);
2696         return -1;
2697     }
2698 
2699     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2700         Py_XDECREF(encoded);
2701         return -1;
2702     }
2703     Py_XDECREF(encoded);
2704     return 0;
2705 }
2706 
2707 static int
save_unicode(PicklerObject * self,PyObject * obj)2708 save_unicode(PicklerObject *self, PyObject *obj)
2709 {
2710     if (self->bin) {
2711         if (write_unicode_binary(self, obj) < 0)
2712             return -1;
2713     }
2714     else {
2715         PyObject *encoded;
2716         Py_ssize_t size;
2717         const char unicode_op = UNICODE;
2718 
2719         encoded = raw_unicode_escape(obj);
2720         if (encoded == NULL)
2721             return -1;
2722 
2723         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2724             Py_DECREF(encoded);
2725             return -1;
2726         }
2727 
2728         size = PyBytes_GET_SIZE(encoded);
2729         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2730             Py_DECREF(encoded);
2731             return -1;
2732         }
2733         Py_DECREF(encoded);
2734 
2735         if (_Pickler_Write(self, "\n", 1) < 0)
2736             return -1;
2737     }
2738     if (memo_put(self, obj) < 0)
2739         return -1;
2740 
2741     return 0;
2742 }
2743 
2744 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2745 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2746 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2747 {
2748     Py_ssize_t i;
2749 
2750     assert(PyTuple_Size(t) == len);
2751 
2752     for (i = 0; i < len; i++) {
2753         PyObject *element = PyTuple_GET_ITEM(t, i);
2754 
2755         if (element == NULL)
2756             return -1;
2757         if (save(self, element, 0) < 0)
2758             return -1;
2759     }
2760 
2761     return 0;
2762 }
2763 
2764 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2765  * used across protocols to minimize the space needed to pickle them.
2766  * Tuples are also the only builtin immutable type that can be recursive
2767  * (a tuple can be reached from itself), and that requires some subtle
2768  * magic so that it works in all cases.  IOW, this is a long routine.
2769  */
2770 static int
save_tuple(PicklerObject * self,PyObject * obj)2771 save_tuple(PicklerObject *self, PyObject *obj)
2772 {
2773     Py_ssize_t len, i;
2774 
2775     const char mark_op = MARK;
2776     const char tuple_op = TUPLE;
2777     const char pop_op = POP;
2778     const char pop_mark_op = POP_MARK;
2779     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2780 
2781     if ((len = PyTuple_Size(obj)) < 0)
2782         return -1;
2783 
2784     if (len == 0) {
2785         char pdata[2];
2786 
2787         if (self->proto) {
2788             pdata[0] = EMPTY_TUPLE;
2789             len = 1;
2790         }
2791         else {
2792             pdata[0] = MARK;
2793             pdata[1] = TUPLE;
2794             len = 2;
2795         }
2796         if (_Pickler_Write(self, pdata, len) < 0)
2797             return -1;
2798         return 0;
2799     }
2800 
2801     /* The tuple isn't in the memo now.  If it shows up there after
2802      * saving the tuple elements, the tuple must be recursive, in
2803      * which case we'll pop everything we put on the stack, and fetch
2804      * its value from the memo.
2805      */
2806     if (len <= 3 && self->proto >= 2) {
2807         /* Use TUPLE{1,2,3} opcodes. */
2808         if (store_tuple_elements(self, obj, len) < 0)
2809             return -1;
2810 
2811         if (PyMemoTable_Get(self->memo, obj)) {
2812             /* pop the len elements */
2813             for (i = 0; i < len; i++)
2814                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2815                     return -1;
2816             /* fetch from memo */
2817             if (memo_get(self, obj) < 0)
2818                 return -1;
2819 
2820             return 0;
2821         }
2822         else { /* Not recursive. */
2823             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2824                 return -1;
2825         }
2826         goto memoize;
2827     }
2828 
2829     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2830      * Generate MARK e1 e2 ... TUPLE
2831      */
2832     if (_Pickler_Write(self, &mark_op, 1) < 0)
2833         return -1;
2834 
2835     if (store_tuple_elements(self, obj, len) < 0)
2836         return -1;
2837 
2838     if (PyMemoTable_Get(self->memo, obj)) {
2839         /* pop the stack stuff we pushed */
2840         if (self->bin) {
2841             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2842                 return -1;
2843         }
2844         else {
2845             /* Note that we pop one more than len, to remove
2846              * the MARK too.
2847              */
2848             for (i = 0; i <= len; i++)
2849                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2850                     return -1;
2851         }
2852         /* fetch from memo */
2853         if (memo_get(self, obj) < 0)
2854             return -1;
2855 
2856         return 0;
2857     }
2858     else { /* Not recursive. */
2859         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2860             return -1;
2861     }
2862 
2863   memoize:
2864     if (memo_put(self, obj) < 0)
2865         return -1;
2866 
2867     return 0;
2868 }
2869 
2870 /* iter is an iterator giving items, and we batch up chunks of
2871  *     MARK item item ... item APPENDS
2872  * opcode sequences.  Calling code should have arranged to first create an
2873  * empty list, or list-like object, for the APPENDS to operate on.
2874  * Returns 0 on success, <0 on error.
2875  */
2876 static int
batch_list(PicklerObject * self,PyObject * iter)2877 batch_list(PicklerObject *self, PyObject *iter)
2878 {
2879     PyObject *obj = NULL;
2880     PyObject *firstitem = NULL;
2881     int i, n;
2882 
2883     const char mark_op = MARK;
2884     const char append_op = APPEND;
2885     const char appends_op = APPENDS;
2886 
2887     assert(iter != NULL);
2888 
2889     /* XXX: I think this function could be made faster by avoiding the
2890        iterator interface and fetching objects directly from list using
2891        PyList_GET_ITEM.
2892     */
2893 
2894     if (self->proto == 0) {
2895         /* APPENDS isn't available; do one at a time. */
2896         for (;;) {
2897             obj = PyIter_Next(iter);
2898             if (obj == NULL) {
2899                 if (PyErr_Occurred())
2900                     return -1;
2901                 break;
2902             }
2903             i = save(self, obj, 0);
2904             Py_DECREF(obj);
2905             if (i < 0)
2906                 return -1;
2907             if (_Pickler_Write(self, &append_op, 1) < 0)
2908                 return -1;
2909         }
2910         return 0;
2911     }
2912 
2913     /* proto > 0:  write in batches of BATCHSIZE. */
2914     do {
2915         /* Get first item */
2916         firstitem = PyIter_Next(iter);
2917         if (firstitem == NULL) {
2918             if (PyErr_Occurred())
2919                 goto error;
2920 
2921             /* nothing more to add */
2922             break;
2923         }
2924 
2925         /* Try to get a second item */
2926         obj = PyIter_Next(iter);
2927         if (obj == NULL) {
2928             if (PyErr_Occurred())
2929                 goto error;
2930 
2931             /* Only one item to write */
2932             if (save(self, firstitem, 0) < 0)
2933                 goto error;
2934             if (_Pickler_Write(self, &append_op, 1) < 0)
2935                 goto error;
2936             Py_CLEAR(firstitem);
2937             break;
2938         }
2939 
2940         /* More than one item to write */
2941 
2942         /* Pump out MARK, items, APPENDS. */
2943         if (_Pickler_Write(self, &mark_op, 1) < 0)
2944             goto error;
2945 
2946         if (save(self, firstitem, 0) < 0)
2947             goto error;
2948         Py_CLEAR(firstitem);
2949         n = 1;
2950 
2951         /* Fetch and save up to BATCHSIZE items */
2952         while (obj) {
2953             if (save(self, obj, 0) < 0)
2954                 goto error;
2955             Py_CLEAR(obj);
2956             n += 1;
2957 
2958             if (n == BATCHSIZE)
2959                 break;
2960 
2961             obj = PyIter_Next(iter);
2962             if (obj == NULL) {
2963                 if (PyErr_Occurred())
2964                     goto error;
2965                 break;
2966             }
2967         }
2968 
2969         if (_Pickler_Write(self, &appends_op, 1) < 0)
2970             goto error;
2971 
2972     } while (n == BATCHSIZE);
2973     return 0;
2974 
2975   error:
2976     Py_XDECREF(firstitem);
2977     Py_XDECREF(obj);
2978     return -1;
2979 }
2980 
2981 /* This is a variant of batch_list() above, specialized for lists (with no
2982  * support for list subclasses). Like batch_list(), we batch up chunks of
2983  *     MARK item item ... item APPENDS
2984  * opcode sequences.  Calling code should have arranged to first create an
2985  * empty list, or list-like object, for the APPENDS to operate on.
2986  * Returns 0 on success, -1 on error.
2987  *
2988  * This version is considerably faster than batch_list(), if less general.
2989  *
2990  * Note that this only works for protocols > 0.
2991  */
2992 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2993 batch_list_exact(PicklerObject *self, PyObject *obj)
2994 {
2995     PyObject *item = NULL;
2996     Py_ssize_t this_batch, total;
2997 
2998     const char append_op = APPEND;
2999     const char appends_op = APPENDS;
3000     const char mark_op = MARK;
3001 
3002     assert(obj != NULL);
3003     assert(self->proto > 0);
3004     assert(PyList_CheckExact(obj));
3005 
3006     if (PyList_GET_SIZE(obj) == 1) {
3007         item = PyList_GET_ITEM(obj, 0);
3008         if (save(self, item, 0) < 0)
3009             return -1;
3010         if (_Pickler_Write(self, &append_op, 1) < 0)
3011             return -1;
3012         return 0;
3013     }
3014 
3015     /* Write in batches of BATCHSIZE. */
3016     total = 0;
3017     do {
3018         this_batch = 0;
3019         if (_Pickler_Write(self, &mark_op, 1) < 0)
3020             return -1;
3021         while (total < PyList_GET_SIZE(obj)) {
3022             item = PyList_GET_ITEM(obj, total);
3023             if (save(self, item, 0) < 0)
3024                 return -1;
3025             total++;
3026             if (++this_batch == BATCHSIZE)
3027                 break;
3028         }
3029         if (_Pickler_Write(self, &appends_op, 1) < 0)
3030             return -1;
3031 
3032     } while (total < PyList_GET_SIZE(obj));
3033 
3034     return 0;
3035 }
3036 
3037 static int
save_list(PicklerObject * self,PyObject * obj)3038 save_list(PicklerObject *self, PyObject *obj)
3039 {
3040     char header[3];
3041     Py_ssize_t len;
3042     int status = 0;
3043 
3044     if (self->fast && !fast_save_enter(self, obj))
3045         goto error;
3046 
3047     /* Create an empty list. */
3048     if (self->bin) {
3049         header[0] = EMPTY_LIST;
3050         len = 1;
3051     }
3052     else {
3053         header[0] = MARK;
3054         header[1] = LIST;
3055         len = 2;
3056     }
3057 
3058     if (_Pickler_Write(self, header, len) < 0)
3059         goto error;
3060 
3061     /* Get list length, and bow out early if empty. */
3062     if ((len = PyList_Size(obj)) < 0)
3063         goto error;
3064 
3065     if (memo_put(self, obj) < 0)
3066         goto error;
3067 
3068     if (len != 0) {
3069         /* Materialize the list elements. */
3070         if (PyList_CheckExact(obj) && self->proto > 0) {
3071             if (Py_EnterRecursiveCall(" while pickling an object"))
3072                 goto error;
3073             status = batch_list_exact(self, obj);
3074             Py_LeaveRecursiveCall();
3075         } else {
3076             PyObject *iter = PyObject_GetIter(obj);
3077             if (iter == NULL)
3078                 goto error;
3079 
3080             if (Py_EnterRecursiveCall(" while pickling an object")) {
3081                 Py_DECREF(iter);
3082                 goto error;
3083             }
3084             status = batch_list(self, iter);
3085             Py_LeaveRecursiveCall();
3086             Py_DECREF(iter);
3087         }
3088     }
3089     if (0) {
3090   error:
3091         status = -1;
3092     }
3093 
3094     if (self->fast && !fast_save_leave(self, obj))
3095         status = -1;
3096 
3097     return status;
3098 }
3099 
3100 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3101  *     MARK key value ... key value SETITEMS
3102  * opcode sequences.  Calling code should have arranged to first create an
3103  * empty dict, or dict-like object, for the SETITEMS to operate on.
3104  * Returns 0 on success, <0 on error.
3105  *
3106  * This is very much like batch_list().  The difference between saving
3107  * elements directly, and picking apart two-tuples, is so long-winded at
3108  * the C level, though, that attempts to combine these routines were too
3109  * ugly to bear.
3110  */
3111 static int
batch_dict(PicklerObject * self,PyObject * iter)3112 batch_dict(PicklerObject *self, PyObject *iter)
3113 {
3114     PyObject *obj = NULL;
3115     PyObject *firstitem = NULL;
3116     int i, n;
3117 
3118     const char mark_op = MARK;
3119     const char setitem_op = SETITEM;
3120     const char setitems_op = SETITEMS;
3121 
3122     assert(iter != NULL);
3123 
3124     if (self->proto == 0) {
3125         /* SETITEMS isn't available; do one at a time. */
3126         for (;;) {
3127             obj = PyIter_Next(iter);
3128             if (obj == NULL) {
3129                 if (PyErr_Occurred())
3130                     return -1;
3131                 break;
3132             }
3133             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3134                 PyErr_SetString(PyExc_TypeError, "dict items "
3135                                 "iterator must return 2-tuples");
3136                 return -1;
3137             }
3138             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3139             if (i >= 0)
3140                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3141             Py_DECREF(obj);
3142             if (i < 0)
3143                 return -1;
3144             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3145                 return -1;
3146         }
3147         return 0;
3148     }
3149 
3150     /* proto > 0:  write in batches of BATCHSIZE. */
3151     do {
3152         /* Get first item */
3153         firstitem = PyIter_Next(iter);
3154         if (firstitem == NULL) {
3155             if (PyErr_Occurred())
3156                 goto error;
3157 
3158             /* nothing more to add */
3159             break;
3160         }
3161         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3162             PyErr_SetString(PyExc_TypeError, "dict items "
3163                                 "iterator must return 2-tuples");
3164             goto error;
3165         }
3166 
3167         /* Try to get a second item */
3168         obj = PyIter_Next(iter);
3169         if (obj == NULL) {
3170             if (PyErr_Occurred())
3171                 goto error;
3172 
3173             /* Only one item to write */
3174             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3175                 goto error;
3176             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3177                 goto error;
3178             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3179                 goto error;
3180             Py_CLEAR(firstitem);
3181             break;
3182         }
3183 
3184         /* More than one item to write */
3185 
3186         /* Pump out MARK, items, SETITEMS. */
3187         if (_Pickler_Write(self, &mark_op, 1) < 0)
3188             goto error;
3189 
3190         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3191             goto error;
3192         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3193             goto error;
3194         Py_CLEAR(firstitem);
3195         n = 1;
3196 
3197         /* Fetch and save up to BATCHSIZE items */
3198         while (obj) {
3199             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3200                 PyErr_SetString(PyExc_TypeError, "dict items "
3201                     "iterator must return 2-tuples");
3202                 goto error;
3203             }
3204             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3205                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3206                 goto error;
3207             Py_CLEAR(obj);
3208             n += 1;
3209 
3210             if (n == BATCHSIZE)
3211                 break;
3212 
3213             obj = PyIter_Next(iter);
3214             if (obj == NULL) {
3215                 if (PyErr_Occurred())
3216                     goto error;
3217                 break;
3218             }
3219         }
3220 
3221         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3222             goto error;
3223 
3224     } while (n == BATCHSIZE);
3225     return 0;
3226 
3227   error:
3228     Py_XDECREF(firstitem);
3229     Py_XDECREF(obj);
3230     return -1;
3231 }
3232 
3233 /* This is a variant of batch_dict() above that specializes for dicts, with no
3234  * support for dict subclasses. Like batch_dict(), we batch up chunks of
3235  *     MARK key value ... key value SETITEMS
3236  * opcode sequences.  Calling code should have arranged to first create an
3237  * empty dict, or dict-like object, for the SETITEMS to operate on.
3238  * Returns 0 on success, -1 on error.
3239  *
3240  * Note that this currently doesn't work for protocol 0.
3241  */
3242 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3243 batch_dict_exact(PicklerObject *self, PyObject *obj)
3244 {
3245     PyObject *key = NULL, *value = NULL;
3246     int i;
3247     Py_ssize_t dict_size, ppos = 0;
3248 
3249     const char mark_op = MARK;
3250     const char setitem_op = SETITEM;
3251     const char setitems_op = SETITEMS;
3252 
3253     assert(obj != NULL && PyDict_CheckExact(obj));
3254     assert(self->proto > 0);
3255 
3256     dict_size = PyDict_GET_SIZE(obj);
3257 
3258     /* Special-case len(d) == 1 to save space. */
3259     if (dict_size == 1) {
3260         PyDict_Next(obj, &ppos, &key, &value);
3261         if (save(self, key, 0) < 0)
3262             return -1;
3263         if (save(self, value, 0) < 0)
3264             return -1;
3265         if (_Pickler_Write(self, &setitem_op, 1) < 0)
3266             return -1;
3267         return 0;
3268     }
3269 
3270     /* Write in batches of BATCHSIZE. */
3271     do {
3272         i = 0;
3273         if (_Pickler_Write(self, &mark_op, 1) < 0)
3274             return -1;
3275         while (PyDict_Next(obj, &ppos, &key, &value)) {
3276             if (save(self, key, 0) < 0)
3277                 return -1;
3278             if (save(self, value, 0) < 0)
3279                 return -1;
3280             if (++i == BATCHSIZE)
3281                 break;
3282         }
3283         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3284             return -1;
3285         if (PyDict_GET_SIZE(obj) != dict_size) {
3286             PyErr_Format(
3287                 PyExc_RuntimeError,
3288                 "dictionary changed size during iteration");
3289             return -1;
3290         }
3291 
3292     } while (i == BATCHSIZE);
3293     return 0;
3294 }
3295 
3296 static int
save_dict(PicklerObject * self,PyObject * obj)3297 save_dict(PicklerObject *self, PyObject *obj)
3298 {
3299     PyObject *items, *iter;
3300     char header[3];
3301     Py_ssize_t len;
3302     int status = 0;
3303     assert(PyDict_Check(obj));
3304 
3305     if (self->fast && !fast_save_enter(self, obj))
3306         goto error;
3307 
3308     /* Create an empty dict. */
3309     if (self->bin) {
3310         header[0] = EMPTY_DICT;
3311         len = 1;
3312     }
3313     else {
3314         header[0] = MARK;
3315         header[1] = DICT;
3316         len = 2;
3317     }
3318 
3319     if (_Pickler_Write(self, header, len) < 0)
3320         goto error;
3321 
3322     if (memo_put(self, obj) < 0)
3323         goto error;
3324 
3325     if (PyDict_GET_SIZE(obj)) {
3326         /* Save the dict items. */
3327         if (PyDict_CheckExact(obj) && self->proto > 0) {
3328             /* We can take certain shortcuts if we know this is a dict and
3329                not a dict subclass. */
3330             if (Py_EnterRecursiveCall(" while pickling an object"))
3331                 goto error;
3332             status = batch_dict_exact(self, obj);
3333             Py_LeaveRecursiveCall();
3334         } else {
3335             _Py_IDENTIFIER(items);
3336 
3337             items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
3338             if (items == NULL)
3339                 goto error;
3340             iter = PyObject_GetIter(items);
3341             Py_DECREF(items);
3342             if (iter == NULL)
3343                 goto error;
3344             if (Py_EnterRecursiveCall(" while pickling an object")) {
3345                 Py_DECREF(iter);
3346                 goto error;
3347             }
3348             status = batch_dict(self, iter);
3349             Py_LeaveRecursiveCall();
3350             Py_DECREF(iter);
3351         }
3352     }
3353 
3354     if (0) {
3355   error:
3356         status = -1;
3357     }
3358 
3359     if (self->fast && !fast_save_leave(self, obj))
3360         status = -1;
3361 
3362     return status;
3363 }
3364 
3365 static int
save_set(PicklerObject * self,PyObject * obj)3366 save_set(PicklerObject *self, PyObject *obj)
3367 {
3368     PyObject *item;
3369     int i;
3370     Py_ssize_t set_size, ppos = 0;
3371     Py_hash_t hash;
3372 
3373     const char empty_set_op = EMPTY_SET;
3374     const char mark_op = MARK;
3375     const char additems_op = ADDITEMS;
3376 
3377     if (self->proto < 4) {
3378         PyObject *items;
3379         PyObject *reduce_value;
3380         int status;
3381 
3382         items = PySequence_List(obj);
3383         if (items == NULL) {
3384             return -1;
3385         }
3386         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3387         Py_DECREF(items);
3388         if (reduce_value == NULL) {
3389             return -1;
3390         }
3391         /* save_reduce() will memoize the object automatically. */
3392         status = save_reduce(self, reduce_value, obj);
3393         Py_DECREF(reduce_value);
3394         return status;
3395     }
3396 
3397     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3398         return -1;
3399 
3400     if (memo_put(self, obj) < 0)
3401         return -1;
3402 
3403     set_size = PySet_GET_SIZE(obj);
3404     if (set_size == 0)
3405         return 0;  /* nothing to do */
3406 
3407     /* Write in batches of BATCHSIZE. */
3408     do {
3409         i = 0;
3410         if (_Pickler_Write(self, &mark_op, 1) < 0)
3411             return -1;
3412         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3413             if (save(self, item, 0) < 0)
3414                 return -1;
3415             if (++i == BATCHSIZE)
3416                 break;
3417         }
3418         if (_Pickler_Write(self, &additems_op, 1) < 0)
3419             return -1;
3420         if (PySet_GET_SIZE(obj) != set_size) {
3421             PyErr_Format(
3422                 PyExc_RuntimeError,
3423                 "set changed size during iteration");
3424             return -1;
3425         }
3426     } while (i == BATCHSIZE);
3427 
3428     return 0;
3429 }
3430 
3431 static int
save_frozenset(PicklerObject * self,PyObject * obj)3432 save_frozenset(PicklerObject *self, PyObject *obj)
3433 {
3434     PyObject *iter;
3435 
3436     const char mark_op = MARK;
3437     const char frozenset_op = FROZENSET;
3438 
3439     if (self->fast && !fast_save_enter(self, obj))
3440         return -1;
3441 
3442     if (self->proto < 4) {
3443         PyObject *items;
3444         PyObject *reduce_value;
3445         int status;
3446 
3447         items = PySequence_List(obj);
3448         if (items == NULL) {
3449             return -1;
3450         }
3451         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3452                                      items);
3453         Py_DECREF(items);
3454         if (reduce_value == NULL) {
3455             return -1;
3456         }
3457         /* save_reduce() will memoize the object automatically. */
3458         status = save_reduce(self, reduce_value, obj);
3459         Py_DECREF(reduce_value);
3460         return status;
3461     }
3462 
3463     if (_Pickler_Write(self, &mark_op, 1) < 0)
3464         return -1;
3465 
3466     iter = PyObject_GetIter(obj);
3467     if (iter == NULL) {
3468         return -1;
3469     }
3470     for (;;) {
3471         PyObject *item;
3472 
3473         item = PyIter_Next(iter);
3474         if (item == NULL) {
3475             if (PyErr_Occurred()) {
3476                 Py_DECREF(iter);
3477                 return -1;
3478             }
3479             break;
3480         }
3481         if (save(self, item, 0) < 0) {
3482             Py_DECREF(item);
3483             Py_DECREF(iter);
3484             return -1;
3485         }
3486         Py_DECREF(item);
3487     }
3488     Py_DECREF(iter);
3489 
3490     /* If the object is already in the memo, this means it is
3491        recursive. In this case, throw away everything we put on the
3492        stack, and fetch the object back from the memo. */
3493     if (PyMemoTable_Get(self->memo, obj)) {
3494         const char pop_mark_op = POP_MARK;
3495 
3496         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3497             return -1;
3498         if (memo_get(self, obj) < 0)
3499             return -1;
3500         return 0;
3501     }
3502 
3503     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3504         return -1;
3505     if (memo_put(self, obj) < 0)
3506         return -1;
3507 
3508     return 0;
3509 }
3510 
3511 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3512 fix_imports(PyObject **module_name, PyObject **global_name)
3513 {
3514     PyObject *key;
3515     PyObject *item;
3516     PickleState *st = _Pickle_GetGlobalState();
3517 
3518     key = PyTuple_Pack(2, *module_name, *global_name);
3519     if (key == NULL)
3520         return -1;
3521     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3522     Py_DECREF(key);
3523     if (item) {
3524         PyObject *fixed_module_name;
3525         PyObject *fixed_global_name;
3526 
3527         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3528             PyErr_Format(PyExc_RuntimeError,
3529                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3530                          "should be 2-tuples, not %.200s",
3531                          Py_TYPE(item)->tp_name);
3532             return -1;
3533         }
3534         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3535         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3536         if (!PyUnicode_Check(fixed_module_name) ||
3537             !PyUnicode_Check(fixed_global_name)) {
3538             PyErr_Format(PyExc_RuntimeError,
3539                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3540                          "should be pairs of str, not (%.200s, %.200s)",
3541                          Py_TYPE(fixed_module_name)->tp_name,
3542                          Py_TYPE(fixed_global_name)->tp_name);
3543             return -1;
3544         }
3545 
3546         Py_CLEAR(*module_name);
3547         Py_CLEAR(*global_name);
3548         Py_INCREF(fixed_module_name);
3549         Py_INCREF(fixed_global_name);
3550         *module_name = fixed_module_name;
3551         *global_name = fixed_global_name;
3552         return 0;
3553     }
3554     else if (PyErr_Occurred()) {
3555         return -1;
3556     }
3557 
3558     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3559     if (item) {
3560         if (!PyUnicode_Check(item)) {
3561             PyErr_Format(PyExc_RuntimeError,
3562                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3563                          "should be strings, not %.200s",
3564                          Py_TYPE(item)->tp_name);
3565             return -1;
3566         }
3567         Py_INCREF(item);
3568         Py_XSETREF(*module_name, item);
3569     }
3570     else if (PyErr_Occurred()) {
3571         return -1;
3572     }
3573 
3574     return 0;
3575 }
3576 
3577 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3578 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3579 {
3580     PyObject *global_name = NULL;
3581     PyObject *module_name = NULL;
3582     PyObject *module = NULL;
3583     PyObject *parent = NULL;
3584     PyObject *dotted_path = NULL;
3585     PyObject *lastname = NULL;
3586     PyObject *cls;
3587     PickleState *st = _Pickle_GetGlobalState();
3588     int status = 0;
3589     _Py_IDENTIFIER(__name__);
3590     _Py_IDENTIFIER(__qualname__);
3591 
3592     const char global_op = GLOBAL;
3593 
3594     if (name) {
3595         Py_INCREF(name);
3596         global_name = name;
3597     }
3598     else {
3599         if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3600             goto error;
3601         if (global_name == NULL) {
3602             global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3603             if (global_name == NULL)
3604                 goto error;
3605         }
3606     }
3607 
3608     dotted_path = get_dotted_path(module, global_name);
3609     if (dotted_path == NULL)
3610         goto error;
3611     module_name = whichmodule(obj, dotted_path);
3612     if (module_name == NULL)
3613         goto error;
3614 
3615     /* XXX: Change to use the import C API directly with level=0 to disallow
3616        relative imports.
3617 
3618        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3619        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3620        custom import functions (IMHO, this would be a nice security
3621        feature). The import C API would need to be extended to support the
3622        extra parameters of __import__ to fix that. */
3623     module = PyImport_Import(module_name);
3624     if (module == NULL) {
3625         PyErr_Format(st->PicklingError,
3626                      "Can't pickle %R: import of module %R failed",
3627                      obj, module_name);
3628         goto error;
3629     }
3630     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3631     Py_INCREF(lastname);
3632     cls = get_deep_attribute(module, dotted_path, &parent);
3633     Py_CLEAR(dotted_path);
3634     if (cls == NULL) {
3635         PyErr_Format(st->PicklingError,
3636                      "Can't pickle %R: attribute lookup %S on %S failed",
3637                      obj, global_name, module_name);
3638         goto error;
3639     }
3640     if (cls != obj) {
3641         Py_DECREF(cls);
3642         PyErr_Format(st->PicklingError,
3643                      "Can't pickle %R: it's not the same object as %S.%S",
3644                      obj, module_name, global_name);
3645         goto error;
3646     }
3647     Py_DECREF(cls);
3648 
3649     if (self->proto >= 2) {
3650         /* See whether this is in the extension registry, and if
3651          * so generate an EXT opcode.
3652          */
3653         PyObject *extension_key;
3654         PyObject *code_obj;      /* extension code as Python object */
3655         long code;               /* extension code as C value */
3656         char pdata[5];
3657         Py_ssize_t n;
3658 
3659         extension_key = PyTuple_Pack(2, module_name, global_name);
3660         if (extension_key == NULL) {
3661             goto error;
3662         }
3663         code_obj = PyDict_GetItemWithError(st->extension_registry,
3664                                            extension_key);
3665         Py_DECREF(extension_key);
3666         /* The object is not registered in the extension registry.
3667            This is the most likely code path. */
3668         if (code_obj == NULL) {
3669             if (PyErr_Occurred()) {
3670                 goto error;
3671             }
3672             goto gen_global;
3673         }
3674 
3675         /* XXX: pickle.py doesn't check neither the type, nor the range
3676            of the value returned by the extension_registry. It should for
3677            consistency. */
3678 
3679         /* Verify code_obj has the right type and value. */
3680         if (!PyLong_Check(code_obj)) {
3681             PyErr_Format(st->PicklingError,
3682                          "Can't pickle %R: extension code %R isn't an integer",
3683                          obj, code_obj);
3684             goto error;
3685         }
3686         code = PyLong_AS_LONG(code_obj);
3687         if (code <= 0 || code > 0x7fffffffL) {
3688             if (!PyErr_Occurred())
3689                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3690                              "code %ld is out of range", obj, code);
3691             goto error;
3692         }
3693 
3694         /* Generate an EXT opcode. */
3695         if (code <= 0xff) {
3696             pdata[0] = EXT1;
3697             pdata[1] = (unsigned char)code;
3698             n = 2;
3699         }
3700         else if (code <= 0xffff) {
3701             pdata[0] = EXT2;
3702             pdata[1] = (unsigned char)(code & 0xff);
3703             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3704             n = 3;
3705         }
3706         else {
3707             pdata[0] = EXT4;
3708             pdata[1] = (unsigned char)(code & 0xff);
3709             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3710             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3711             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3712             n = 5;
3713         }
3714 
3715         if (_Pickler_Write(self, pdata, n) < 0)
3716             goto error;
3717     }
3718     else {
3719   gen_global:
3720         if (parent == module) {
3721             Py_INCREF(lastname);
3722             Py_DECREF(global_name);
3723             global_name = lastname;
3724         }
3725         if (self->proto >= 4) {
3726             const char stack_global_op = STACK_GLOBAL;
3727 
3728             if (save(self, module_name, 0) < 0)
3729                 goto error;
3730             if (save(self, global_name, 0) < 0)
3731                 goto error;
3732 
3733             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3734                 goto error;
3735         }
3736         else if (parent != module) {
3737             PickleState *st = _Pickle_GetGlobalState();
3738             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3739                                         st->getattr, parent, lastname);
3740             if (reduce_value == NULL)
3741                 goto error;
3742             status = save_reduce(self, reduce_value, NULL);
3743             Py_DECREF(reduce_value);
3744             if (status < 0)
3745                 goto error;
3746         }
3747         else {
3748             /* Generate a normal global opcode if we are using a pickle
3749                protocol < 4, or if the object is not registered in the
3750                extension registry. */
3751             PyObject *encoded;
3752             PyObject *(*unicode_encoder)(PyObject *);
3753 
3754             if (_Pickler_Write(self, &global_op, 1) < 0)
3755                 goto error;
3756 
3757             /* For protocol < 3 and if the user didn't request against doing
3758                so, we convert module names to the old 2.x module names. */
3759             if (self->proto < 3 && self->fix_imports) {
3760                 if (fix_imports(&module_name, &global_name) < 0) {
3761                     goto error;
3762                 }
3763             }
3764 
3765             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3766                both the module name and the global name using UTF-8. We do so
3767                only when we are using the pickle protocol newer than version
3768                3. This is to ensure compatibility with older Unpickler running
3769                on Python 2.x. */
3770             if (self->proto == 3) {
3771                 unicode_encoder = PyUnicode_AsUTF8String;
3772             }
3773             else {
3774                 unicode_encoder = PyUnicode_AsASCIIString;
3775             }
3776             encoded = unicode_encoder(module_name);
3777             if (encoded == NULL) {
3778                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3779                     PyErr_Format(st->PicklingError,
3780                                  "can't pickle module identifier '%S' using "
3781                                  "pickle protocol %i",
3782                                  module_name, self->proto);
3783                 goto error;
3784             }
3785             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3786                                PyBytes_GET_SIZE(encoded)) < 0) {
3787                 Py_DECREF(encoded);
3788                 goto error;
3789             }
3790             Py_DECREF(encoded);
3791             if(_Pickler_Write(self, "\n", 1) < 0)
3792                 goto error;
3793 
3794             /* Save the name of the module. */
3795             encoded = unicode_encoder(global_name);
3796             if (encoded == NULL) {
3797                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3798                     PyErr_Format(st->PicklingError,
3799                                  "can't pickle global identifier '%S' using "
3800                                  "pickle protocol %i",
3801                                  global_name, self->proto);
3802                 goto error;
3803             }
3804             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3805                                PyBytes_GET_SIZE(encoded)) < 0) {
3806                 Py_DECREF(encoded);
3807                 goto error;
3808             }
3809             Py_DECREF(encoded);
3810             if (_Pickler_Write(self, "\n", 1) < 0)
3811                 goto error;
3812         }
3813         /* Memoize the object. */
3814         if (memo_put(self, obj) < 0)
3815             goto error;
3816     }
3817 
3818     if (0) {
3819   error:
3820         status = -1;
3821     }
3822     Py_XDECREF(module_name);
3823     Py_XDECREF(global_name);
3824     Py_XDECREF(module);
3825     Py_XDECREF(parent);
3826     Py_XDECREF(dotted_path);
3827     Py_XDECREF(lastname);
3828 
3829     return status;
3830 }
3831 
3832 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3833 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3834 {
3835     PyObject *reduce_value;
3836     int status;
3837 
3838     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3839     if (reduce_value == NULL) {
3840         return -1;
3841     }
3842     status = save_reduce(self, reduce_value, obj);
3843     Py_DECREF(reduce_value);
3844     return status;
3845 }
3846 
3847 static int
save_type(PicklerObject * self,PyObject * obj)3848 save_type(PicklerObject *self, PyObject *obj)
3849 {
3850     if (obj == (PyObject *)&_PyNone_Type) {
3851         return save_singleton_type(self, obj, Py_None);
3852     }
3853     else if (obj == (PyObject *)&PyEllipsis_Type) {
3854         return save_singleton_type(self, obj, Py_Ellipsis);
3855     }
3856     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3857         return save_singleton_type(self, obj, Py_NotImplemented);
3858     }
3859     return save_global(self, obj, NULL);
3860 }
3861 
3862 static int
save_pers(PicklerObject * self,PyObject * obj)3863 save_pers(PicklerObject *self, PyObject *obj)
3864 {
3865     PyObject *pid = NULL;
3866     int status = 0;
3867 
3868     const char persid_op = PERSID;
3869     const char binpersid_op = BINPERSID;
3870 
3871     pid = call_method(self->pers_func, self->pers_func_self, obj);
3872     if (pid == NULL)
3873         return -1;
3874 
3875     if (pid != Py_None) {
3876         if (self->bin) {
3877             if (save(self, pid, 1) < 0 ||
3878                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3879                 goto error;
3880         }
3881         else {
3882             PyObject *pid_str;
3883 
3884             pid_str = PyObject_Str(pid);
3885             if (pid_str == NULL)
3886                 goto error;
3887 
3888             /* XXX: Should it check whether the pid contains embedded
3889                newlines? */
3890             if (!PyUnicode_IS_ASCII(pid_str)) {
3891                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3892                                 "persistent IDs in protocol 0 must be "
3893                                 "ASCII strings");
3894                 Py_DECREF(pid_str);
3895                 goto error;
3896             }
3897 
3898             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3899                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3900                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3901                 _Pickler_Write(self, "\n", 1) < 0) {
3902                 Py_DECREF(pid_str);
3903                 goto error;
3904             }
3905             Py_DECREF(pid_str);
3906         }
3907         status = 1;
3908     }
3909 
3910     if (0) {
3911   error:
3912         status = -1;
3913     }
3914     Py_XDECREF(pid);
3915 
3916     return status;
3917 }
3918 
3919 static PyObject *
get_class(PyObject * obj)3920 get_class(PyObject *obj)
3921 {
3922     PyObject *cls;
3923     _Py_IDENTIFIER(__class__);
3924 
3925     if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3926         cls = (PyObject *) Py_TYPE(obj);
3927         Py_INCREF(cls);
3928     }
3929     return cls;
3930 }
3931 
3932 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3933  * appropriate __reduce__ method for obj.
3934  */
3935 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3936 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3937 {
3938     PyObject *callable;
3939     PyObject *argtup;
3940     PyObject *state = NULL;
3941     PyObject *listitems = Py_None;
3942     PyObject *dictitems = Py_None;
3943     PyObject *state_setter = Py_None;
3944     PickleState *st = _Pickle_GetGlobalState();
3945     Py_ssize_t size;
3946     int use_newobj = 0, use_newobj_ex = 0;
3947 
3948     const char reduce_op = REDUCE;
3949     const char build_op = BUILD;
3950     const char newobj_op = NEWOBJ;
3951     const char newobj_ex_op = NEWOBJ_EX;
3952 
3953     size = PyTuple_Size(args);
3954     if (size < 2 || size > 6) {
3955         PyErr_SetString(st->PicklingError, "tuple returned by "
3956                         "__reduce__ must contain 2 through 6 elements");
3957         return -1;
3958     }
3959 
3960     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3961                            &callable, &argtup, &state, &listitems, &dictitems,
3962                            &state_setter))
3963         return -1;
3964 
3965     if (!PyCallable_Check(callable)) {
3966         PyErr_SetString(st->PicklingError, "first item of the tuple "
3967                         "returned by __reduce__ must be callable");
3968         return -1;
3969     }
3970     if (!PyTuple_Check(argtup)) {
3971         PyErr_SetString(st->PicklingError, "second item of the tuple "
3972                         "returned by __reduce__ must be a tuple");
3973         return -1;
3974     }
3975 
3976     if (state == Py_None)
3977         state = NULL;
3978 
3979     if (listitems == Py_None)
3980         listitems = NULL;
3981     else if (!PyIter_Check(listitems)) {
3982         PyErr_Format(st->PicklingError, "fourth element of the tuple "
3983                      "returned by __reduce__ must be an iterator, not %s",
3984                      Py_TYPE(listitems)->tp_name);
3985         return -1;
3986     }
3987 
3988     if (dictitems == Py_None)
3989         dictitems = NULL;
3990     else if (!PyIter_Check(dictitems)) {
3991         PyErr_Format(st->PicklingError, "fifth element of the tuple "
3992                      "returned by __reduce__ must be an iterator, not %s",
3993                      Py_TYPE(dictitems)->tp_name);
3994         return -1;
3995     }
3996 
3997     if (state_setter == Py_None)
3998         state_setter = NULL;
3999     else if (!PyCallable_Check(state_setter)) {
4000         PyErr_Format(st->PicklingError, "sixth element of the tuple "
4001                      "returned by __reduce__ must be a function, not %s",
4002                      Py_TYPE(state_setter)->tp_name);
4003         return -1;
4004     }
4005 
4006     if (self->proto >= 2) {
4007         PyObject *name;
4008         _Py_IDENTIFIER(__name__);
4009 
4010         if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4011             return -1;
4012         }
4013         if (name != NULL && PyUnicode_Check(name)) {
4014             _Py_IDENTIFIER(__newobj_ex__);
4015             use_newobj_ex = _PyUnicode_EqualToASCIIId(
4016                     name, &PyId___newobj_ex__);
4017             if (!use_newobj_ex) {
4018                 _Py_IDENTIFIER(__newobj__);
4019                 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
4020             }
4021         }
4022         Py_XDECREF(name);
4023     }
4024 
4025     if (use_newobj_ex) {
4026         PyObject *cls;
4027         PyObject *args;
4028         PyObject *kwargs;
4029 
4030         if (PyTuple_GET_SIZE(argtup) != 3) {
4031             PyErr_Format(st->PicklingError,
4032                          "length of the NEWOBJ_EX argument tuple must be "
4033                          "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4034             return -1;
4035         }
4036 
4037         cls = PyTuple_GET_ITEM(argtup, 0);
4038         if (!PyType_Check(cls)) {
4039             PyErr_Format(st->PicklingError,
4040                          "first item from NEWOBJ_EX argument tuple must "
4041                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4042             return -1;
4043         }
4044         args = PyTuple_GET_ITEM(argtup, 1);
4045         if (!PyTuple_Check(args)) {
4046             PyErr_Format(st->PicklingError,
4047                          "second item from NEWOBJ_EX argument tuple must "
4048                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4049             return -1;
4050         }
4051         kwargs = PyTuple_GET_ITEM(argtup, 2);
4052         if (!PyDict_Check(kwargs)) {
4053             PyErr_Format(st->PicklingError,
4054                          "third item from NEWOBJ_EX argument tuple must "
4055                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4056             return -1;
4057         }
4058 
4059         if (self->proto >= 4) {
4060             if (save(self, cls, 0) < 0 ||
4061                 save(self, args, 0) < 0 ||
4062                 save(self, kwargs, 0) < 0 ||
4063                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4064                 return -1;
4065             }
4066         }
4067         else {
4068             PyObject *newargs;
4069             PyObject *cls_new;
4070             Py_ssize_t i;
4071             _Py_IDENTIFIER(__new__);
4072 
4073             newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4074             if (newargs == NULL)
4075                 return -1;
4076 
4077             cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4078             if (cls_new == NULL) {
4079                 Py_DECREF(newargs);
4080                 return -1;
4081             }
4082             PyTuple_SET_ITEM(newargs, 0, cls_new);
4083             Py_INCREF(cls);
4084             PyTuple_SET_ITEM(newargs, 1, cls);
4085             for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4086                 PyObject *item = PyTuple_GET_ITEM(args, i);
4087                 Py_INCREF(item);
4088                 PyTuple_SET_ITEM(newargs, i + 2, item);
4089             }
4090 
4091             callable = PyObject_Call(st->partial, newargs, kwargs);
4092             Py_DECREF(newargs);
4093             if (callable == NULL)
4094                 return -1;
4095 
4096             newargs = PyTuple_New(0);
4097             if (newargs == NULL) {
4098                 Py_DECREF(callable);
4099                 return -1;
4100             }
4101 
4102             if (save(self, callable, 0) < 0 ||
4103                 save(self, newargs, 0) < 0 ||
4104                 _Pickler_Write(self, &reduce_op, 1) < 0) {
4105                 Py_DECREF(newargs);
4106                 Py_DECREF(callable);
4107                 return -1;
4108             }
4109             Py_DECREF(newargs);
4110             Py_DECREF(callable);
4111         }
4112     }
4113     else if (use_newobj) {
4114         PyObject *cls;
4115         PyObject *newargtup;
4116         PyObject *obj_class;
4117         int p;
4118 
4119         /* Sanity checks. */
4120         if (PyTuple_GET_SIZE(argtup) < 1) {
4121             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4122             return -1;
4123         }
4124 
4125         cls = PyTuple_GET_ITEM(argtup, 0);
4126         if (!PyType_Check(cls)) {
4127             PyErr_SetString(st->PicklingError, "args[0] from "
4128                             "__newobj__ args is not a type");
4129             return -1;
4130         }
4131 
4132         if (obj != NULL) {
4133             obj_class = get_class(obj);
4134             if (obj_class == NULL) {
4135                 return -1;
4136             }
4137             p = obj_class != cls;
4138             Py_DECREF(obj_class);
4139             if (p) {
4140                 PyErr_SetString(st->PicklingError, "args[0] from "
4141                                 "__newobj__ args has the wrong class");
4142                 return -1;
4143             }
4144         }
4145         /* XXX: These calls save() are prone to infinite recursion. Imagine
4146            what happen if the value returned by the __reduce__() method of
4147            some extension type contains another object of the same type. Ouch!
4148 
4149            Here is a quick example, that I ran into, to illustrate what I
4150            mean:
4151 
4152              >>> import pickle, copyreg
4153              >>> copyreg.dispatch_table.pop(complex)
4154              >>> pickle.dumps(1+2j)
4155              Traceback (most recent call last):
4156                ...
4157              RecursionError: maximum recursion depth exceeded
4158 
4159            Removing the complex class from copyreg.dispatch_table made the
4160            __reduce_ex__() method emit another complex object:
4161 
4162              >>> (1+1j).__reduce_ex__(2)
4163              (<function __newobj__ at 0xb7b71c3c>,
4164                (<class 'complex'>, (1+1j)), None, None, None)
4165 
4166            Thus when save() was called on newargstup (the 2nd item) recursion
4167            ensued. Of course, the bug was in the complex class which had a
4168            broken __getnewargs__() that emitted another complex object. But,
4169            the point, here, is it is quite easy to end up with a broken reduce
4170            function. */
4171 
4172         /* Save the class and its __new__ arguments. */
4173         if (save(self, cls, 0) < 0)
4174             return -1;
4175 
4176         newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4177         if (newargtup == NULL)
4178             return -1;
4179 
4180         p = save(self, newargtup, 0);
4181         Py_DECREF(newargtup);
4182         if (p < 0)
4183             return -1;
4184 
4185         /* Add NEWOBJ opcode. */
4186         if (_Pickler_Write(self, &newobj_op, 1) < 0)
4187             return -1;
4188     }
4189     else { /* Not using NEWOBJ. */
4190         if (save(self, callable, 0) < 0 ||
4191             save(self, argtup, 0) < 0 ||
4192             _Pickler_Write(self, &reduce_op, 1) < 0)
4193             return -1;
4194     }
4195 
4196     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4197        the caller do not want to memoize the object. Not particularly useful,
4198        but that is to mimic the behavior save_reduce() in pickle.py when
4199        obj is None. */
4200     if (obj != NULL) {
4201         /* If the object is already in the memo, this means it is
4202            recursive. In this case, throw away everything we put on the
4203            stack, and fetch the object back from the memo. */
4204         if (PyMemoTable_Get(self->memo, obj)) {
4205             const char pop_op = POP;
4206 
4207             if (_Pickler_Write(self, &pop_op, 1) < 0)
4208                 return -1;
4209             if (memo_get(self, obj) < 0)
4210                 return -1;
4211 
4212             return 0;
4213         }
4214         else if (memo_put(self, obj) < 0)
4215             return -1;
4216     }
4217 
4218     if (listitems && batch_list(self, listitems) < 0)
4219         return -1;
4220 
4221     if (dictitems && batch_dict(self, dictitems) < 0)
4222         return -1;
4223 
4224     if (state) {
4225         if (state_setter == NULL) {
4226             if (save(self, state, 0) < 0 ||
4227                 _Pickler_Write(self, &build_op, 1) < 0)
4228                 return -1;
4229         }
4230         else {
4231 
4232             /* If a state_setter is specified, call it instead of load_build to
4233              * update obj's with its previous state.
4234              * The first 4 save/write instructions push state_setter and its
4235              * tuple of expected arguments (obj, state) onto the stack. The
4236              * REDUCE opcode triggers the state_setter(obj, state) function
4237              * call. Finally, because state-updating routines only do in-place
4238              * modification, the whole operation has to be stack-transparent.
4239              * Thus, we finally pop the call's output from the stack.*/
4240 
4241             const char tupletwo_op = TUPLE2;
4242             const char pop_op = POP;
4243             if (save(self, state_setter, 0) < 0 ||
4244                 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4245                 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4246                 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4247                 _Pickler_Write(self, &pop_op, 1) < 0)
4248                 return -1;
4249         }
4250     }
4251     return 0;
4252 }
4253 
/* Pickle obj into self's output; this is the central dispatch routine.
 *
 * pers_save is non-zero when obj is itself a persistent id being written by
 * save_pers(), in which case the persistent-id hook is not consulted again.
 *
 * The order of the checks below matters: persistent id, atoms (never
 * memoized), memo lookup, bytes/str, built-in containers, reducer_override,
 * types and functions, and finally the generic reduction machinery.
 *
 * Returns a negative value on failure.  On success it returns 0, or 1 when
 * the object was written as a persistent id (the value from save_pers() is
 * passed through unchanged).
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (_Pickler_OpcodeBoundary(self) < 0)
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj)) != 0)
            return status;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        return save_none(self, obj);
    }
    else if (obj == Py_False || obj == Py_True) {
        return save_bool(self, obj);
    }
    else if (type == &PyLong_Type) {
        return save_long(self, obj);
    }
    else if (type == &PyFloat_Type) {
        return save_float(self, obj);
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        return memo_get(self, obj);
    }

    if (type == &PyBytes_Type) {
        return save_bytes(self, obj);
    }
    else if (type == &PyUnicode_Type) {
        return save_unicode(self, obj);
    }

    /* We're only calling Py_EnterRecursiveCall here so that atomic
       types above are pickled faster.  Every path below this point must
       leave through the 'done' label, which calls Py_LeaveRecursiveCall(). */
    if (Py_EnterRecursiveCall(" while pickling an object")) {
        return -1;
    }

    /* Exact built-in container types have dedicated savers. */
    if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PySet_Type) {
        status = save_set(self, obj);
        goto done;
    }
    else if (type == &PyFrozenSet_Type) {
        status = save_frozenset(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyByteArray_Type) {
        status = save_bytearray(self, obj);
        goto done;
    }
    else if (type == &PyPickleBuffer_Type) {
        status = save_picklebuffer(self, obj);
        goto done;
    }

    /* Now, check reducer_override.  If it returns NotImplemented,
     * fallback to save_type or save_global, and then perhaps to the
     * regular reduction mechanism.
     */
    if (self->reducer_override != NULL) {
        reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
        if (reduce_value == NULL) {
            goto error;
        }
        if (reduce_value != Py_NotImplemented) {
            goto reduce;
        }
        /* NotImplemented: drop it and continue with the default handling. */
        Py_DECREF(reduce_value);
        reduce_value = NULL;
    }

    if (type == &PyType_Type) {
        status = save_type(self, obj);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * self.dispatch_table, copyreg.dispatch_table, the object's
     * __reduce_ex__ method, or the object's __reduce__ method.
     */
    if (self->dispatch_table == NULL) {
        PickleState *st = _Pickle_GetGlobalState();
        reduce_func = PyDict_GetItemWithError(st->dispatch_table,
                                              (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
        } else {
            /* PyDict_GetItemWithError() returns a borrowed reference.
               Increase the reference count to be consistent with
               PyObject_GetItem and _PyObject_GetAttrId used below. */
            Py_INCREF(reduce_func);
        }
    } else {
        reduce_func = PyObject_GetItem(self->dispatch_table,
                                       (PyObject *)type);
        if (reduce_func == NULL) {
            /* A missing key just means "no entry for this type". */
            if (PyErr_ExceptionMatches(PyExc_KeyError))
                PyErr_Clear();
            else
                goto error;
        }
    }
    if (reduce_func != NULL) {
        /* NOTE(review): the extra reference taken here is presumably
           released by _Pickle_FastCall() -- confirm against its
           definition. */
        Py_INCREF(obj);
        reduce_value = _Pickle_FastCall(reduce_func, obj);
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        /* obj's type derives from type: pickle obj by reference. */
        status = save_global(self, obj, NULL);
        goto done;
    }
    else {
        _Py_IDENTIFIER(__reduce__);
        _Py_IDENTIFIER(__reduce_ex__);

        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient,
           it makes it impossible to know which method was actually called.
           Of course, this is not a big deal. But still, it would be nice to
           let the user know which method was called when something goes
           wrong. Incidentally, this means if __reduce_ex__ is not defined, we
           don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
            goto error;
        }
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* If proto could not be created, reduce_value stays NULL and
               the check after this block reports the error. */
            if (proto != NULL) {
                reduce_value = _Pickle_FastCall(reduce_func, proto);
            }
        }
        else {
            /* Check for a __reduce__ method. */
            if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
                goto error;
            }
            if (reduce_func != NULL) {
                reduce_value = PyObject_CallNoArgs(reduce_func);
            }
            else {
                PickleState *st = _Pickle_GetGlobalState();
                PyErr_Format(st->PicklingError,
                             "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

  reduce:
    /* A string reduction names a global: pickle obj by reference under that
       name. */
    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PickleState *st = _Pickle_GetGlobalState();
        PyErr_SetString(st->PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    /* The error label is only entered through goto; the success path skips
       this block. */
    if (0) {
  error:
        status = -1;
    }
  done:

    Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}
4482 
4483 static int
dump(PicklerObject * self,PyObject * obj)4484 dump(PicklerObject *self, PyObject *obj)
4485 {
4486     const char stop_op = STOP;
4487     int status = -1;
4488     PyObject *tmp;
4489     _Py_IDENTIFIER(reducer_override);
4490 
4491     if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4492                                &tmp) < 0) {
4493       goto error;
4494     }
4495     /* Cache the reducer_override method, if it exists. */
4496     if (tmp != NULL) {
4497         Py_XSETREF(self->reducer_override, tmp);
4498     }
4499     else {
4500         Py_CLEAR(self->reducer_override);
4501     }
4502 
4503     if (self->proto >= 2) {
4504         char header[2];
4505 
4506         header[0] = PROTO;
4507         assert(self->proto >= 0 && self->proto < 256);
4508         header[1] = (unsigned char)self->proto;
4509         if (_Pickler_Write(self, header, 2) < 0)
4510             goto error;
4511         if (self->proto >= 4)
4512             self->framing = 1;
4513     }
4514 
4515     if (save(self, obj, 0) < 0 ||
4516         _Pickler_Write(self, &stop_op, 1) < 0 ||
4517         _Pickler_CommitFrame(self) < 0)
4518         goto error;
4519 
4520     // Success
4521     status = 0;
4522 
4523   error:
4524     self->framing = 0;
4525 
4526     /* Break the reference cycle we generated at the beginning this function
4527      * call when setting the reducer_override attribute of the Pickler instance
4528      * to a bound method of the same instance. This is important as the Pickler
4529      * instance holds a reference to each object it has pickled (through its
4530      * memo): thus, these objects won't be garbage-collected as long as the
4531      * Pickler itself is not collected. */
4532     Py_CLEAR(self->reducer_override);
4533     return status;
4534 }
4535 
4536 /*[clinic input]
4537 
4538 _pickle.Pickler.clear_memo
4539 
4540 Clears the pickler's "memo".
4541 
4542 The memo is the data structure that remembers which objects the
4543 pickler has already seen, so that shared or recursive objects are
4544 pickled by reference and not by value.  This method is useful when
4545 re-using picklers.
4546 [clinic start generated code]*/
4547 
4548 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4549 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4550 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4551 {
4552     if (self->memo)
4553         PyMemoTable_Clear(self->memo);
4554 
4555     Py_RETURN_NONE;
4556 }
4557 
4558 /*[clinic input]
4559 
4560 _pickle.Pickler.dump
4561 
4562   obj: object
4563   /
4564 
4565 Write a pickled representation of the given object to the open file.
4566 [clinic start generated code]*/
4567 
4568 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4569 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4570 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4571 {
4572     /* Check whether the Pickler was initialized correctly (issue3664).
4573        Developers often forget to call __init__() in their subclasses, which
4574        would trigger a segfault without this check. */
4575     if (self->write == NULL) {
4576         PickleState *st = _Pickle_GetGlobalState();
4577         PyErr_Format(st->PicklingError,
4578                      "Pickler.__init__() was not called by %s.__init__()",
4579                      Py_TYPE(self)->tp_name);
4580         return NULL;
4581     }
4582 
4583     if (_Pickler_ClearBuffer(self) < 0)
4584         return NULL;
4585 
4586     if (dump(self, obj) < 0)
4587         return NULL;
4588 
4589     if (_Pickler_FlushToFile(self) < 0)
4590         return NULL;
4591 
4592     Py_RETURN_NONE;
4593 }
4594 
4595 /*[clinic input]
4596 
4597 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4598 
4599 Returns size in memory, in bytes.
4600 [clinic start generated code]*/
4601 
4602 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4603 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4604 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4605 {
4606     Py_ssize_t res, s;
4607 
4608     res = _PyObject_SIZE(Py_TYPE(self));
4609     if (self->memo != NULL) {
4610         res += sizeof(PyMemoTable);
4611         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4612     }
4613     if (self->output_buffer != NULL) {
4614         s = _PySys_GetSizeOf(self->output_buffer);
4615         if (s == -1)
4616             return -1;
4617         res += s;
4618     }
4619     return res;
4620 }
4621 
4622 static struct PyMethodDef Pickler_methods[] = {
4623     _PICKLE_PICKLER_DUMP_METHODDEF
4624     _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4625     _PICKLE_PICKLER___SIZEOF___METHODDEF
4626     {NULL, NULL}                /* sentinel */
4627 };
4628 
4629 static void
Pickler_dealloc(PicklerObject * self)4630 Pickler_dealloc(PicklerObject *self)
4631 {
4632     PyObject_GC_UnTrack(self);
4633 
4634     Py_XDECREF(self->output_buffer);
4635     Py_XDECREF(self->write);
4636     Py_XDECREF(self->pers_func);
4637     Py_XDECREF(self->dispatch_table);
4638     Py_XDECREF(self->fast_memo);
4639     Py_XDECREF(self->reducer_override);
4640     Py_XDECREF(self->buffer_callback);
4641 
4642     PyMemoTable_Del(self->memo);
4643 
4644     Py_TYPE(self)->tp_free((PyObject *)self);
4645 }
4646 
4647 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4648 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4649 {
4650     Py_VISIT(self->write);
4651     Py_VISIT(self->pers_func);
4652     Py_VISIT(self->dispatch_table);
4653     Py_VISIT(self->fast_memo);
4654     Py_VISIT(self->reducer_override);
4655     Py_VISIT(self->buffer_callback);
4656     return 0;
4657 }
4658 
4659 static int
Pickler_clear(PicklerObject * self)4660 Pickler_clear(PicklerObject *self)
4661 {
4662     Py_CLEAR(self->output_buffer);
4663     Py_CLEAR(self->write);
4664     Py_CLEAR(self->pers_func);
4665     Py_CLEAR(self->dispatch_table);
4666     Py_CLEAR(self->fast_memo);
4667     Py_CLEAR(self->reducer_override);
4668     Py_CLEAR(self->buffer_callback);
4669 
4670     if (self->memo != NULL) {
4671         PyMemoTable *memo = self->memo;
4672         self->memo = NULL;
4673         PyMemoTable_Del(memo);
4674     }
4675     return 0;
4676 }
4677 
4678 
4679 /*[clinic input]
4680 
4681 _pickle.Pickler.__init__
4682 
4683   file: object
4684   protocol: object = None
4685   fix_imports: bool = True
4686   buffer_callback: object = None
4687 
4688 This takes a binary file for writing a pickle data stream.
4689 
4690 The optional *protocol* argument tells the pickler to use the given
4691 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
4692 protocol is 4. It was introduced in Python 3.4, and is incompatible
4693 with previous versions.
4694 
4695 Specifying a negative protocol version selects the highest protocol
4696 version supported.  The higher the protocol used, the more recent the
4697 version of Python needed to read the pickle produced.
4698 
4699 The *file* argument must have a write() method that accepts a single
4700 bytes argument. It can thus be a file object opened for binary
4701 writing, an io.BytesIO instance, or any other custom object that meets
4702 this interface.
4703 
4704 If *fix_imports* is True and protocol is less than 3, pickle will try
4705 to map the new Python 3 names to the old module names used in Python
4706 2, so that the pickle data stream is readable with Python 2.
4707 
4708 If *buffer_callback* is None (the default), buffer views are
4709 serialized into *file* as part of the pickle stream.
4710 
4711 If *buffer_callback* is not None, then it can be called any number
4712 of times with a buffer view.  If the callback returns a false value
4713 (such as None), the given buffer is out-of-band; otherwise the
4714 buffer is serialized in-band, i.e. inside the pickle stream.
4715 
4716 It is an error if *buffer_callback* is not None and *protocol*
4717 is None or smaller than 5.
4718 
4719 [clinic start generated code]*/
4720 
4721 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4722 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4723                               PyObject *protocol, int fix_imports,
4724                               PyObject *buffer_callback)
4725 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4726 {
4727     _Py_IDENTIFIER(persistent_id);
4728     _Py_IDENTIFIER(dispatch_table);
4729 
4730     /* In case of multiple __init__() calls, clear previous content. */
4731     if (self->write != NULL)
4732         (void)Pickler_clear(self);
4733 
4734     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4735         return -1;
4736 
4737     if (_Pickler_SetOutputStream(self, file) < 0)
4738         return -1;
4739 
4740     if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4741         return -1;
4742 
4743     /* memo and output_buffer may have already been created in _Pickler_New */
4744     if (self->memo == NULL) {
4745         self->memo = PyMemoTable_New();
4746         if (self->memo == NULL)
4747             return -1;
4748     }
4749     self->output_len = 0;
4750     if (self->output_buffer == NULL) {
4751         self->max_output_len = WRITE_BUF_SIZE;
4752         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4753                                                         self->max_output_len);
4754         if (self->output_buffer == NULL)
4755             return -1;
4756     }
4757 
4758     self->fast = 0;
4759     self->fast_nesting = 0;
4760     self->fast_memo = NULL;
4761 
4762     if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4763                         &self->pers_func, &self->pers_func_self) < 0)
4764     {
4765         return -1;
4766     }
4767 
4768     if (_PyObject_LookupAttrId((PyObject *)self,
4769                                     &PyId_dispatch_table, &self->dispatch_table) < 0) {
4770         return -1;
4771     }
4772 
4773     return 0;
4774 }
4775 
4776 
4777 /* Define a proxy object for the Pickler's internal memo object. This is to
4778  * avoid breaking code like:
4779  *  pickler.memo.clear()
4780  * and
4781  *  pickler.memo = saved_memo
4782  * Is this a good idea? Not really, but we don't want to break code that uses
4783  * it. Note that we don't implement the entire mapping API here. This is
4784  * intentional, as these should be treated as black-box implementation details.
4785  */
4786 
4787 /*[clinic input]
4788 _pickle.PicklerMemoProxy.clear
4789 
4790 Remove all items from memo.
4791 [clinic start generated code]*/
4792 
4793 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4794 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4795 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4796 {
4797     if (self->pickler->memo)
4798         PyMemoTable_Clear(self->pickler->memo);
4799     Py_RETURN_NONE;
4800 }
4801 
4802 /*[clinic input]
4803 _pickle.PicklerMemoProxy.copy
4804 
4805 Copy the memo to a new object.
4806 [clinic start generated code]*/
4807 
4808 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4809 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4810 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4811 {
4812     PyMemoTable *memo;
4813     PyObject *new_memo = PyDict_New();
4814     if (new_memo == NULL)
4815         return NULL;
4816 
4817     memo = self->pickler->memo;
4818     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4819         PyMemoEntry entry = memo->mt_table[i];
4820         if (entry.me_key != NULL) {
4821             int status;
4822             PyObject *key, *value;
4823 
4824             key = PyLong_FromVoidPtr(entry.me_key);
4825             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4826 
4827             if (key == NULL || value == NULL) {
4828                 Py_XDECREF(key);
4829                 Py_XDECREF(value);
4830                 goto error;
4831             }
4832             status = PyDict_SetItem(new_memo, key, value);
4833             Py_DECREF(key);
4834             Py_DECREF(value);
4835             if (status < 0)
4836                 goto error;
4837         }
4838     }
4839     return new_memo;
4840 
4841   error:
4842     Py_XDECREF(new_memo);
4843     return NULL;
4844 }
4845 
4846 /*[clinic input]
4847 _pickle.PicklerMemoProxy.__reduce__
4848 
4849 Implement pickle support.
4850 [clinic start generated code]*/
4851 
4852 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4853 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4854 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4855 {
4856     PyObject *reduce_value, *dict_args;
4857     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4858     if (contents == NULL)
4859         return NULL;
4860 
4861     reduce_value = PyTuple_New(2);
4862     if (reduce_value == NULL) {
4863         Py_DECREF(contents);
4864         return NULL;
4865     }
4866     dict_args = PyTuple_New(1);
4867     if (dict_args == NULL) {
4868         Py_DECREF(contents);
4869         Py_DECREF(reduce_value);
4870         return NULL;
4871     }
4872     PyTuple_SET_ITEM(dict_args, 0, contents);
4873     Py_INCREF((PyObject *)&PyDict_Type);
4874     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4875     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4876     return reduce_value;
4877 }
4878 
4879 static PyMethodDef picklerproxy_methods[] = {
4880     _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4881     _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4882     _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4883     {NULL, NULL} /* sentinel */
4884 };
4885 
4886 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4887 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4888 {
4889     PyObject_GC_UnTrack(self);
4890     Py_XDECREF(self->pickler);
4891     PyObject_GC_Del((PyObject *)self);
4892 }
4893 
4894 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4895 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4896                           visitproc visit, void *arg)
4897 {
4898     Py_VISIT(self->pickler);
4899     return 0;
4900 }
4901 
4902 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4903 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4904 {
4905     Py_CLEAR(self->pickler);
4906     return 0;
4907 }
4908 
4909 static PyTypeObject PicklerMemoProxyType = {
4910     PyVarObject_HEAD_INIT(NULL, 0)
4911     "_pickle.PicklerMemoProxy",                 /*tp_name*/
4912     sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
4913     0,
4914     (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
4915     0,                                          /* tp_vectorcall_offset */
4916     0,                                          /* tp_getattr */
4917     0,                                          /* tp_setattr */
4918     0,                                          /* tp_as_async */
4919     0,                                          /* tp_repr */
4920     0,                                          /* tp_as_number */
4921     0,                                          /* tp_as_sequence */
4922     0,                                          /* tp_as_mapping */
4923     PyObject_HashNotImplemented,                /* tp_hash */
4924     0,                                          /* tp_call */
4925     0,                                          /* tp_str */
4926     PyObject_GenericGetAttr,                    /* tp_getattro */
4927     PyObject_GenericSetAttr,                    /* tp_setattro */
4928     0,                                          /* tp_as_buffer */
4929     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4930     0,                                          /* tp_doc */
4931     (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
4932     (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
4933     0,                                          /* tp_richcompare */
4934     0,                                          /* tp_weaklistoffset */
4935     0,                                          /* tp_iter */
4936     0,                                          /* tp_iternext */
4937     picklerproxy_methods,                       /* tp_methods */
4938 };
4939 
4940 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4941 PicklerMemoProxy_New(PicklerObject *pickler)
4942 {
4943     PicklerMemoProxyObject *self;
4944 
4945     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4946     if (self == NULL)
4947         return NULL;
4948     Py_INCREF(pickler);
4949     self->pickler = pickler;
4950     PyObject_GC_Track(self);
4951     return (PyObject *)self;
4952 }
4953 
4954 /*****************************************************************************/
4955 
4956 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4957 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4958 {
4959     return PicklerMemoProxy_New(self);
4960 }
4961 
4962 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4963 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4964 {
4965     PyMemoTable *new_memo = NULL;
4966 
4967     if (obj == NULL) {
4968         PyErr_SetString(PyExc_TypeError,
4969                         "attribute deletion is not supported");
4970         return -1;
4971     }
4972 
4973     if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4974         PicklerObject *pickler =
4975             ((PicklerMemoProxyObject *)obj)->pickler;
4976 
4977         new_memo = PyMemoTable_Copy(pickler->memo);
4978         if (new_memo == NULL)
4979             return -1;
4980     }
4981     else if (PyDict_Check(obj)) {
4982         Py_ssize_t i = 0;
4983         PyObject *key, *value;
4984 
4985         new_memo = PyMemoTable_New();
4986         if (new_memo == NULL)
4987             return -1;
4988 
4989         while (PyDict_Next(obj, &i, &key, &value)) {
4990             Py_ssize_t memo_id;
4991             PyObject *memo_obj;
4992 
4993             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4994                 PyErr_SetString(PyExc_TypeError,
4995                                 "'memo' values must be 2-item tuples");
4996                 goto error;
4997             }
4998             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4999             if (memo_id == -1 && PyErr_Occurred())
5000                 goto error;
5001             memo_obj = PyTuple_GET_ITEM(value, 1);
5002             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5003                 goto error;
5004         }
5005     }
5006     else {
5007         PyErr_Format(PyExc_TypeError,
5008                      "'memo' attribute must be a PicklerMemoProxy object "
5009                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5010         return -1;
5011     }
5012 
5013     PyMemoTable_Del(self->memo);
5014     self->memo = new_memo;
5015 
5016     return 0;
5017 
5018   error:
5019     if (new_memo)
5020         PyMemoTable_Del(new_memo);
5021     return -1;
5022 }
5023 
5024 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5025 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5026 {
5027     if (self->pers_func == NULL) {
5028         PyErr_SetString(PyExc_AttributeError, "persistent_id");
5029         return NULL;
5030     }
5031     return reconstruct_method(self->pers_func, self->pers_func_self);
5032 }
5033 
5034 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5035 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5036 {
5037     if (value == NULL) {
5038         PyErr_SetString(PyExc_TypeError,
5039                         "attribute deletion is not supported");
5040         return -1;
5041     }
5042     if (!PyCallable_Check(value)) {
5043         PyErr_SetString(PyExc_TypeError,
5044                         "persistent_id must be a callable taking one argument");
5045         return -1;
5046     }
5047 
5048     self->pers_func_self = NULL;
5049     Py_INCREF(value);
5050     Py_XSETREF(self->pers_func, value);
5051 
5052     return 0;
5053 }
5054 
5055 static PyMemberDef Pickler_members[] = {
5056     {"bin", T_INT, offsetof(PicklerObject, bin)},
5057     {"fast", T_INT, offsetof(PicklerObject, fast)},
5058     {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5059     {NULL}
5060 };
5061 
5062 static PyGetSetDef Pickler_getsets[] = {
5063     {"memo",          (getter)Pickler_get_memo,
5064                       (setter)Pickler_set_memo},
5065     {"persistent_id", (getter)Pickler_get_persid,
5066                       (setter)Pickler_set_persid},
5067     {NULL}
5068 };
5069 
5070 static PyTypeObject Pickler_Type = {
5071     PyVarObject_HEAD_INIT(NULL, 0)
5072     "_pickle.Pickler"  ,                /*tp_name*/
5073     sizeof(PicklerObject),              /*tp_basicsize*/
5074     0,                                  /*tp_itemsize*/
5075     (destructor)Pickler_dealloc,        /*tp_dealloc*/
5076     0,                                  /*tp_vectorcall_offset*/
5077     0,                                  /*tp_getattr*/
5078     0,                                  /*tp_setattr*/
5079     0,                                  /*tp_as_async*/
5080     0,                                  /*tp_repr*/
5081     0,                                  /*tp_as_number*/
5082     0,                                  /*tp_as_sequence*/
5083     0,                                  /*tp_as_mapping*/
5084     0,                                  /*tp_hash*/
5085     0,                                  /*tp_call*/
5086     0,                                  /*tp_str*/
5087     0,                                  /*tp_getattro*/
5088     0,                                  /*tp_setattro*/
5089     0,                                  /*tp_as_buffer*/
5090     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5091     _pickle_Pickler___init____doc__,    /*tp_doc*/
5092     (traverseproc)Pickler_traverse,     /*tp_traverse*/
5093     (inquiry)Pickler_clear,             /*tp_clear*/
5094     0,                                  /*tp_richcompare*/
5095     0,                                  /*tp_weaklistoffset*/
5096     0,                                  /*tp_iter*/
5097     0,                                  /*tp_iternext*/
5098     Pickler_methods,                    /*tp_methods*/
5099     Pickler_members,                    /*tp_members*/
5100     Pickler_getsets,                    /*tp_getset*/
5101     0,                                  /*tp_base*/
5102     0,                                  /*tp_dict*/
5103     0,                                  /*tp_descr_get*/
5104     0,                                  /*tp_descr_set*/
5105     0,                                  /*tp_dictoffset*/
5106     _pickle_Pickler___init__,           /*tp_init*/
5107     PyType_GenericAlloc,                /*tp_alloc*/
5108     PyType_GenericNew,                  /*tp_new*/
5109     PyObject_GC_Del,                    /*tp_free*/
5110     0,                                  /*tp_is_gc*/
5111 };
5112 
5113 /* Temporary helper for calling self.find_class().
5114 
5115    XXX: It would be nice to able to avoid Python function call overhead, by
5116    using directly the C version of find_class(), when find_class() is not
5117    overridden by a subclass. Although, this could become rather hackish. A
5118    simpler optimization would be to call the C function when self is not a
5119    subclass instance. */
5120 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5121 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5122 {
5123     _Py_IDENTIFIER(find_class);
5124 
5125     return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5126                                          module_name, global_name, NULL);
5127 }
5128 
5129 static Py_ssize_t
marker(UnpicklerObject * self)5130 marker(UnpicklerObject *self)
5131 {
5132     Py_ssize_t mark;
5133 
5134     if (self->num_marks < 1) {
5135         PickleState *st = _Pickle_GetGlobalState();
5136         PyErr_SetString(st->UnpicklingError, "could not find MARK");
5137         return -1;
5138     }
5139 
5140     mark = self->marks[--self->num_marks];
5141     self->stack->mark_set = self->num_marks != 0;
5142     self->stack->fence = self->num_marks ?
5143             self->marks[self->num_marks - 1] : 0;
5144     return mark;
5145 }
5146 
5147 static int
load_none(UnpicklerObject * self)5148 load_none(UnpicklerObject *self)
5149 {
5150     PDATA_APPEND(self->stack, Py_None, -1);
5151     return 0;
5152 }
5153 
5154 static int
load_int(UnpicklerObject * self)5155 load_int(UnpicklerObject *self)
5156 {
5157     PyObject *value;
5158     char *endptr, *s;
5159     Py_ssize_t len;
5160     long x;
5161 
5162     if ((len = _Unpickler_Readline(self, &s)) < 0)
5163         return -1;
5164     if (len < 2)
5165         return bad_readline();
5166 
5167     errno = 0;
5168     /* XXX: Should the base argument of strtol() be explicitly set to 10?
5169        XXX(avassalotti): Should this uses PyOS_strtol()? */
5170     x = strtol(s, &endptr, 0);
5171 
5172     if (errno || (*endptr != '\n' && *endptr != '\0')) {
5173         /* Hm, maybe we've got something long.  Let's try reading
5174          * it as a Python int object. */
5175         errno = 0;
5176         /* XXX: Same thing about the base here. */
5177         value = PyLong_FromString(s, NULL, 0);
5178         if (value == NULL) {
5179             PyErr_SetString(PyExc_ValueError,
5180                             "could not convert string to int");
5181             return -1;
5182         }
5183     }
5184     else {
5185         if (len == 3 && (x == 0 || x == 1)) {
5186             if ((value = PyBool_FromLong(x)) == NULL)
5187                 return -1;
5188         }
5189         else {
5190             if ((value = PyLong_FromLong(x)) == NULL)
5191                 return -1;
5192         }
5193     }
5194 
5195     PDATA_PUSH(self->stack, value, -1);
5196     return 0;
5197 }
5198 
5199 static int
load_bool(UnpicklerObject * self,PyObject * boolean)5200 load_bool(UnpicklerObject *self, PyObject *boolean)
5201 {
5202     assert(boolean == Py_True || boolean == Py_False);
5203     PDATA_APPEND(self->stack, boolean, -1);
5204     return 0;
5205 }
5206 
5207 /* s contains x bytes of an unsigned little-endian integer.  Return its value
5208  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5209  */
5210 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5211 calc_binsize(char *bytes, int nbytes)
5212 {
5213     unsigned char *s = (unsigned char *)bytes;
5214     int i;
5215     size_t x = 0;
5216 
5217     if (nbytes > (int)sizeof(size_t)) {
5218         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5219          * have 64-bit size that can't be represented on 32-bit platform.
5220          */
5221         for (i = (int)sizeof(size_t); i < nbytes; i++) {
5222             if (s[i])
5223                 return -1;
5224         }
5225         nbytes = (int)sizeof(size_t);
5226     }
5227     for (i = 0; i < nbytes; i++) {
5228         x |= (size_t) s[i] << (8 * i);
5229     }
5230 
5231     if (x > PY_SSIZE_T_MAX)
5232         return -1;
5233     else
5234         return (Py_ssize_t) x;
5235 }
5236 
/* bytes holds an nbytes-byte little-endian integer.  Return its value as a
 * C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of cross-platform bugs.
 *
 * The callers in this file only pass nbytes of 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;

    /* Assemble the value in an unsigned type: shifting a byte into the sign
     * bit of a signed long is undefined behaviour where long is 32 bits.
     */
    for (int i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so a set bit 31 means the value is negative.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        /* Extend the sign bit to the full width; this is a no-op when long
         * is exactly 32 bits wide.
         */
        ux |= ~0xFFFFFFFFUL;
        /* ux is now the two's complement encoding of a negative value v.
         * ULONG_MAX - ux equals -v - 1, which always fits in a long, so the
         * conversion below never overflows.
         */
        return -(long)(ULONG_MAX - ux) - 1;
    }

    return (long)ux;
}
5263 
5264 static int
load_binintx(UnpicklerObject * self,char * s,int size)5265 load_binintx(UnpicklerObject *self, char *s, int size)
5266 {
5267     PyObject *value;
5268     long x;
5269 
5270     x = calc_binint(s, size);
5271 
5272     if ((value = PyLong_FromLong(x)) == NULL)
5273         return -1;
5274 
5275     PDATA_PUSH(self->stack, value, -1);
5276     return 0;
5277 }
5278 
5279 static int
load_binint(UnpicklerObject * self)5280 load_binint(UnpicklerObject *self)
5281 {
5282     char *s;
5283 
5284     if (_Unpickler_Read(self, &s, 4) < 0)
5285         return -1;
5286 
5287     return load_binintx(self, s, 4);
5288 }
5289 
5290 static int
load_binint1(UnpicklerObject * self)5291 load_binint1(UnpicklerObject *self)
5292 {
5293     char *s;
5294 
5295     if (_Unpickler_Read(self, &s, 1) < 0)
5296         return -1;
5297 
5298     return load_binintx(self, s, 1);
5299 }
5300 
5301 static int
load_binint2(UnpicklerObject * self)5302 load_binint2(UnpicklerObject *self)
5303 {
5304     char *s;
5305 
5306     if (_Unpickler_Read(self, &s, 2) < 0)
5307         return -1;
5308 
5309     return load_binintx(self, s, 2);
5310 }
5311 
5312 static int
load_long(UnpicklerObject * self)5313 load_long(UnpicklerObject *self)
5314 {
5315     PyObject *value;
5316     char *s = NULL;
5317     Py_ssize_t len;
5318 
5319     if ((len = _Unpickler_Readline(self, &s)) < 0)
5320         return -1;
5321     if (len < 2)
5322         return bad_readline();
5323 
5324     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5325        the 'L' before calling PyLong_FromString.  In order to maintain
5326        compatibility with Python 3.0.0, we don't actually *require*
5327        the 'L' to be present. */
5328     if (s[len-2] == 'L')
5329         s[len-2] = '\0';
5330     /* XXX: Should the base argument explicitly set to 10? */
5331     value = PyLong_FromString(s, NULL, 0);
5332     if (value == NULL)
5333         return -1;
5334 
5335     PDATA_PUSH(self->stack, value, -1);
5336     return 0;
5337 }
5338 
5339 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5340  * data following.
5341  */
5342 static int
load_counted_long(UnpicklerObject * self,int size)5343 load_counted_long(UnpicklerObject *self, int size)
5344 {
5345     PyObject *value;
5346     char *nbytes;
5347     char *pdata;
5348 
5349     assert(size == 1 || size == 4);
5350     if (_Unpickler_Read(self, &nbytes, size) < 0)
5351         return -1;
5352 
5353     size = calc_binint(nbytes, size);
5354     if (size < 0) {
5355         PickleState *st = _Pickle_GetGlobalState();
5356         /* Corrupt or hostile pickle -- we never write one like this */
5357         PyErr_SetString(st->UnpicklingError,
5358                         "LONG pickle has negative byte count");
5359         return -1;
5360     }
5361 
5362     if (size == 0)
5363         value = PyLong_FromLong(0L);
5364     else {
5365         /* Read the raw little-endian bytes and convert. */
5366         if (_Unpickler_Read(self, &pdata, size) < 0)
5367             return -1;
5368         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5369                                       1 /* little endian */ , 1 /* signed */ );
5370     }
5371     if (value == NULL)
5372         return -1;
5373     PDATA_PUSH(self->stack, value, -1);
5374     return 0;
5375 }
5376 
5377 static int
load_float(UnpicklerObject * self)5378 load_float(UnpicklerObject *self)
5379 {
5380     PyObject *value;
5381     char *endptr, *s;
5382     Py_ssize_t len;
5383     double d;
5384 
5385     if ((len = _Unpickler_Readline(self, &s)) < 0)
5386         return -1;
5387     if (len < 2)
5388         return bad_readline();
5389 
5390     errno = 0;
5391     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5392     if (d == -1.0 && PyErr_Occurred())
5393         return -1;
5394     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5395         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5396         return -1;
5397     }
5398     value = PyFloat_FromDouble(d);
5399     if (value == NULL)
5400         return -1;
5401 
5402     PDATA_PUSH(self->stack, value, -1);
5403     return 0;
5404 }
5405 
5406 static int
load_binfloat(UnpicklerObject * self)5407 load_binfloat(UnpicklerObject *self)
5408 {
5409     PyObject *value;
5410     double x;
5411     char *s;
5412 
5413     if (_Unpickler_Read(self, &s, 8) < 0)
5414         return -1;
5415 
5416     x = _PyFloat_Unpack8((unsigned char *)s, 0);
5417     if (x == -1.0 && PyErr_Occurred())
5418         return -1;
5419 
5420     if ((value = PyFloat_FromDouble(x)) == NULL)
5421         return -1;
5422 
5423     PDATA_PUSH(self->stack, value, -1);
5424     return 0;
5425 }
5426 
5427 static int
load_string(UnpicklerObject * self)5428 load_string(UnpicklerObject *self)
5429 {
5430     PyObject *bytes;
5431     PyObject *obj;
5432     Py_ssize_t len;
5433     char *s, *p;
5434 
5435     if ((len = _Unpickler_Readline(self, &s)) < 0)
5436         return -1;
5437     /* Strip the newline */
5438     len--;
5439     /* Strip outermost quotes */
5440     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5441         p = s + 1;
5442         len -= 2;
5443     }
5444     else {
5445         PickleState *st = _Pickle_GetGlobalState();
5446         PyErr_SetString(st->UnpicklingError,
5447                         "the STRING opcode argument must be quoted");
5448         return -1;
5449     }
5450     assert(len >= 0);
5451 
5452     /* Use the PyBytes API to decode the string, since that is what is used
5453        to encode, and then coerce the result to Unicode. */
5454     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5455     if (bytes == NULL)
5456         return -1;
5457 
5458     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5459        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5460     if (strcmp(self->encoding, "bytes") == 0) {
5461         obj = bytes;
5462     }
5463     else {
5464         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5465         Py_DECREF(bytes);
5466         if (obj == NULL) {
5467             return -1;
5468         }
5469     }
5470 
5471     PDATA_PUSH(self->stack, obj, -1);
5472     return 0;
5473 }
5474 
5475 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5476 load_counted_binstring(UnpicklerObject *self, int nbytes)
5477 {
5478     PyObject *obj;
5479     Py_ssize_t size;
5480     char *s;
5481 
5482     if (_Unpickler_Read(self, &s, nbytes) < 0)
5483         return -1;
5484 
5485     size = calc_binsize(s, nbytes);
5486     if (size < 0) {
5487         PickleState *st = _Pickle_GetGlobalState();
5488         PyErr_Format(st->UnpicklingError,
5489                      "BINSTRING exceeds system's maximum size of %zd bytes",
5490                      PY_SSIZE_T_MAX);
5491         return -1;
5492     }
5493 
5494     if (_Unpickler_Read(self, &s, size) < 0)
5495         return -1;
5496 
5497     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5498        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5499     if (strcmp(self->encoding, "bytes") == 0) {
5500         obj = PyBytes_FromStringAndSize(s, size);
5501     }
5502     else {
5503         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5504     }
5505     if (obj == NULL) {
5506         return -1;
5507     }
5508 
5509     PDATA_PUSH(self->stack, obj, -1);
5510     return 0;
5511 }
5512 
5513 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5514 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5515 {
5516     PyObject *bytes;
5517     Py_ssize_t size;
5518     char *s;
5519 
5520     if (_Unpickler_Read(self, &s, nbytes) < 0)
5521         return -1;
5522 
5523     size = calc_binsize(s, nbytes);
5524     if (size < 0) {
5525         PyErr_Format(PyExc_OverflowError,
5526                      "BINBYTES exceeds system's maximum size of %zd bytes",
5527                      PY_SSIZE_T_MAX);
5528         return -1;
5529     }
5530 
5531     bytes = PyBytes_FromStringAndSize(NULL, size);
5532     if (bytes == NULL)
5533         return -1;
5534     if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5535         Py_DECREF(bytes);
5536         return -1;
5537     }
5538 
5539     PDATA_PUSH(self->stack, bytes, -1);
5540     return 0;
5541 }
5542 
5543 static int
load_counted_bytearray(UnpicklerObject * self)5544 load_counted_bytearray(UnpicklerObject *self)
5545 {
5546     PyObject *bytearray;
5547     Py_ssize_t size;
5548     char *s;
5549 
5550     if (_Unpickler_Read(self, &s, 8) < 0) {
5551         return -1;
5552     }
5553 
5554     size = calc_binsize(s, 8);
5555     if (size < 0) {
5556         PyErr_Format(PyExc_OverflowError,
5557                      "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5558                      PY_SSIZE_T_MAX);
5559         return -1;
5560     }
5561 
5562     bytearray = PyByteArray_FromStringAndSize(NULL, size);
5563     if (bytearray == NULL) {
5564         return -1;
5565     }
5566     if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5567         Py_DECREF(bytearray);
5568         return -1;
5569     }
5570 
5571     PDATA_PUSH(self->stack, bytearray, -1);
5572     return 0;
5573 }
5574 
5575 static int
load_next_buffer(UnpicklerObject * self)5576 load_next_buffer(UnpicklerObject *self)
5577 {
5578     if (self->buffers == NULL) {
5579         PickleState *st = _Pickle_GetGlobalState();
5580         PyErr_SetString(st->UnpicklingError,
5581                         "pickle stream refers to out-of-band data "
5582                         "but no *buffers* argument was given");
5583         return -1;
5584     }
5585     PyObject *buf = PyIter_Next(self->buffers);
5586     if (buf == NULL) {
5587         if (!PyErr_Occurred()) {
5588             PickleState *st = _Pickle_GetGlobalState();
5589             PyErr_SetString(st->UnpicklingError,
5590                             "not enough out-of-band buffers");
5591         }
5592         return -1;
5593     }
5594 
5595     PDATA_PUSH(self->stack, buf, -1);
5596     return 0;
5597 }
5598 
5599 static int
load_readonly_buffer(UnpicklerObject * self)5600 load_readonly_buffer(UnpicklerObject *self)
5601 {
5602     Py_ssize_t len = Py_SIZE(self->stack);
5603     if (len <= self->stack->fence) {
5604         return Pdata_stack_underflow(self->stack);
5605     }
5606 
5607     PyObject *obj = self->stack->data[len - 1];
5608     PyObject *view = PyMemoryView_FromObject(obj);
5609     if (view == NULL) {
5610         return -1;
5611     }
5612     if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5613         /* Original object is writable */
5614         PyMemoryView_GET_BUFFER(view)->readonly = 1;
5615         self->stack->data[len - 1] = view;
5616         Py_DECREF(obj);
5617     }
5618     else {
5619         /* Original object is read-only, no need to replace it */
5620         Py_DECREF(view);
5621     }
5622     return 0;
5623 }
5624 
5625 static int
load_unicode(UnpicklerObject * self)5626 load_unicode(UnpicklerObject *self)
5627 {
5628     PyObject *str;
5629     Py_ssize_t len;
5630     char *s = NULL;
5631 
5632     if ((len = _Unpickler_Readline(self, &s)) < 0)
5633         return -1;
5634     if (len < 1)
5635         return bad_readline();
5636 
5637     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5638     if (str == NULL)
5639         return -1;
5640 
5641     PDATA_PUSH(self->stack, str, -1);
5642     return 0;
5643 }
5644 
5645 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5646 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5647 {
5648     PyObject *str;
5649     Py_ssize_t size;
5650     char *s;
5651 
5652     if (_Unpickler_Read(self, &s, nbytes) < 0)
5653         return -1;
5654 
5655     size = calc_binsize(s, nbytes);
5656     if (size < 0) {
5657         PyErr_Format(PyExc_OverflowError,
5658                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5659                      PY_SSIZE_T_MAX);
5660         return -1;
5661     }
5662 
5663     if (_Unpickler_Read(self, &s, size) < 0)
5664         return -1;
5665 
5666     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5667     if (str == NULL)
5668         return -1;
5669 
5670     PDATA_PUSH(self->stack, str, -1);
5671     return 0;
5672 }
5673 
5674 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5675 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5676 {
5677     PyObject *tuple;
5678 
5679     if (Py_SIZE(self->stack) < len)
5680         return Pdata_stack_underflow(self->stack);
5681 
5682     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5683     if (tuple == NULL)
5684         return -1;
5685     PDATA_PUSH(self->stack, tuple, -1);
5686     return 0;
5687 }
5688 
5689 static int
load_tuple(UnpicklerObject * self)5690 load_tuple(UnpicklerObject *self)
5691 {
5692     Py_ssize_t i;
5693 
5694     if ((i = marker(self)) < 0)
5695         return -1;
5696 
5697     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5698 }
5699 
5700 static int
load_empty_list(UnpicklerObject * self)5701 load_empty_list(UnpicklerObject *self)
5702 {
5703     PyObject *list;
5704 
5705     if ((list = PyList_New(0)) == NULL)
5706         return -1;
5707     PDATA_PUSH(self->stack, list, -1);
5708     return 0;
5709 }
5710 
5711 static int
load_empty_dict(UnpicklerObject * self)5712 load_empty_dict(UnpicklerObject *self)
5713 {
5714     PyObject *dict;
5715 
5716     if ((dict = PyDict_New()) == NULL)
5717         return -1;
5718     PDATA_PUSH(self->stack, dict, -1);
5719     return 0;
5720 }
5721 
5722 static int
load_empty_set(UnpicklerObject * self)5723 load_empty_set(UnpicklerObject *self)
5724 {
5725     PyObject *set;
5726 
5727     if ((set = PySet_New(NULL)) == NULL)
5728         return -1;
5729     PDATA_PUSH(self->stack, set, -1);
5730     return 0;
5731 }
5732 
5733 static int
load_list(UnpicklerObject * self)5734 load_list(UnpicklerObject *self)
5735 {
5736     PyObject *list;
5737     Py_ssize_t i;
5738 
5739     if ((i = marker(self)) < 0)
5740         return -1;
5741 
5742     list = Pdata_poplist(self->stack, i);
5743     if (list == NULL)
5744         return -1;
5745     PDATA_PUSH(self->stack, list, -1);
5746     return 0;
5747 }
5748 
5749 static int
load_dict(UnpicklerObject * self)5750 load_dict(UnpicklerObject *self)
5751 {
5752     PyObject *dict, *key, *value;
5753     Py_ssize_t i, j, k;
5754 
5755     if ((i = marker(self)) < 0)
5756         return -1;
5757     j = Py_SIZE(self->stack);
5758 
5759     if ((dict = PyDict_New()) == NULL)
5760         return -1;
5761 
5762     if ((j - i) % 2 != 0) {
5763         PickleState *st = _Pickle_GetGlobalState();
5764         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5765         Py_DECREF(dict);
5766         return -1;
5767     }
5768 
5769     for (k = i + 1; k < j; k += 2) {
5770         key = self->stack->data[k - 1];
5771         value = self->stack->data[k];
5772         if (PyDict_SetItem(dict, key, value) < 0) {
5773             Py_DECREF(dict);
5774             return -1;
5775         }
5776     }
5777     Pdata_clear(self->stack, i);
5778     PDATA_PUSH(self->stack, dict, -1);
5779     return 0;
5780 }
5781 
5782 static int
load_frozenset(UnpicklerObject * self)5783 load_frozenset(UnpicklerObject *self)
5784 {
5785     PyObject *items;
5786     PyObject *frozenset;
5787     Py_ssize_t i;
5788 
5789     if ((i = marker(self)) < 0)
5790         return -1;
5791 
5792     items = Pdata_poptuple(self->stack, i);
5793     if (items == NULL)
5794         return -1;
5795 
5796     frozenset = PyFrozenSet_New(items);
5797     Py_DECREF(items);
5798     if (frozenset == NULL)
5799         return -1;
5800 
5801     PDATA_PUSH(self->stack, frozenset, -1);
5802     return 0;
5803 }
5804 
5805 static PyObject *
instantiate(PyObject * cls,PyObject * args)5806 instantiate(PyObject *cls, PyObject *args)
5807 {
5808     /* Caller must assure args are a tuple.  Normally, args come from
5809        Pdata_poptuple which packs objects from the top of the stack
5810        into a newly created tuple. */
5811     assert(PyTuple_Check(args));
5812     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5813         _Py_IDENTIFIER(__getinitargs__);
5814         _Py_IDENTIFIER(__new__);
5815         PyObject *func;
5816         if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5817             return NULL;
5818         }
5819         if (func == NULL) {
5820             return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
5821         }
5822         Py_DECREF(func);
5823     }
5824     return PyObject_CallObject(cls, args);
5825 }
5826 
5827 static int
load_obj(UnpicklerObject * self)5828 load_obj(UnpicklerObject *self)
5829 {
5830     PyObject *cls, *args, *obj = NULL;
5831     Py_ssize_t i;
5832 
5833     if ((i = marker(self)) < 0)
5834         return -1;
5835 
5836     if (Py_SIZE(self->stack) - i < 1)
5837         return Pdata_stack_underflow(self->stack);
5838 
5839     args = Pdata_poptuple(self->stack, i + 1);
5840     if (args == NULL)
5841         return -1;
5842 
5843     PDATA_POP(self->stack, cls);
5844     if (cls) {
5845         obj = instantiate(cls, args);
5846         Py_DECREF(cls);
5847     }
5848     Py_DECREF(args);
5849     if (obj == NULL)
5850         return -1;
5851 
5852     PDATA_PUSH(self->stack, obj, -1);
5853     return 0;
5854 }
5855 
5856 static int
load_inst(UnpicklerObject * self)5857 load_inst(UnpicklerObject *self)
5858 {
5859     PyObject *cls = NULL;
5860     PyObject *args = NULL;
5861     PyObject *obj = NULL;
5862     PyObject *module_name;
5863     PyObject *class_name;
5864     Py_ssize_t len;
5865     Py_ssize_t i;
5866     char *s;
5867 
5868     if ((i = marker(self)) < 0)
5869         return -1;
5870     if ((len = _Unpickler_Readline(self, &s)) < 0)
5871         return -1;
5872     if (len < 2)
5873         return bad_readline();
5874 
5875     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5876        identifiers are permitted in Python 3.0, since the INST opcode is only
5877        supported by older protocols on Python 2.x. */
5878     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5879     if (module_name == NULL)
5880         return -1;
5881 
5882     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5883         if (len < 2) {
5884             Py_DECREF(module_name);
5885             return bad_readline();
5886         }
5887         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5888         if (class_name != NULL) {
5889             cls = find_class(self, module_name, class_name);
5890             Py_DECREF(class_name);
5891         }
5892     }
5893     Py_DECREF(module_name);
5894 
5895     if (cls == NULL)
5896         return -1;
5897 
5898     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5899         obj = instantiate(cls, args);
5900         Py_DECREF(args);
5901     }
5902     Py_DECREF(cls);
5903 
5904     if (obj == NULL)
5905         return -1;
5906 
5907     PDATA_PUSH(self->stack, obj, -1);
5908     return 0;
5909 }
5910 
5911 static void
newobj_unpickling_error(const char * msg,int use_kwargs,PyObject * arg)5912 newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
5913 {
5914     PickleState *st = _Pickle_GetGlobalState();
5915     PyErr_Format(st->UnpicklingError, msg,
5916                  use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5917                  Py_TYPE(arg)->tp_name);
5918 }
5919 
5920 static int
load_newobj(UnpicklerObject * self,int use_kwargs)5921 load_newobj(UnpicklerObject *self, int use_kwargs)
5922 {
5923     PyObject *cls, *args, *kwargs = NULL;
5924     PyObject *obj;
5925 
5926     /* Stack is ... cls args [kwargs], and we want to call
5927      * cls.__new__(cls, *args, **kwargs).
5928      */
5929     if (use_kwargs) {
5930         PDATA_POP(self->stack, kwargs);
5931         if (kwargs == NULL) {
5932             return -1;
5933         }
5934     }
5935     PDATA_POP(self->stack, args);
5936     if (args == NULL) {
5937         Py_XDECREF(kwargs);
5938         return -1;
5939     }
5940     PDATA_POP(self->stack, cls);
5941     if (cls == NULL) {
5942         Py_XDECREF(kwargs);
5943         Py_DECREF(args);
5944         return -1;
5945     }
5946 
5947     if (!PyType_Check(cls)) {
5948         newobj_unpickling_error("%s class argument must be a type, not %.200s",
5949                                 use_kwargs, cls);
5950         goto error;
5951     }
5952     if (((PyTypeObject *)cls)->tp_new == NULL) {
5953         newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5954                                 use_kwargs, cls);
5955         goto error;
5956     }
5957     if (!PyTuple_Check(args)) {
5958         newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5959                                 use_kwargs, args);
5960         goto error;
5961     }
5962     if (use_kwargs && !PyDict_Check(kwargs)) {
5963         newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5964                                 use_kwargs, kwargs);
5965         goto error;
5966     }
5967 
5968     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5969     if (obj == NULL) {
5970         goto error;
5971     }
5972     Py_XDECREF(kwargs);
5973     Py_DECREF(args);
5974     Py_DECREF(cls);
5975     PDATA_PUSH(self->stack, obj, -1);
5976     return 0;
5977 
5978 error:
5979     Py_XDECREF(kwargs);
5980     Py_DECREF(args);
5981     Py_DECREF(cls);
5982     return -1;
5983 }
5984 
5985 static int
load_global(UnpicklerObject * self)5986 load_global(UnpicklerObject *self)
5987 {
5988     PyObject *global = NULL;
5989     PyObject *module_name;
5990     PyObject *global_name;
5991     Py_ssize_t len;
5992     char *s;
5993 
5994     if ((len = _Unpickler_Readline(self, &s)) < 0)
5995         return -1;
5996     if (len < 2)
5997         return bad_readline();
5998     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5999     if (!module_name)
6000         return -1;
6001 
6002     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6003         if (len < 2) {
6004             Py_DECREF(module_name);
6005             return bad_readline();
6006         }
6007         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6008         if (global_name) {
6009             global = find_class(self, module_name, global_name);
6010             Py_DECREF(global_name);
6011         }
6012     }
6013     Py_DECREF(module_name);
6014 
6015     if (global == NULL)
6016         return -1;
6017     PDATA_PUSH(self->stack, global, -1);
6018     return 0;
6019 }
6020 
6021 static int
load_stack_global(UnpicklerObject * self)6022 load_stack_global(UnpicklerObject *self)
6023 {
6024     PyObject *global;
6025     PyObject *module_name;
6026     PyObject *global_name;
6027 
6028     PDATA_POP(self->stack, global_name);
6029     PDATA_POP(self->stack, module_name);
6030     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6031         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6032         PickleState *st = _Pickle_GetGlobalState();
6033         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6034         Py_XDECREF(global_name);
6035         Py_XDECREF(module_name);
6036         return -1;
6037     }
6038     global = find_class(self, module_name, global_name);
6039     Py_DECREF(global_name);
6040     Py_DECREF(module_name);
6041     if (global == NULL)
6042         return -1;
6043     PDATA_PUSH(self->stack, global, -1);
6044     return 0;
6045 }
6046 
6047 static int
load_persid(UnpicklerObject * self)6048 load_persid(UnpicklerObject *self)
6049 {
6050     PyObject *pid, *obj;
6051     Py_ssize_t len;
6052     char *s;
6053 
6054     if (self->pers_func) {
6055         if ((len = _Unpickler_Readline(self, &s)) < 0)
6056             return -1;
6057         if (len < 1)
6058             return bad_readline();
6059 
6060         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6061         if (pid == NULL) {
6062             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6063                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6064                                 "persistent IDs in protocol 0 must be "
6065                                 "ASCII strings");
6066             }
6067             return -1;
6068         }
6069 
6070         obj = call_method(self->pers_func, self->pers_func_self, pid);
6071         Py_DECREF(pid);
6072         if (obj == NULL)
6073             return -1;
6074 
6075         PDATA_PUSH(self->stack, obj, -1);
6076         return 0;
6077     }
6078     else {
6079         PickleState *st = _Pickle_GetGlobalState();
6080         PyErr_SetString(st->UnpicklingError,
6081                         "A load persistent id instruction was encountered,\n"
6082                         "but no persistent_load function was specified.");
6083         return -1;
6084     }
6085 }
6086 
6087 static int
load_binpersid(UnpicklerObject * self)6088 load_binpersid(UnpicklerObject *self)
6089 {
6090     PyObject *pid, *obj;
6091 
6092     if (self->pers_func) {
6093         PDATA_POP(self->stack, pid);
6094         if (pid == NULL)
6095             return -1;
6096 
6097         obj = call_method(self->pers_func, self->pers_func_self, pid);
6098         Py_DECREF(pid);
6099         if (obj == NULL)
6100             return -1;
6101 
6102         PDATA_PUSH(self->stack, obj, -1);
6103         return 0;
6104     }
6105     else {
6106         PickleState *st = _Pickle_GetGlobalState();
6107         PyErr_SetString(st->UnpicklingError,
6108                         "A load persistent id instruction was encountered,\n"
6109                         "but no persistent_load function was specified.");
6110         return -1;
6111     }
6112 }
6113 
6114 static int
load_pop(UnpicklerObject * self)6115 load_pop(UnpicklerObject *self)
6116 {
6117     Py_ssize_t len = Py_SIZE(self->stack);
6118 
6119     /* Note that we split the (pickle.py) stack into two stacks,
6120      * an object stack and a mark stack. We have to be clever and
6121      * pop the right one. We do this by looking at the top of the
6122      * mark stack first, and only signalling a stack underflow if
6123      * the object stack is empty and the mark stack doesn't match
6124      * our expectations.
6125      */
6126     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6127         self->num_marks--;
6128         self->stack->mark_set = self->num_marks != 0;
6129         self->stack->fence = self->num_marks ?
6130                 self->marks[self->num_marks - 1] : 0;
6131     } else if (len <= self->stack->fence)
6132         return Pdata_stack_underflow(self->stack);
6133     else {
6134         len--;
6135         Py_DECREF(self->stack->data[len]);
6136         Py_SET_SIZE(self->stack, len);
6137     }
6138     return 0;
6139 }
6140 
6141 static int
load_pop_mark(UnpicklerObject * self)6142 load_pop_mark(UnpicklerObject *self)
6143 {
6144     Py_ssize_t i;
6145 
6146     if ((i = marker(self)) < 0)
6147         return -1;
6148 
6149     Pdata_clear(self->stack, i);
6150 
6151     return 0;
6152 }
6153 
6154 static int
load_dup(UnpicklerObject * self)6155 load_dup(UnpicklerObject *self)
6156 {
6157     PyObject *last;
6158     Py_ssize_t len = Py_SIZE(self->stack);
6159 
6160     if (len <= self->stack->fence)
6161         return Pdata_stack_underflow(self->stack);
6162     last = self->stack->data[len - 1];
6163     PDATA_APPEND(self->stack, last, -1);
6164     return 0;
6165 }
6166 
6167 static int
load_get(UnpicklerObject * self)6168 load_get(UnpicklerObject *self)
6169 {
6170     PyObject *key, *value;
6171     Py_ssize_t idx;
6172     Py_ssize_t len;
6173     char *s;
6174 
6175     if ((len = _Unpickler_Readline(self, &s)) < 0)
6176         return -1;
6177     if (len < 2)
6178         return bad_readline();
6179 
6180     key = PyLong_FromString(s, NULL, 10);
6181     if (key == NULL)
6182         return -1;
6183     idx = PyLong_AsSsize_t(key);
6184     if (idx == -1 && PyErr_Occurred()) {
6185         Py_DECREF(key);
6186         return -1;
6187     }
6188 
6189     value = _Unpickler_MemoGet(self, idx);
6190     if (value == NULL) {
6191         if (!PyErr_Occurred()) {
6192            PickleState *st = _Pickle_GetGlobalState();
6193            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6194         }
6195         Py_DECREF(key);
6196         return -1;
6197     }
6198     Py_DECREF(key);
6199 
6200     PDATA_APPEND(self->stack, value, -1);
6201     return 0;
6202 }
6203 
6204 static int
load_binget(UnpicklerObject * self)6205 load_binget(UnpicklerObject *self)
6206 {
6207     PyObject *value;
6208     Py_ssize_t idx;
6209     char *s;
6210 
6211     if (_Unpickler_Read(self, &s, 1) < 0)
6212         return -1;
6213 
6214     idx = Py_CHARMASK(s[0]);
6215 
6216     value = _Unpickler_MemoGet(self, idx);
6217     if (value == NULL) {
6218         PyObject *key = PyLong_FromSsize_t(idx);
6219         if (key != NULL) {
6220             PickleState *st = _Pickle_GetGlobalState();
6221             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6222             Py_DECREF(key);
6223         }
6224         return -1;
6225     }
6226 
6227     PDATA_APPEND(self->stack, value, -1);
6228     return 0;
6229 }
6230 
6231 static int
load_long_binget(UnpicklerObject * self)6232 load_long_binget(UnpicklerObject *self)
6233 {
6234     PyObject *value;
6235     Py_ssize_t idx;
6236     char *s;
6237 
6238     if (_Unpickler_Read(self, &s, 4) < 0)
6239         return -1;
6240 
6241     idx = calc_binsize(s, 4);
6242 
6243     value = _Unpickler_MemoGet(self, idx);
6244     if (value == NULL) {
6245         PyObject *key = PyLong_FromSsize_t(idx);
6246         if (key != NULL) {
6247             PickleState *st = _Pickle_GetGlobalState();
6248             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6249             Py_DECREF(key);
6250         }
6251         return -1;
6252     }
6253 
6254     PDATA_APPEND(self->stack, value, -1);
6255     return 0;
6256 }
6257 
6258 /* Push an object from the extension registry (EXT[124]).  nbytes is
6259  * the number of bytes following the opcode, holding the index (code) value.
6260  */
6261 static int
load_extension(UnpicklerObject * self,int nbytes)6262 load_extension(UnpicklerObject *self, int nbytes)
6263 {
6264     char *codebytes;            /* the nbytes bytes after the opcode */
6265     long code;                  /* calc_binint returns long */
6266     PyObject *py_code;          /* code as a Python int */
6267     PyObject *obj;              /* the object to push */
6268     PyObject *pair;             /* (module_name, class_name) */
6269     PyObject *module_name, *class_name;
6270     PickleState *st = _Pickle_GetGlobalState();
6271 
6272     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6273     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6274         return -1;
6275     code = calc_binint(codebytes, nbytes);
6276     if (code <= 0) {            /* note that 0 is forbidden */
6277         /* Corrupt or hostile pickle. */
6278         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6279         return -1;
6280     }
6281 
6282     /* Look for the code in the cache. */
6283     py_code = PyLong_FromLong(code);
6284     if (py_code == NULL)
6285         return -1;
6286     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6287     if (obj != NULL) {
6288         /* Bingo. */
6289         Py_DECREF(py_code);
6290         PDATA_APPEND(self->stack, obj, -1);
6291         return 0;
6292     }
6293     if (PyErr_Occurred()) {
6294         Py_DECREF(py_code);
6295         return -1;
6296     }
6297 
6298     /* Look up the (module_name, class_name) pair. */
6299     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6300     if (pair == NULL) {
6301         Py_DECREF(py_code);
6302         if (!PyErr_Occurred()) {
6303             PyErr_Format(PyExc_ValueError, "unregistered extension "
6304                          "code %ld", code);
6305         }
6306         return -1;
6307     }
6308     /* Since the extension registry is manipulable via Python code,
6309      * confirm that pair is really a 2-tuple of strings.
6310      */
6311     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6312         goto error;
6313     }
6314 
6315     module_name = PyTuple_GET_ITEM(pair, 0);
6316     if (!PyUnicode_Check(module_name)) {
6317         goto error;
6318     }
6319 
6320     class_name = PyTuple_GET_ITEM(pair, 1);
6321     if (!PyUnicode_Check(class_name)) {
6322         goto error;
6323     }
6324 
6325     /* Load the object. */
6326     obj = find_class(self, module_name, class_name);
6327     if (obj == NULL) {
6328         Py_DECREF(py_code);
6329         return -1;
6330     }
6331     /* Cache code -> obj. */
6332     code = PyDict_SetItem(st->extension_cache, py_code, obj);
6333     Py_DECREF(py_code);
6334     if (code < 0) {
6335         Py_DECREF(obj);
6336         return -1;
6337     }
6338     PDATA_PUSH(self->stack, obj, -1);
6339     return 0;
6340 
6341 error:
6342     Py_DECREF(py_code);
6343     PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6344                  "isn't a 2-tuple of strings", code);
6345     return -1;
6346 }
6347 
6348 static int
load_put(UnpicklerObject * self)6349 load_put(UnpicklerObject *self)
6350 {
6351     PyObject *key, *value;
6352     Py_ssize_t idx;
6353     Py_ssize_t len;
6354     char *s = NULL;
6355 
6356     if ((len = _Unpickler_Readline(self, &s)) < 0)
6357         return -1;
6358     if (len < 2)
6359         return bad_readline();
6360     if (Py_SIZE(self->stack) <= self->stack->fence)
6361         return Pdata_stack_underflow(self->stack);
6362     value = self->stack->data[Py_SIZE(self->stack) - 1];
6363 
6364     key = PyLong_FromString(s, NULL, 10);
6365     if (key == NULL)
6366         return -1;
6367     idx = PyLong_AsSsize_t(key);
6368     Py_DECREF(key);
6369     if (idx < 0) {
6370         if (!PyErr_Occurred())
6371             PyErr_SetString(PyExc_ValueError,
6372                             "negative PUT argument");
6373         return -1;
6374     }
6375 
6376     return _Unpickler_MemoPut(self, idx, value);
6377 }
6378 
6379 static int
load_binput(UnpicklerObject * self)6380 load_binput(UnpicklerObject *self)
6381 {
6382     PyObject *value;
6383     Py_ssize_t idx;
6384     char *s;
6385 
6386     if (_Unpickler_Read(self, &s, 1) < 0)
6387         return -1;
6388 
6389     if (Py_SIZE(self->stack) <= self->stack->fence)
6390         return Pdata_stack_underflow(self->stack);
6391     value = self->stack->data[Py_SIZE(self->stack) - 1];
6392 
6393     idx = Py_CHARMASK(s[0]);
6394 
6395     return _Unpickler_MemoPut(self, idx, value);
6396 }
6397 
6398 static int
load_long_binput(UnpicklerObject * self)6399 load_long_binput(UnpicklerObject *self)
6400 {
6401     PyObject *value;
6402     Py_ssize_t idx;
6403     char *s;
6404 
6405     if (_Unpickler_Read(self, &s, 4) < 0)
6406         return -1;
6407 
6408     if (Py_SIZE(self->stack) <= self->stack->fence)
6409         return Pdata_stack_underflow(self->stack);
6410     value = self->stack->data[Py_SIZE(self->stack) - 1];
6411 
6412     idx = calc_binsize(s, 4);
6413     if (idx < 0) {
6414         PyErr_SetString(PyExc_ValueError,
6415                         "negative LONG_BINPUT argument");
6416         return -1;
6417     }
6418 
6419     return _Unpickler_MemoPut(self, idx, value);
6420 }
6421 
6422 static int
load_memoize(UnpicklerObject * self)6423 load_memoize(UnpicklerObject *self)
6424 {
6425     PyObject *value;
6426 
6427     if (Py_SIZE(self->stack) <= self->stack->fence)
6428         return Pdata_stack_underflow(self->stack);
6429     value = self->stack->data[Py_SIZE(self->stack) - 1];
6430 
6431     return _Unpickler_MemoPut(self, self->memo_len, value);
6432 }
6433 
6434 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6435 do_append(UnpicklerObject *self, Py_ssize_t x)
6436 {
6437     PyObject *value;
6438     PyObject *slice;
6439     PyObject *list;
6440     PyObject *result;
6441     Py_ssize_t len, i;
6442 
6443     len = Py_SIZE(self->stack);
6444     if (x > len || x <= self->stack->fence)
6445         return Pdata_stack_underflow(self->stack);
6446     if (len == x)  /* nothing to do */
6447         return 0;
6448 
6449     list = self->stack->data[x - 1];
6450 
6451     if (PyList_CheckExact(list)) {
6452         Py_ssize_t list_len;
6453         int ret;
6454 
6455         slice = Pdata_poplist(self->stack, x);
6456         if (!slice)
6457             return -1;
6458         list_len = PyList_GET_SIZE(list);
6459         ret = PyList_SetSlice(list, list_len, list_len, slice);
6460         Py_DECREF(slice);
6461         return ret;
6462     }
6463     else {
6464         PyObject *extend_func;
6465         _Py_IDENTIFIER(extend);
6466 
6467         if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6468             return -1;
6469         }
6470         if (extend_func != NULL) {
6471             slice = Pdata_poplist(self->stack, x);
6472             if (!slice) {
6473                 Py_DECREF(extend_func);
6474                 return -1;
6475             }
6476             result = _Pickle_FastCall(extend_func, slice);
6477             Py_DECREF(extend_func);
6478             if (result == NULL)
6479                 return -1;
6480             Py_DECREF(result);
6481         }
6482         else {
6483             PyObject *append_func;
6484             _Py_IDENTIFIER(append);
6485 
6486             /* Even if the PEP 307 requires extend() and append() methods,
6487                fall back on append() if the object has no extend() method
6488                for backward compatibility. */
6489             append_func = _PyObject_GetAttrId(list, &PyId_append);
6490             if (append_func == NULL)
6491                 return -1;
6492             for (i = x; i < len; i++) {
6493                 value = self->stack->data[i];
6494                 result = _Pickle_FastCall(append_func, value);
6495                 if (result == NULL) {
6496                     Pdata_clear(self->stack, i + 1);
6497                     Py_SET_SIZE(self->stack, x);
6498                     Py_DECREF(append_func);
6499                     return -1;
6500                 }
6501                 Py_DECREF(result);
6502             }
6503             Py_SET_SIZE(self->stack, x);
6504             Py_DECREF(append_func);
6505         }
6506     }
6507 
6508     return 0;
6509 }
6510 
6511 static int
load_append(UnpicklerObject * self)6512 load_append(UnpicklerObject *self)
6513 {
6514     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6515         return Pdata_stack_underflow(self->stack);
6516     return do_append(self, Py_SIZE(self->stack) - 1);
6517 }
6518 
6519 static int
load_appends(UnpicklerObject * self)6520 load_appends(UnpicklerObject *self)
6521 {
6522     Py_ssize_t i = marker(self);
6523     if (i < 0)
6524         return -1;
6525     return do_append(self, i);
6526 }
6527 
6528 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6529 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6530 {
6531     PyObject *value, *key;
6532     PyObject *dict;
6533     Py_ssize_t len, i;
6534     int status = 0;
6535 
6536     len = Py_SIZE(self->stack);
6537     if (x > len || x <= self->stack->fence)
6538         return Pdata_stack_underflow(self->stack);
6539     if (len == x)  /* nothing to do */
6540         return 0;
6541     if ((len - x) % 2 != 0) {
6542         PickleState *st = _Pickle_GetGlobalState();
6543         /* Corrupt or hostile pickle -- we never write one like this. */
6544         PyErr_SetString(st->UnpicklingError,
6545                         "odd number of items for SETITEMS");
6546         return -1;
6547     }
6548 
6549     /* Here, dict does not actually need to be a PyDict; it could be anything
6550        that supports the __setitem__ attribute. */
6551     dict = self->stack->data[x - 1];
6552 
6553     for (i = x + 1; i < len; i += 2) {
6554         key = self->stack->data[i - 1];
6555         value = self->stack->data[i];
6556         if (PyObject_SetItem(dict, key, value) < 0) {
6557             status = -1;
6558             break;
6559         }
6560     }
6561 
6562     Pdata_clear(self->stack, x);
6563     return status;
6564 }
6565 
6566 static int
load_setitem(UnpicklerObject * self)6567 load_setitem(UnpicklerObject *self)
6568 {
6569     return do_setitems(self, Py_SIZE(self->stack) - 2);
6570 }
6571 
6572 static int
load_setitems(UnpicklerObject * self)6573 load_setitems(UnpicklerObject *self)
6574 {
6575     Py_ssize_t i = marker(self);
6576     if (i < 0)
6577         return -1;
6578     return do_setitems(self, i);
6579 }
6580 
6581 static int
load_additems(UnpicklerObject * self)6582 load_additems(UnpicklerObject *self)
6583 {
6584     PyObject *set;
6585     Py_ssize_t mark, len, i;
6586 
6587     mark =  marker(self);
6588     if (mark < 0)
6589         return -1;
6590     len = Py_SIZE(self->stack);
6591     if (mark > len || mark <= self->stack->fence)
6592         return Pdata_stack_underflow(self->stack);
6593     if (len == mark)  /* nothing to do */
6594         return 0;
6595 
6596     set = self->stack->data[mark - 1];
6597 
6598     if (PySet_Check(set)) {
6599         PyObject *items;
6600         int status;
6601 
6602         items = Pdata_poptuple(self->stack, mark);
6603         if (items == NULL)
6604             return -1;
6605 
6606         status = _PySet_Update(set, items);
6607         Py_DECREF(items);
6608         return status;
6609     }
6610     else {
6611         PyObject *add_func;
6612         _Py_IDENTIFIER(add);
6613 
6614         add_func = _PyObject_GetAttrId(set, &PyId_add);
6615         if (add_func == NULL)
6616             return -1;
6617         for (i = mark; i < len; i++) {
6618             PyObject *result;
6619             PyObject *item;
6620 
6621             item = self->stack->data[i];
6622             result = _Pickle_FastCall(add_func, item);
6623             if (result == NULL) {
6624                 Pdata_clear(self->stack, i + 1);
6625                 Py_SET_SIZE(self->stack, mark);
6626                 return -1;
6627             }
6628             Py_DECREF(result);
6629         }
6630         Py_SET_SIZE(self->stack, mark);
6631     }
6632 
6633     return 0;
6634 }
6635 
6636 static int
load_build(UnpicklerObject * self)6637 load_build(UnpicklerObject *self)
6638 {
6639     PyObject *state, *inst, *slotstate;
6640     PyObject *setstate;
6641     int status = 0;
6642     _Py_IDENTIFIER(__setstate__);
6643 
6644     /* Stack is ... instance, state.  We want to leave instance at
6645      * the stack top, possibly mutated via instance.__setstate__(state).
6646      */
6647     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6648         return Pdata_stack_underflow(self->stack);
6649 
6650     PDATA_POP(self->stack, state);
6651     if (state == NULL)
6652         return -1;
6653 
6654     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6655 
6656     if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6657         Py_DECREF(state);
6658         return -1;
6659     }
6660     if (setstate != NULL) {
6661         PyObject *result;
6662 
6663         /* The explicit __setstate__ is responsible for everything. */
6664         result = _Pickle_FastCall(setstate, state);
6665         Py_DECREF(setstate);
6666         if (result == NULL)
6667             return -1;
6668         Py_DECREF(result);
6669         return 0;
6670     }
6671 
6672     /* A default __setstate__.  First see whether state embeds a
6673      * slot state dict too (a proto 2 addition).
6674      */
6675     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6676         PyObject *tmp = state;
6677 
6678         state = PyTuple_GET_ITEM(tmp, 0);
6679         slotstate = PyTuple_GET_ITEM(tmp, 1);
6680         Py_INCREF(state);
6681         Py_INCREF(slotstate);
6682         Py_DECREF(tmp);
6683     }
6684     else
6685         slotstate = NULL;
6686 
6687     /* Set inst.__dict__ from the state dict (if any). */
6688     if (state != Py_None) {
6689         PyObject *dict;
6690         PyObject *d_key, *d_value;
6691         Py_ssize_t i;
6692         _Py_IDENTIFIER(__dict__);
6693 
6694         if (!PyDict_Check(state)) {
6695             PickleState *st = _Pickle_GetGlobalState();
6696             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6697             goto error;
6698         }
6699         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6700         if (dict == NULL)
6701             goto error;
6702 
6703         i = 0;
6704         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6705             /* normally the keys for instance attributes are
6706                interned.  we should try to do that here. */
6707             Py_INCREF(d_key);
6708             if (PyUnicode_CheckExact(d_key))
6709                 PyUnicode_InternInPlace(&d_key);
6710             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6711                 Py_DECREF(d_key);
6712                 goto error;
6713             }
6714             Py_DECREF(d_key);
6715         }
6716         Py_DECREF(dict);
6717     }
6718 
6719     /* Also set instance attributes from the slotstate dict (if any). */
6720     if (slotstate != NULL) {
6721         PyObject *d_key, *d_value;
6722         Py_ssize_t i;
6723 
6724         if (!PyDict_Check(slotstate)) {
6725             PickleState *st = _Pickle_GetGlobalState();
6726             PyErr_SetString(st->UnpicklingError,
6727                             "slot state is not a dictionary");
6728             goto error;
6729         }
6730         i = 0;
6731         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6732             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6733                 goto error;
6734         }
6735     }
6736 
6737     if (0) {
6738   error:
6739         status = -1;
6740     }
6741 
6742     Py_DECREF(state);
6743     Py_XDECREF(slotstate);
6744     return status;
6745 }
6746 
6747 static int
load_mark(UnpicklerObject * self)6748 load_mark(UnpicklerObject *self)
6749 {
6750 
6751     /* Note that we split the (pickle.py) stack into two stacks, an
6752      * object stack and a mark stack. Here we push a mark onto the
6753      * mark stack.
6754      */
6755 
6756     if (self->num_marks >= self->marks_size) {
6757         size_t alloc = ((size_t)self->num_marks << 1) + 20;
6758         Py_ssize_t *marks_new = self->marks;
6759         PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6760         if (marks_new == NULL) {
6761             PyErr_NoMemory();
6762             return -1;
6763         }
6764         self->marks = marks_new;
6765         self->marks_size = (Py_ssize_t)alloc;
6766     }
6767 
6768     self->stack->mark_set = 1;
6769     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6770 
6771     return 0;
6772 }
6773 
6774 static int
load_reduce(UnpicklerObject * self)6775 load_reduce(UnpicklerObject *self)
6776 {
6777     PyObject *callable = NULL;
6778     PyObject *argtup = NULL;
6779     PyObject *obj = NULL;
6780 
6781     PDATA_POP(self->stack, argtup);
6782     if (argtup == NULL)
6783         return -1;
6784     PDATA_POP(self->stack, callable);
6785     if (callable) {
6786         obj = PyObject_CallObject(callable, argtup);
6787         Py_DECREF(callable);
6788     }
6789     Py_DECREF(argtup);
6790 
6791     if (obj == NULL)
6792         return -1;
6793 
6794     PDATA_PUSH(self->stack, obj, -1);
6795     return 0;
6796 }
6797 
6798 /* Just raises an error if we don't know the protocol specified.  PROTO
6799  * is the first opcode for protocols >= 2.
6800  */
6801 static int
load_proto(UnpicklerObject * self)6802 load_proto(UnpicklerObject *self)
6803 {
6804     char *s;
6805     int i;
6806 
6807     if (_Unpickler_Read(self, &s, 1) < 0)
6808         return -1;
6809 
6810     i = (unsigned char)s[0];
6811     if (i <= HIGHEST_PROTOCOL) {
6812         self->proto = i;
6813         return 0;
6814     }
6815 
6816     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6817     return -1;
6818 }
6819 
6820 static int
load_frame(UnpicklerObject * self)6821 load_frame(UnpicklerObject *self)
6822 {
6823     char *s;
6824     Py_ssize_t frame_len;
6825 
6826     if (_Unpickler_Read(self, &s, 8) < 0)
6827         return -1;
6828 
6829     frame_len = calc_binsize(s, 8);
6830     if (frame_len < 0) {
6831         PyErr_Format(PyExc_OverflowError,
6832                      "FRAME length exceeds system's maximum of %zd bytes",
6833                      PY_SSIZE_T_MAX);
6834         return -1;
6835     }
6836 
6837     if (_Unpickler_Read(self, &s, frame_len) < 0)
6838         return -1;
6839 
6840     /* Rewind to start of frame */
6841     self->next_read_idx -= frame_len;
6842     return 0;
6843 }
6844 
/* Main unpickling loop.
 *
 * Resets the per-load state (marks, fence, protocol, object stack), then
 * reads one opcode byte at a time and dispatches to the matching load_*
 * handler until STOP is read or a handler fails.
 *
 * Returns the object popped from the top of the stack after STOP, or NULL
 * with an exception set on failure.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    /* Start from a clean state; anything left on the stack is dropped. */
    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       On handler failure the `break` leaves the switch and falls to the
       loop-ending `break` below; on success `continue` fetches the next
       opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0) {
            PickleState *st = _Pickle_GetGlobalState();
            /* An UnpicklingError raised while reading the opcode byte is
               replaced with EOFError; other exceptions propagate as is. */
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP_ARG(NEWOBJ, load_newobj, 0)
        OP_ARG(NEWOBJ_EX, load_newobj, 1)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            {
                /* Unknown opcode: show it as a character when it is
                   printable ASCII other than quote and backslash, and as
                   a hex escape otherwise. */
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        break;                  /* and we are done! */
    }

    /* The loop ends both on STOP and on a failed handler; a pending
       exception tells the two apart. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    /* The result is whatever is on top of the stack after STOP. */
    PDATA_POP(self->stack, value);
    return value;
}
6975 
6976 /*[clinic input]
6977 
6978 _pickle.Unpickler.load
6979 
6980 Load a pickle.
6981 
6982 Read a pickled object representation from the open file object given
6983 in the constructor, and return the reconstituted object hierarchy
6984 specified therein.
6985 [clinic start generated code]*/
6986 
6987 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6988 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6989 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6990 {
6991     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6992 
6993     /* Check whether the Unpickler was initialized correctly. This prevents
6994        segfaulting if a subclass overridden __init__ with a function that does
6995        not call Unpickler.__init__(). Here, we simply ensure that self->read
6996        is not NULL. */
6997     if (unpickler->read == NULL) {
6998         PickleState *st = _Pickle_GetGlobalState();
6999         PyErr_Format(st->UnpicklingError,
7000                      "Unpickler.__init__() was not called by %s.__init__()",
7001                      Py_TYPE(unpickler)->tp_name);
7002         return NULL;
7003     }
7004 
7005     return load(unpickler);
7006 }
7007 
7008 /* The name of find_class() is misleading. In newer pickle protocols, this
7009    function is used for loading any global (i.e., functions), not just
7010    classes. The name is kept only for backward compatibility. */
7011 
7012 /*[clinic input]
7013 
7014 _pickle.Unpickler.find_class
7015 
7016   module_name: object
7017   global_name: object
7018   /
7019 
7020 Return an object from a specified module.
7021 
7022 If necessary, the module will be imported. Subclasses may override
7023 this method (e.g. to restrict unpickling of arbitrary classes and
7024 functions).
7025 
7026 This method is called whenever a class or a function object is
7027 needed.  Both arguments passed are str objects.
7028 [clinic start generated code]*/
7029 
7030 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7031 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7032                                   PyObject *module_name,
7033                                   PyObject *global_name)
7034 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7035 {
7036     PyObject *global;
7037     PyObject *module;
7038 
7039     if (PySys_Audit("pickle.find_class", "OO",
7040                     module_name, global_name) < 0) {
7041         return NULL;
7042     }
7043 
7044     /* Try to map the old names used in Python 2.x to the new ones used in
7045        Python 3.x.  We do this only with old pickle protocols and when the
7046        user has not disabled the feature. */
7047     if (self->proto < 3 && self->fix_imports) {
7048         PyObject *key;
7049         PyObject *item;
7050         PickleState *st = _Pickle_GetGlobalState();
7051 
7052         /* Check if the global (i.e., a function or a class) was renamed
7053            or moved to another module. */
7054         key = PyTuple_Pack(2, module_name, global_name);
7055         if (key == NULL)
7056             return NULL;
7057         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7058         Py_DECREF(key);
7059         if (item) {
7060             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7061                 PyErr_Format(PyExc_RuntimeError,
7062                              "_compat_pickle.NAME_MAPPING values should be "
7063                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7064                 return NULL;
7065             }
7066             module_name = PyTuple_GET_ITEM(item, 0);
7067             global_name = PyTuple_GET_ITEM(item, 1);
7068             if (!PyUnicode_Check(module_name) ||
7069                 !PyUnicode_Check(global_name)) {
7070                 PyErr_Format(PyExc_RuntimeError,
7071                              "_compat_pickle.NAME_MAPPING values should be "
7072                              "pairs of str, not (%.200s, %.200s)",
7073                              Py_TYPE(module_name)->tp_name,
7074                              Py_TYPE(global_name)->tp_name);
7075                 return NULL;
7076             }
7077         }
7078         else if (PyErr_Occurred()) {
7079             return NULL;
7080         }
7081         else {
7082             /* Check if the module was renamed. */
7083             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7084             if (item) {
7085                 if (!PyUnicode_Check(item)) {
7086                     PyErr_Format(PyExc_RuntimeError,
7087                                 "_compat_pickle.IMPORT_MAPPING values should be "
7088                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
7089                     return NULL;
7090                 }
7091                 module_name = item;
7092             }
7093             else if (PyErr_Occurred()) {
7094                 return NULL;
7095             }
7096         }
7097     }
7098 
7099     /*
7100      * we don't use PyImport_GetModule here, because it can return partially-
7101      * initialised modules, which then cause the getattribute to fail.
7102      */
7103     module = PyImport_Import(module_name);
7104     if (module == NULL) {
7105         return NULL;
7106     }
7107     global = getattribute(module, global_name, self->proto >= 4);
7108     Py_DECREF(module);
7109     return global;
7110 }
7111 
7112 /*[clinic input]
7113 
7114 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7115 
7116 Returns size in memory, in bytes.
7117 [clinic start generated code]*/
7118 
7119 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7120 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7121 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7122 {
7123     Py_ssize_t res;
7124 
7125     res = _PyObject_SIZE(Py_TYPE(self));
7126     if (self->memo != NULL)
7127         res += self->memo_size * sizeof(PyObject *);
7128     if (self->marks != NULL)
7129         res += self->marks_size * sizeof(Py_ssize_t);
7130     if (self->input_line != NULL)
7131         res += strlen(self->input_line) + 1;
7132     if (self->encoding != NULL)
7133         res += strlen(self->encoding) + 1;
7134     if (self->errors != NULL)
7135         res += strlen(self->errors) + 1;
7136     return res;
7137 }
7138 
/* Method table for the Unpickler type.  Each *_METHODDEF macro is defined
   outside this section (presumably by the Argument Clinic generated header)
   and is expected to expand to one PyMethodDef entry for load(),
   find_class() and __sizeof__() respectively. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7145 
/* Deallocator for Unpickler objects: releases every object reference and
 * every heap buffer the unpickler holds, then frees the object itself.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking first so the collector never sees a half-torn-down
       object. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->readinto);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->buffers);
    /* Release the input buffer view only if one is held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Drop the memo contents, then the plain heap allocations. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
7170 
7171 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7172 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7173 {
7174     Py_VISIT(self->readline);
7175     Py_VISIT(self->readinto);
7176     Py_VISIT(self->read);
7177     Py_VISIT(self->peek);
7178     Py_VISIT(self->stack);
7179     Py_VISIT(self->pers_func);
7180     Py_VISIT(self->buffers);
7181     return 0;
7182 }
7183 
/* Clear every reference and heap buffer held by the unpickler, leaving
 * each field NULL.  Also called from __init__() to discard previous content
 * when the object is initialised again.  Always returns 0.
 */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->readinto);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->buffers);
    /* Release the input buffer view only if one is held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Each freed pointer is reset to NULL so that a later clear or dealloc
       does not free it a second time. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
7211 
7212 /*[clinic input]
7213 
7214 _pickle.Unpickler.__init__
7215 
7216   file: object
7217   *
7218   fix_imports: bool = True
7219   encoding: str = 'ASCII'
7220   errors: str = 'strict'
7221   buffers: object(c_default="NULL") = ()
7222 
7223 This takes a binary file for reading a pickle data stream.
7224 
7225 The protocol version of the pickle is detected automatically, so no
7226 protocol argument is needed.  Bytes past the pickled object's
7227 representation are ignored.
7228 
7229 The argument *file* must have two methods, a read() method that takes
7230 an integer argument, and a readline() method that requires no
7231 arguments.  Both methods should return bytes.  Thus *file* can be a
7232 binary file object opened for reading, an io.BytesIO object, or any
7233 other custom object that meets this interface.
7234 
7235 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7236 which are used to control compatibility support for pickle stream
7237 generated by Python 2.  If *fix_imports* is True, pickle will try to
7238 map the old Python 2 names to the new names used in Python 3.  The
7239 *encoding* and *errors* tell pickle how to decode 8-bit string
7240 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7241 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7242 string instances as bytes objects.
7243 [clinic start generated code]*/
7244 
7245 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7246 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7247                                 int fix_imports, const char *encoding,
7248                                 const char *errors, PyObject *buffers)
7249 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7250 {
7251     _Py_IDENTIFIER(persistent_load);
7252 
7253     /* In case of multiple __init__() calls, clear previous content. */
7254     if (self->read != NULL)
7255         (void)Unpickler_clear(self);
7256 
7257     if (_Unpickler_SetInputStream(self, file) < 0)
7258         return -1;
7259 
7260     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7261         return -1;
7262 
7263     if (_Unpickler_SetBuffers(self, buffers) < 0)
7264         return -1;
7265 
7266     self->fix_imports = fix_imports;
7267 
7268     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7269                         &self->pers_func, &self->pers_func_self) < 0)
7270     {
7271         return -1;
7272     }
7273 
7274     self->stack = (Pdata *)Pdata_New();
7275     if (self->stack == NULL)
7276         return -1;
7277 
7278     self->memo_size = 32;
7279     self->memo = _Unpickler_NewMemo(self->memo_size);
7280     if (self->memo == NULL)
7281         return -1;
7282 
7283     self->proto = 0;
7284 
7285     return 0;
7286 }
7287 
7288 
7289 /* Define a proxy object for the Unpickler's internal memo object. This is to
7290  * avoid breaking code like:
7291  *  unpickler.memo.clear()
7292  * and
7293  *  unpickler.memo = saved_memo
7294  * Is this a good idea? Not really, but we don't want to break code that uses
7295  * it. Note that we don't implement the entire mapping API here. This is
7296  * intentional, as these should be treated as black-box implementation details.
7297  *
7298  * We do, however, have to implement pickling/unpickling support because of
7299  * real-world code like cvs2svn.
7300  */
7301 
7302 /*[clinic input]
7303 _pickle.UnpicklerMemoProxy.clear
7304 
7305 Remove all items from memo.
7306 [clinic start generated code]*/
7307 
7308 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7309 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7310 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7311 {
7312     _Unpickler_MemoCleanup(self->unpickler);
7313     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7314     if (self->unpickler->memo == NULL)
7315         return NULL;
7316     Py_RETURN_NONE;
7317 }
7318 
7319 /*[clinic input]
7320 _pickle.UnpicklerMemoProxy.copy
7321 
7322 Copy the memo to a new object.
7323 [clinic start generated code]*/
7324 
7325 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7326 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7327 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7328 {
7329     size_t i;
7330     PyObject *new_memo = PyDict_New();
7331     if (new_memo == NULL)
7332         return NULL;
7333 
7334     for (i = 0; i < self->unpickler->memo_size; i++) {
7335         int status;
7336         PyObject *key, *value;
7337 
7338         value = self->unpickler->memo[i];
7339         if (value == NULL)
7340             continue;
7341 
7342         key = PyLong_FromSsize_t(i);
7343         if (key == NULL)
7344             goto error;
7345         status = PyDict_SetItem(new_memo, key, value);
7346         Py_DECREF(key);
7347         if (status < 0)
7348             goto error;
7349     }
7350     return new_memo;
7351 
7352 error:
7353     Py_DECREF(new_memo);
7354     return NULL;
7355 }
7356 
7357 /*[clinic input]
7358 _pickle.UnpicklerMemoProxy.__reduce__
7359 
7360 Implement pickling support.
7361 [clinic start generated code]*/
7362 
7363 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7364 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7365 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7366 {
7367     PyObject *reduce_value;
7368     PyObject *constructor_args;
7369     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7370     if (contents == NULL)
7371         return NULL;
7372 
7373     reduce_value = PyTuple_New(2);
7374     if (reduce_value == NULL) {
7375         Py_DECREF(contents);
7376         return NULL;
7377     }
7378     constructor_args = PyTuple_New(1);
7379     if (constructor_args == NULL) {
7380         Py_DECREF(contents);
7381         Py_DECREF(reduce_value);
7382         return NULL;
7383     }
7384     PyTuple_SET_ITEM(constructor_args, 0, contents);
7385     Py_INCREF((PyObject *)&PyDict_Type);
7386     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7387     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7388     return reduce_value;
7389 }
7390 
/* Method table for UnpicklerMemoProxy: clear(), copy() and __reduce__().
 * Each entry is a *_METHODDEF macro (presumably emitted by Argument Clinic
 * alongside the corresponding *_impl function -- the macros are defined
 * outside this part of the file). */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
7397 
/* tp_dealloc for UnpicklerMemoProxy: stop GC tracking first, then release
 * the reference to the proxied unpickler (taken in UnpicklerMemoProxy_New),
 * and finally free the proxy object itself. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    /* XDECREF: tp_clear may already have set the field to NULL. */
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
7405 
/* tp_traverse for UnpicklerMemoProxy: the proxied unpickler is the only
 * object reference the proxy holds, so it is the only one visited. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
7413 
/* tp_clear for UnpicklerMemoProxy: drop the reference to the proxied
 * unpickler and leave the field NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
7420 
/* Static type object for the proxy handed out by the Unpickler "memo"
 * getter.  It is GC-enabled (traverse/clear below), explicitly unhashable,
 * and exposes only the methods listed in unpicklerproxy_methods.  Slots
 * after tp_methods are left to the implicit zero-initialisation. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /*tp_itemsize*/
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
7451 
7452 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7453 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7454 {
7455     UnpicklerMemoProxyObject *self;
7456 
7457     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7458                            &UnpicklerMemoProxyType);
7459     if (self == NULL)
7460         return NULL;
7461     Py_INCREF(unpickler);
7462     self->unpickler = unpickler;
7463     PyObject_GC_Track(self);
7464     return (PyObject *)self;
7465 }
7466 
7467 /*****************************************************************************/
7468 
7469 
/* Getter for the Unpickler "memo" attribute.  Every access builds a new
 * UnpicklerMemoProxy bound to this unpickler; returns NULL if the proxy
 * cannot be created. */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
    return UnpicklerMemoProxy_New(self);
}
7475 
7476 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7477 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7478 {
7479     PyObject **new_memo;
7480     size_t new_memo_size = 0;
7481 
7482     if (obj == NULL) {
7483         PyErr_SetString(PyExc_TypeError,
7484                         "attribute deletion is not supported");
7485         return -1;
7486     }
7487 
7488     if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7489         UnpicklerObject *unpickler =
7490             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7491 
7492         new_memo_size = unpickler->memo_size;
7493         new_memo = _Unpickler_NewMemo(new_memo_size);
7494         if (new_memo == NULL)
7495             return -1;
7496 
7497         for (size_t i = 0; i < new_memo_size; i++) {
7498             Py_XINCREF(unpickler->memo[i]);
7499             new_memo[i] = unpickler->memo[i];
7500         }
7501     }
7502     else if (PyDict_Check(obj)) {
7503         Py_ssize_t i = 0;
7504         PyObject *key, *value;
7505 
7506         new_memo_size = PyDict_GET_SIZE(obj);
7507         new_memo = _Unpickler_NewMemo(new_memo_size);
7508         if (new_memo == NULL)
7509             return -1;
7510 
7511         while (PyDict_Next(obj, &i, &key, &value)) {
7512             Py_ssize_t idx;
7513             if (!PyLong_Check(key)) {
7514                 PyErr_SetString(PyExc_TypeError,
7515                                 "memo key must be integers");
7516                 goto error;
7517             }
7518             idx = PyLong_AsSsize_t(key);
7519             if (idx == -1 && PyErr_Occurred())
7520                 goto error;
7521             if (idx < 0) {
7522                 PyErr_SetString(PyExc_ValueError,
7523                                 "memo key must be positive integers.");
7524                 goto error;
7525             }
7526             if (_Unpickler_MemoPut(self, idx, value) < 0)
7527                 goto error;
7528         }
7529     }
7530     else {
7531         PyErr_Format(PyExc_TypeError,
7532                      "'memo' attribute must be an UnpicklerMemoProxy object "
7533                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7534         return -1;
7535     }
7536 
7537     _Unpickler_MemoCleanup(self);
7538     self->memo_size = new_memo_size;
7539     self->memo = new_memo;
7540 
7541     return 0;
7542 
7543   error:
7544     if (new_memo_size) {
7545         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7546             Py_XDECREF(new_memo[i]);
7547         }
7548         PyMem_Free(new_memo);
7549     }
7550     return -1;
7551 }
7552 
7553 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7554 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7555 {
7556     if (self->pers_func == NULL) {
7557         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7558         return NULL;
7559     }
7560     return reconstruct_method(self->pers_func, self->pers_func_self);
7561 }
7562 
7563 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7564 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7565 {
7566     if (value == NULL) {
7567         PyErr_SetString(PyExc_TypeError,
7568                         "attribute deletion is not supported");
7569         return -1;
7570     }
7571     if (!PyCallable_Check(value)) {
7572         PyErr_SetString(PyExc_TypeError,
7573                         "persistent_load must be a callable taking "
7574                         "one argument");
7575         return -1;
7576     }
7577 
7578     self->pers_func_self = NULL;
7579     Py_INCREF(value);
7580     Py_XSETREF(self->pers_func, value);
7581 
7582     return 0;
7583 }
7584 
/* Computed attributes of Unpickler.  Both "memo" and "persistent_load" have
 * a getter and a setter; both setters refuse attribute deletion. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}  /* sentinel */
};
7591 
/* Static type object for _pickle.Unpickler.  The type is subclassable and
 * GC-enabled; instances are created with the generic alloc/new functions
 * and set up by the Argument Clinic __init__ wrapper, whose docstring also
 * serves as the type's docstring. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_vectorcall_offset*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_as_async*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7634 
7635 /*[clinic input]
7636 
7637 _pickle.dump
7638 
7639   obj: object
7640   file: object
7641   protocol: object = None
7642   *
7643   fix_imports: bool = True
7644   buffer_callback: object = None
7645 
7646 Write a pickled representation of obj to the open file object file.
7647 
7648 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7649 be more efficient.
7650 
7651 The optional *protocol* argument tells the pickler to use the given
7652 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7653 protocol is 4. It was introduced in Python 3.4, and is incompatible
7654 with previous versions.
7655 
7656 Specifying a negative protocol version selects the highest protocol
7657 version supported.  The higher the protocol used, the more recent the
7658 version of Python needed to read the pickle produced.
7659 
7660 The *file* argument must have a write() method that accepts a single
7661 bytes argument.  It can thus be a file object opened for binary
7662 writing, an io.BytesIO instance, or any other custom object that meets
7663 this interface.
7664 
7665 If *fix_imports* is True and protocol is less than 3, pickle will try
7666 to map the new Python 3 names to the old module names used in Python
7667 2, so that the pickle data stream is readable with Python 2.
7668 
7669 If *buffer_callback* is None (the default), buffer views are serialized
7670 into *file* as part of the pickle stream.  It is an error if
7671 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7672 
7673 [clinic start generated code]*/
7674 
7675 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7676 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7677                   PyObject *protocol, int fix_imports,
7678                   PyObject *buffer_callback)
7679 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7680 {
7681     PicklerObject *pickler = _Pickler_New();
7682 
7683     if (pickler == NULL)
7684         return NULL;
7685 
7686     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7687         goto error;
7688 
7689     if (_Pickler_SetOutputStream(pickler, file) < 0)
7690         goto error;
7691 
7692     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7693         goto error;
7694 
7695     if (dump(pickler, obj) < 0)
7696         goto error;
7697 
7698     if (_Pickler_FlushToFile(pickler) < 0)
7699         goto error;
7700 
7701     Py_DECREF(pickler);
7702     Py_RETURN_NONE;
7703 
7704   error:
7705     Py_XDECREF(pickler);
7706     return NULL;
7707 }
7708 
7709 /*[clinic input]
7710 
7711 _pickle.dumps
7712 
7713   obj: object
7714   protocol: object = None
7715   *
7716   fix_imports: bool = True
7717   buffer_callback: object = None
7718 
7719 Return the pickled representation of the object as a bytes object.
7720 
7721 The optional *protocol* argument tells the pickler to use the given
7722 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7723 protocol is 4. It was introduced in Python 3.4, and is incompatible
7724 with previous versions.
7725 
7726 Specifying a negative protocol version selects the highest protocol
7727 version supported.  The higher the protocol used, the more recent the
7728 version of Python needed to read the pickle produced.
7729 
7730 If *fix_imports* is True and *protocol* is less than 3, pickle will
7731 try to map the new Python 3 names to the old module names used in
7732 Python 2, so that the pickle data stream is readable with Python 2.
7733 
7734 If *buffer_callback* is None (the default), buffer views are serialized
7735 into *file* as part of the pickle stream.  It is an error if
7736 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7737 
7738 [clinic start generated code]*/
7739 
7740 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7741 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7742                    int fix_imports, PyObject *buffer_callback)
7743 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7744 {
7745     PyObject *result;
7746     PicklerObject *pickler = _Pickler_New();
7747 
7748     if (pickler == NULL)
7749         return NULL;
7750 
7751     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7752         goto error;
7753 
7754     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7755         goto error;
7756 
7757     if (dump(pickler, obj) < 0)
7758         goto error;
7759 
7760     result = _Pickler_GetString(pickler);
7761     Py_DECREF(pickler);
7762     return result;
7763 
7764   error:
7765     Py_XDECREF(pickler);
7766     return NULL;
7767 }
7768 
7769 /*[clinic input]
7770 
7771 _pickle.load
7772 
7773   file: object
7774   *
7775   fix_imports: bool = True
7776   encoding: str = 'ASCII'
7777   errors: str = 'strict'
7778   buffers: object(c_default="NULL") = ()
7779 
7780 Read and return an object from the pickle data stored in a file.
7781 
7782 This is equivalent to ``Unpickler(file).load()``, but may be more
7783 efficient.
7784 
7785 The protocol version of the pickle is detected automatically, so no
7786 protocol argument is needed.  Bytes past the pickled object's
7787 representation are ignored.
7788 
7789 The argument *file* must have two methods, a read() method that takes
7790 an integer argument, and a readline() method that requires no
7791 arguments.  Both methods should return bytes.  Thus *file* can be a
7792 binary file object opened for reading, an io.BytesIO object, or any
7793 other custom object that meets this interface.
7794 
7795 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7796 which are used to control compatibility support for pickle stream
7797 generated by Python 2.  If *fix_imports* is True, pickle will try to
7798 map the old Python 2 names to the new names used in Python 3.  The
7799 *encoding* and *errors* tell pickle how to decode 8-bit string
7800 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7801 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7802 string instances as bytes objects.
7803 [clinic start generated code]*/
7804 
7805 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7806 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7807                   const char *encoding, const char *errors,
7808                   PyObject *buffers)
7809 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7810 {
7811     PyObject *result;
7812     UnpicklerObject *unpickler = _Unpickler_New();
7813 
7814     if (unpickler == NULL)
7815         return NULL;
7816 
7817     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7818         goto error;
7819 
7820     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7821         goto error;
7822 
7823     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7824         goto error;
7825 
7826     unpickler->fix_imports = fix_imports;
7827 
7828     result = load(unpickler);
7829     Py_DECREF(unpickler);
7830     return result;
7831 
7832   error:
7833     Py_XDECREF(unpickler);
7834     return NULL;
7835 }
7836 
7837 /*[clinic input]
7838 
7839 _pickle.loads
7840 
7841   data: object
7842   /
7843   *
7844   fix_imports: bool = True
7845   encoding: str = 'ASCII'
7846   errors: str = 'strict'
7847   buffers: object(c_default="NULL") = ()
7848 
7849 Read and return an object from the given pickle data.
7850 
7851 The protocol version of the pickle is detected automatically, so no
7852 protocol argument is needed.  Bytes past the pickled object's
7853 representation are ignored.
7854 
7855 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7856 which are used to control compatibility support for pickle stream
7857 generated by Python 2.  If *fix_imports* is True, pickle will try to
7858 map the old Python 2 names to the new names used in Python 3.  The
7859 *encoding* and *errors* tell pickle how to decode 8-bit string
7860 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7862 string instances as bytes objects.
7863 [clinic start generated code]*/
7864 
7865 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7866 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7867                    const char *encoding, const char *errors,
7868                    PyObject *buffers)
7869 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7870 {
7871     PyObject *result;
7872     UnpicklerObject *unpickler = _Unpickler_New();
7873 
7874     if (unpickler == NULL)
7875         return NULL;
7876 
7877     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7878         goto error;
7879 
7880     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881         goto error;
7882 
7883     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884         goto error;
7885 
7886     unpickler->fix_imports = fix_imports;
7887 
7888     result = load(unpickler);
7889     Py_DECREF(unpickler);
7890     return result;
7891 
7892   error:
7893     Py_XDECREF(unpickler);
7894     return NULL;
7895 }
7896 
/* Module-level functions of _pickle: dump(), dumps(), load() and loads().
 * Each entry is a *_METHODDEF macro (presumably emitted by Argument Clinic
 * alongside the corresponding *_impl function -- the macros are defined
 * outside this part of the file). */
static struct PyMethodDef pickle_methods[] = {
    _PICKLE_DUMP_METHODDEF
    _PICKLE_DUMPS_METHODDEF
    _PICKLE_LOAD_METHODDEF
    _PICKLE_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
7904 
7905 static int
pickle_clear(PyObject * m)7906 pickle_clear(PyObject *m)
7907 {
7908     _Pickle_ClearState(_Pickle_GetState(m));
7909     return 0;
7910 }
7911 
7912 static void
pickle_free(PyObject * m)7913 pickle_free(PyObject *m)
7914 {
7915     _Pickle_ClearState(_Pickle_GetState(m));
7916 }
7917 
/* m_traverse hook of the module definition: visits the object references
 * kept in the per-module PickleState so the cyclic GC can see them.
 * Py_VISIT returns early with the visitor's result when it is non-zero;
 * otherwise 0 is returned after the last field. */
static int
pickle_traverse(PyObject *m, visitproc visit, void *arg)
{
    PickleState *st = _Pickle_GetState(m);
    /* Exception classes. */
    Py_VISIT(st->PickleError);
    Py_VISIT(st->PicklingError);
    Py_VISIT(st->UnpicklingError);
    /* Remaining state members. */
    Py_VISIT(st->dispatch_table);
    Py_VISIT(st->extension_registry);
    Py_VISIT(st->extension_cache);
    Py_VISIT(st->inverted_registry);
    Py_VISIT(st->name_mapping_2to3);
    Py_VISIT(st->import_mapping_2to3);
    Py_VISIT(st->name_mapping_3to2);
    Py_VISIT(st->import_mapping_3to2);
    Py_VISIT(st->codecs_encode);
    Py_VISIT(st->getattr);
    Py_VISIT(st->partial);
    return 0;
}
7938 
/* Module definition for _pickle.  m_size reserves a PickleState per module
 * object; the traverse/clear/free hooks below manage the references kept
 * in that state. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    sizeof(PickleState),  /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_slots (this field was formerly m_reload) */
    pickle_traverse,      /* m_traverse */
    pickle_clear,         /* m_clear */
    (freefunc)pickle_free /* m_free */
};
7950 
7951 PyMODINIT_FUNC
PyInit__pickle(void)7952 PyInit__pickle(void)
7953 {
7954     PyObject *m;
7955     PickleState *st;
7956 
7957     m = PyState_FindModule(&_picklemodule);
7958     if (m) {
7959         Py_INCREF(m);
7960         return m;
7961     }
7962 
7963     if (PyType_Ready(&Pdata_Type) < 0)
7964         return NULL;
7965     if (PyType_Ready(&PicklerMemoProxyType) < 0)
7966         return NULL;
7967     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7968         return NULL;
7969 
7970     /* Create the module and add the functions. */
7971     m = PyModule_Create(&_picklemodule);
7972     if (m == NULL)
7973         return NULL;
7974 
7975     /* Add types */
7976     if (PyModule_AddType(m, &Pickler_Type) < 0) {
7977         return NULL;
7978     }
7979     if (PyModule_AddType(m, &Unpickler_Type) < 0) {
7980         return NULL;
7981     }
7982     if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7983         return NULL;
7984     }
7985 
7986     st = _Pickle_GetState(m);
7987 
7988     /* Initialize the exceptions. */
7989     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7990     if (st->PickleError == NULL)
7991         return NULL;
7992     st->PicklingError = \
7993         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7994     if (st->PicklingError == NULL)
7995         return NULL;
7996     st->UnpicklingError = \
7997         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7998     if (st->UnpicklingError == NULL)
7999         return NULL;
8000 
8001     Py_INCREF(st->PickleError);
8002     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
8003         return NULL;
8004     Py_INCREF(st->PicklingError);
8005     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
8006         return NULL;
8007     Py_INCREF(st->UnpicklingError);
8008     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
8009         return NULL;
8010 
8011     if (_Pickle_InitState(st) < 0)
8012         return NULL;
8013 
8014     return m;
8015 }
8016