• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "structmember.h"
3 
4 PyDoc_STRVAR(pickle_module_doc,
5 "Optimized C implementation for the Python pickle module.");
6 
7 /*[clinic input]
8 module _pickle
9 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
10 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
11 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
12 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
13 [clinic start generated code]*/
14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
15 
/* Pickle protocol versions known to this module.  HIGHEST_PROTOCOL has to be
   raised whenever new opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 4
};
21 
/* The pickle opcodes, one character constant per opcode.
   This list has to stay in step with pickle.py; pickletools.py carries the
   extensive documentation for each opcode. */
enum opcode {
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',

    /* Opcodes introduced by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8 = '\x8d',
    BINBYTES8 = '\x8e',
    EMPTY_SET = '\x8f',
    ADDITEMS = '\x90',
    FROZENSET = '\x91',
    NEWOBJ_EX = '\x92',
    STACK_GLOBAL = '\x93',
    MEMOIZE = '\x94',
    FRAME = '\x95'
};
97 
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before writing an
       APPENDS/SETITEMS opcode.  Meant to mirror pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but there is no known benefit
       in letting them differ either. */
    BATCHSIZE = 1000,

    /* In "fast mode", the nesting depth after which Pickler begins to
       check for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size the Pickler write buffer starts out with. */
    WRITE_BUF_SIZE = 4096,

    /* How much to prefetch while unpickling (not used on streams that
       cannot be peeked). */
    PREFETCH = 16 * 8192,

    /* Frame payload length at which _Pickler_OpcodeBoundary() commits the
       currently open frame. */
    FRAME_SIZE_TARGET = 64 * 1024,

    /* Bytes taken by a frame header: the FRAME opcode plus the 8-byte
       length written by _write_size64(). */
    FRAME_HEADER_SIZE = 9
};
119 
120 /*************************************************************************/
121 
122 /* State of the pickle module, per PEP 3121. */
123 typedef struct {
124     /* Exception classes for pickle. */
125     PyObject *PickleError;
126     PyObject *PicklingError;
127     PyObject *UnpicklingError;
128 
129     /* copyreg.dispatch_table, {type_object: pickling_function} */
130     PyObject *dispatch_table;
131 
132     /* For the extension opcodes EXT1, EXT2 and EXT4. */
133 
134     /* copyreg._extension_registry, {(module_name, function_name): code} */
135     PyObject *extension_registry;
136     /* copyreg._extension_cache, {code: object} */
137     PyObject *extension_cache;
138     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
139     PyObject *inverted_registry;
140 
141     /* Import mappings for compatibility with Python 2.x */
142 
143     /* _compat_pickle.NAME_MAPPING,
144        {(oldmodule, oldname): (newmodule, newname)} */
145     PyObject *name_mapping_2to3;
146     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
147     PyObject *import_mapping_2to3;
148     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
149     PyObject *name_mapping_3to2;
150     PyObject *import_mapping_3to2;
151 
152     /* codecs.encode, used for saving bytes in older protocols */
153     PyObject *codecs_encode;
154     /* builtins.getattr, used for saving nested names with protocol < 4 */
155     PyObject *getattr;
156     /* functools.partial, used for implementing __newobj_ex__ with protocols
157        2 and 3 */
158     PyObject *partial;
159 } PickleState;
160 
161 /* Forward declaration of the _pickle module definition. */
162 static struct PyModuleDef _picklemodule;
163 
164 /* Given a module object, get its per-module state. */
165 static PickleState *
_Pickle_GetState(PyObject * module)166 _Pickle_GetState(PyObject *module)
167 {
168     return (PickleState *)PyModule_GetState(module);
169 }
170 
171 /* Find the module instance imported in the currently running sub-interpreter
172    and get its state. */
173 static PickleState *
_Pickle_GetGlobalState(void)174 _Pickle_GetGlobalState(void)
175 {
176     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
177 }
178 
179 /* Clear the given pickle module state. */
180 static void
_Pickle_ClearState(PickleState * st)181 _Pickle_ClearState(PickleState *st)
182 {
183     Py_CLEAR(st->PickleError);
184     Py_CLEAR(st->PicklingError);
185     Py_CLEAR(st->UnpicklingError);
186     Py_CLEAR(st->dispatch_table);
187     Py_CLEAR(st->extension_registry);
188     Py_CLEAR(st->extension_cache);
189     Py_CLEAR(st->inverted_registry);
190     Py_CLEAR(st->name_mapping_2to3);
191     Py_CLEAR(st->import_mapping_2to3);
192     Py_CLEAR(st->name_mapping_3to2);
193     Py_CLEAR(st->import_mapping_3to2);
194     Py_CLEAR(st->codecs_encode);
195     Py_CLEAR(st->getattr);
196     Py_CLEAR(st->partial);
197 }
198 
199 /* Initialize the given pickle module state. */
200 static int
_Pickle_InitState(PickleState * st)201 _Pickle_InitState(PickleState *st)
202 {
203     PyObject *builtins;
204     PyObject *copyreg = NULL;
205     PyObject *compat_pickle = NULL;
206     PyObject *codecs = NULL;
207     PyObject *functools = NULL;
208 
209     builtins = PyEval_GetBuiltins();
210     if (builtins == NULL)
211         goto error;
212     st->getattr = PyDict_GetItemString(builtins, "getattr");
213     if (st->getattr == NULL)
214         goto error;
215     Py_INCREF(st->getattr);
216 
217     copyreg = PyImport_ImportModule("copyreg");
218     if (!copyreg)
219         goto error;
220     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
221     if (!st->dispatch_table)
222         goto error;
223     if (!PyDict_CheckExact(st->dispatch_table)) {
224         PyErr_Format(PyExc_RuntimeError,
225                      "copyreg.dispatch_table should be a dict, not %.200s",
226                      Py_TYPE(st->dispatch_table)->tp_name);
227         goto error;
228     }
229     st->extension_registry = \
230         PyObject_GetAttrString(copyreg, "_extension_registry");
231     if (!st->extension_registry)
232         goto error;
233     if (!PyDict_CheckExact(st->extension_registry)) {
234         PyErr_Format(PyExc_RuntimeError,
235                      "copyreg._extension_registry should be a dict, "
236                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
237         goto error;
238     }
239     st->inverted_registry = \
240         PyObject_GetAttrString(copyreg, "_inverted_registry");
241     if (!st->inverted_registry)
242         goto error;
243     if (!PyDict_CheckExact(st->inverted_registry)) {
244         PyErr_Format(PyExc_RuntimeError,
245                      "copyreg._inverted_registry should be a dict, "
246                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
247         goto error;
248     }
249     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
250     if (!st->extension_cache)
251         goto error;
252     if (!PyDict_CheckExact(st->extension_cache)) {
253         PyErr_Format(PyExc_RuntimeError,
254                      "copyreg._extension_cache should be a dict, "
255                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
256         goto error;
257     }
258     Py_CLEAR(copyreg);
259 
260     /* Load the 2.x -> 3.x stdlib module mapping tables */
261     compat_pickle = PyImport_ImportModule("_compat_pickle");
262     if (!compat_pickle)
263         goto error;
264     st->name_mapping_2to3 = \
265         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
266     if (!st->name_mapping_2to3)
267         goto error;
268     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
269         PyErr_Format(PyExc_RuntimeError,
270                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
271                      Py_TYPE(st->name_mapping_2to3)->tp_name);
272         goto error;
273     }
274     st->import_mapping_2to3 = \
275         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
276     if (!st->import_mapping_2to3)
277         goto error;
278     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
279         PyErr_Format(PyExc_RuntimeError,
280                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
281                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
282         goto error;
283     }
284     /* ... and the 3.x -> 2.x mapping tables */
285     st->name_mapping_3to2 = \
286         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
287     if (!st->name_mapping_3to2)
288         goto error;
289     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
290         PyErr_Format(PyExc_RuntimeError,
291                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
292                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
293         goto error;
294     }
295     st->import_mapping_3to2 = \
296         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
297     if (!st->import_mapping_3to2)
298         goto error;
299     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
300         PyErr_Format(PyExc_RuntimeError,
301                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
302                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
303         goto error;
304     }
305     Py_CLEAR(compat_pickle);
306 
307     codecs = PyImport_ImportModule("codecs");
308     if (codecs == NULL)
309         goto error;
310     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
311     if (st->codecs_encode == NULL) {
312         goto error;
313     }
314     if (!PyCallable_Check(st->codecs_encode)) {
315         PyErr_Format(PyExc_RuntimeError,
316                      "codecs.encode should be a callable, not %.200s",
317                      Py_TYPE(st->codecs_encode)->tp_name);
318         goto error;
319     }
320     Py_CLEAR(codecs);
321 
322     functools = PyImport_ImportModule("functools");
323     if (!functools)
324         goto error;
325     st->partial = PyObject_GetAttrString(functools, "partial");
326     if (!st->partial)
327         goto error;
328     Py_CLEAR(functools);
329 
330     return 0;
331 
332   error:
333     Py_CLEAR(copyreg);
334     Py_CLEAR(compat_pickle);
335     Py_CLEAR(codecs);
336     Py_CLEAR(functools);
337     _Pickle_ClearState(st);
338     return -1;
339 }
340 
341 /* Helper for calling a function with a single argument quickly.
342 
343    This function steals the reference of the given argument. */
344 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)345 _Pickle_FastCall(PyObject *func, PyObject *obj)
346 {
347     PyObject *result;
348 
349     result = _PyObject_CallArg1(func, obj);
350     Py_DECREF(obj);
351     return result;
352 }
353 
354 /*************************************************************************/
355 
356 /* Internal data type used as the unpickling stack. */
357 typedef struct {
358     PyObject_VAR_HEAD
359     PyObject **data;
360     int mark_set;          /* is MARK set? */
361     Py_ssize_t fence;      /* position of top MARK or 0 */
362     Py_ssize_t allocated;  /* number of slots in data allocated */
363 } Pdata;
364 
365 static void
Pdata_dealloc(Pdata * self)366 Pdata_dealloc(Pdata *self)
367 {
368     Py_ssize_t i = Py_SIZE(self);
369     while (--i >= 0) {
370         Py_DECREF(self->data[i]);
371     }
372     PyMem_FREE(self->data);
373     PyObject_Del(self);
374 }
375 
376 static PyTypeObject Pdata_Type = {
377     PyVarObject_HEAD_INIT(NULL, 0)
378     "_pickle.Pdata",              /*tp_name*/
379     sizeof(Pdata),                /*tp_basicsize*/
380     sizeof(PyObject *),           /*tp_itemsize*/
381     (destructor)Pdata_dealloc,    /*tp_dealloc*/
382 };
383 
384 static PyObject *
Pdata_New(void)385 Pdata_New(void)
386 {
387     Pdata *self;
388 
389     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
390         return NULL;
391     Py_SIZE(self) = 0;
392     self->mark_set = 0;
393     self->fence = 0;
394     self->allocated = 8;
395     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
396     if (self->data)
397         return (PyObject *)self;
398     Py_DECREF(self);
399     return PyErr_NoMemory();
400 }
401 
402 
403 /* Retain only the initial clearto items.  If clearto >= the current
404  * number of items, this is a (non-erroneous) NOP.
405  */
406 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)407 Pdata_clear(Pdata *self, Py_ssize_t clearto)
408 {
409     Py_ssize_t i = Py_SIZE(self);
410 
411     assert(clearto >= self->fence);
412     if (clearto >= i)
413         return 0;
414 
415     while (--i >= clearto) {
416         Py_CLEAR(self->data[i]);
417     }
418     Py_SIZE(self) = clearto;
419     return 0;
420 }
421 
422 static int
Pdata_grow(Pdata * self)423 Pdata_grow(Pdata *self)
424 {
425     PyObject **data = self->data;
426     size_t allocated = (size_t)self->allocated;
427     size_t new_allocated;
428 
429     new_allocated = (allocated >> 3) + 6;
430     /* check for integer overflow */
431     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
432         goto nomemory;
433     new_allocated += allocated;
434     PyMem_RESIZE(data, PyObject *, new_allocated);
435     if (data == NULL)
436         goto nomemory;
437 
438     self->data = data;
439     self->allocated = (Py_ssize_t)new_allocated;
440     return 0;
441 
442   nomemory:
443     PyErr_NoMemory();
444     return -1;
445 }
446 
447 static int
Pdata_stack_underflow(Pdata * self)448 Pdata_stack_underflow(Pdata *self)
449 {
450     PickleState *st = _Pickle_GetGlobalState();
451     PyErr_SetString(st->UnpicklingError,
452                     self->mark_set ?
453                     "unexpected MARK found" :
454                     "unpickling stack underflow");
455     return -1;
456 }
457 
458 /* D is a Pdata*.  Pop the topmost element and store it into V, which
459  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
460  * is raised and V is set to NULL.
461  */
462 static PyObject *
Pdata_pop(Pdata * self)463 Pdata_pop(Pdata *self)
464 {
465     if (Py_SIZE(self) <= self->fence) {
466         Pdata_stack_underflow(self);
467         return NULL;
468     }
469     return self->data[--Py_SIZE(self)];
470 }
471 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
472 
473 static int
Pdata_push(Pdata * self,PyObject * obj)474 Pdata_push(Pdata *self, PyObject *obj)
475 {
476     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
477         return -1;
478     }
479     self->data[Py_SIZE(self)++] = obj;
480     return 0;
481 }
482 
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER.
   NOTE(review): on failure the reference to O is not released here; whether
   callers expect that cannot be told from this macro alone. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference added here is dropped again (so nothing
   leaks) and the enclosing function returns ER.
   O is evaluated more than once and must be free of side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        } } while(0)
491 
492 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)493 Pdata_poptuple(Pdata *self, Py_ssize_t start)
494 {
495     PyObject *tuple;
496     Py_ssize_t len, i, j;
497 
498     if (start < self->fence) {
499         Pdata_stack_underflow(self);
500         return NULL;
501     }
502     len = Py_SIZE(self) - start;
503     tuple = PyTuple_New(len);
504     if (tuple == NULL)
505         return NULL;
506     for (i = start, j = 0; j < len; i++, j++)
507         PyTuple_SET_ITEM(tuple, j, self->data[i]);
508 
509     Py_SIZE(self) = start;
510     return tuple;
511 }
512 
513 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)514 Pdata_poplist(Pdata *self, Py_ssize_t start)
515 {
516     PyObject *list;
517     Py_ssize_t len, i, j;
518 
519     len = Py_SIZE(self) - start;
520     list = PyList_New(len);
521     if (list == NULL)
522         return NULL;
523     for (i = start, j = 0; j < len; i++, j++)
524         PyList_SET_ITEM(list, j, self->data[i]);
525 
526     Py_SIZE(self) = start;
527     return list;
528 }
529 
530 typedef struct {
531     PyObject *me_key;
532     Py_ssize_t me_value;
533 } PyMemoEntry;
534 
535 typedef struct {
536     Py_ssize_t mt_mask;
537     Py_ssize_t mt_used;
538     Py_ssize_t mt_allocated;
539     PyMemoEntry *mt_table;
540 } PyMemoTable;
541 
542 typedef struct PicklerObject {
543     PyObject_HEAD
544     PyMemoTable *memo;          /* Memo table, keep track of the seen
545                                    objects to support self-referential objects
546                                    pickling. */
547     PyObject *pers_func;        /* persistent_id() method, can be NULL */
548     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
549 
550     PyObject *write;            /* write() method of the output stream. */
551     PyObject *output_buffer;    /* Write into a local bytearray buffer before
552                                    flushing to the stream. */
553     Py_ssize_t output_len;      /* Length of output_buffer. */
554     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
555     int proto;                  /* Pickle protocol number, >= 0 */
556     int bin;                    /* Boolean, true if proto > 0 */
557     int framing;                /* True when framing is enabled, proto >= 4 */
558     Py_ssize_t frame_start;     /* Position in output_buffer where the
559                                    current frame begins. -1 if there
560                                    is no frame currently open. */
561 
562     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
563     int fast;                   /* Enable fast mode if set to a true value.
564                                    The fast mode disable the usage of memo,
565                                    therefore speeding the pickling process by
566                                    not generating superfluous PUT opcodes. It
567                                    should not be used if with self-referential
568                                    objects. */
569     int fast_nesting;
570     int fix_imports;            /* Indicate whether Pickler should fix
571                                    the name of globals for Python 2.x. */
572     PyObject *fast_memo;
573 } PicklerObject;
574 
575 typedef struct UnpicklerObject {
576     PyObject_HEAD
577     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
578 
579     /* The unpickler memo is just an array of PyObject *s. Using a dict
580        is unnecessary, since the keys are contiguous ints. */
581     PyObject **memo;
582     Py_ssize_t memo_size;       /* Capacity of the memo array */
583     Py_ssize_t memo_len;        /* Number of objects in the memo */
584 
585     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
586 
587     Py_buffer buffer;
588     char *input_buffer;
589     char *input_line;
590     Py_ssize_t input_len;
591     Py_ssize_t next_read_idx;
592     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
593 
594     PyObject *read;             /* read() method of the input stream. */
595     PyObject *readline;         /* readline() method of the input stream. */
596     PyObject *peek;             /* peek() method of the input stream, or NULL */
597 
598     char *encoding;             /* Name of the encoding to be used for
599                                    decoding strings pickled using Python
600                                    2.x. The default value is "ASCII" */
601     char *errors;               /* Name of errors handling scheme to used when
602                                    decoding strings. The default value is
603                                    "strict". */
604     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
605                                    objects. */
606     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
607     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
608     int proto;                  /* Protocol of the pickle loaded. */
609     int fix_imports;            /* Indicate whether Unpickler should fix
610                                    the name of globals pickled by Python 2.x. */
611 } UnpicklerObject;
612 
613 typedef struct {
614     PyObject_HEAD
615     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
616 }  PicklerMemoProxyObject;
617 
618 typedef struct {
619     PyObject_HEAD
620     UnpicklerObject *unpickler;
621 } UnpicklerMemoProxyObject;
622 
623 /* Forward declarations */
624 static int save(PicklerObject *, PyObject *, int);
625 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
626 static PyTypeObject Pickler_Type;
627 static PyTypeObject Unpickler_Type;
628 
629 #include "clinic/_pickle.c.h"
630 
631 /*************************************************************************
632  A custom hashtable mapping void* to Python ints. This is used by the pickler
633  for memoization. Using a custom hashtable rather than PyDict allows us to skip
634  a bunch of unnecessary object creation. This makes a huge performance
635  difference. */
636 
637 #define MT_MINSIZE 8
638 #define PERTURB_SHIFT 5
639 
640 
641 static PyMemoTable *
PyMemoTable_New(void)642 PyMemoTable_New(void)
643 {
644     PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
645     if (memo == NULL) {
646         PyErr_NoMemory();
647         return NULL;
648     }
649 
650     memo->mt_used = 0;
651     memo->mt_allocated = MT_MINSIZE;
652     memo->mt_mask = MT_MINSIZE - 1;
653     memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
654     if (memo->mt_table == NULL) {
655         PyMem_FREE(memo);
656         PyErr_NoMemory();
657         return NULL;
658     }
659     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
660 
661     return memo;
662 }
663 
664 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)665 PyMemoTable_Copy(PyMemoTable *self)
666 {
667     Py_ssize_t i;
668     PyMemoTable *new = PyMemoTable_New();
669     if (new == NULL)
670         return NULL;
671 
672     new->mt_used = self->mt_used;
673     new->mt_allocated = self->mt_allocated;
674     new->mt_mask = self->mt_mask;
675     /* The table we get from _New() is probably smaller than we wanted.
676        Free it and allocate one that's the right size. */
677     PyMem_FREE(new->mt_table);
678     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
679     if (new->mt_table == NULL) {
680         PyMem_FREE(new);
681         PyErr_NoMemory();
682         return NULL;
683     }
684     for (i = 0; i < self->mt_allocated; i++) {
685         Py_XINCREF(self->mt_table[i].me_key);
686     }
687     memcpy(new->mt_table, self->mt_table,
688            sizeof(PyMemoEntry) * self->mt_allocated);
689 
690     return new;
691 }
692 
693 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)694 PyMemoTable_Size(PyMemoTable *self)
695 {
696     return self->mt_used;
697 }
698 
699 static int
PyMemoTable_Clear(PyMemoTable * self)700 PyMemoTable_Clear(PyMemoTable *self)
701 {
702     Py_ssize_t i = self->mt_allocated;
703 
704     while (--i >= 0) {
705         Py_XDECREF(self->mt_table[i].me_key);
706     }
707     self->mt_used = 0;
708     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
709     return 0;
710 }
711 
712 static void
PyMemoTable_Del(PyMemoTable * self)713 PyMemoTable_Del(PyMemoTable *self)
714 {
715     if (self == NULL)
716         return;
717     PyMemoTable_Clear(self);
718 
719     PyMem_FREE(self->mt_table);
720     PyMem_FREE(self);
721 }
722 
723 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
724    can be considerably simpler than dictobject.c's lookdict(). */
725 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)726 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
727 {
728     size_t i;
729     size_t perturb;
730     size_t mask = (size_t)self->mt_mask;
731     PyMemoEntry *table = self->mt_table;
732     PyMemoEntry *entry;
733     Py_hash_t hash = (Py_hash_t)key >> 3;
734 
735     i = hash & mask;
736     entry = &table[i];
737     if (entry->me_key == NULL || entry->me_key == key)
738         return entry;
739 
740     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
741         i = (i << 2) + i + perturb + 1;
742         entry = &table[i & mask];
743         if (entry->me_key == NULL || entry->me_key == key)
744             return entry;
745     }
746     assert(0);  /* Never reached */
747     return NULL;
748 }
749 
750 /* Returns -1 on failure, 0 on success. */
751 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,Py_ssize_t min_size)752 _PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
753 {
754     PyMemoEntry *oldtable = NULL;
755     PyMemoEntry *oldentry, *newentry;
756     Py_ssize_t new_size = MT_MINSIZE;
757     Py_ssize_t to_process;
758 
759     assert(min_size > 0);
760 
761     /* Find the smallest valid table size >= min_size. */
762     while (new_size < min_size && new_size > 0)
763         new_size <<= 1;
764     if (new_size <= 0) {
765         PyErr_NoMemory();
766         return -1;
767     }
768     /* new_size needs to be a power of two. */
769     assert((new_size & (new_size - 1)) == 0);
770 
771     /* Allocate new table. */
772     oldtable = self->mt_table;
773     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
774     if (self->mt_table == NULL) {
775         self->mt_table = oldtable;
776         PyErr_NoMemory();
777         return -1;
778     }
779     self->mt_allocated = new_size;
780     self->mt_mask = new_size - 1;
781     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
782 
783     /* Copy entries from the old table. */
784     to_process = self->mt_used;
785     for (oldentry = oldtable; to_process > 0; oldentry++) {
786         if (oldentry->me_key != NULL) {
787             to_process--;
788             /* newentry is a pointer to a chunk of the new
789                mt_table, so we're setting the key:value pair
790                in-place. */
791             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
792             newentry->me_key = oldentry->me_key;
793             newentry->me_value = oldentry->me_value;
794         }
795     }
796 
797     /* Deallocate the old table. */
798     PyMem_FREE(oldtable);
799     return 0;
800 }
801 
802 /* Returns NULL on failure, a pointer to the value otherwise. */
803 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)804 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
805 {
806     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
807     if (entry->me_key == NULL)
808         return NULL;
809     return &entry->me_value;
810 }
811 
812 /* Returns -1 on failure, 0 on success. */
813 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)814 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
815 {
816     PyMemoEntry *entry;
817 
818     assert(key != NULL);
819 
820     entry = _PyMemoTable_Lookup(self, key);
821     if (entry->me_key != NULL) {
822         entry->me_value = value;
823         return 0;
824     }
825     Py_INCREF(key);
826     entry->me_key = key;
827     entry->me_value = value;
828     self->mt_used++;
829 
830     /* If we added a key, we can safely resize. Otherwise just return!
831      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
832      *
833      * Quadrupling the size improves average table sparseness
834      * (reducing collisions) at the cost of some memory. It also halves
835      * the number of expensive resize operations in a growing memo table.
836      *
837      * Very large memo tables (over 50K items) use doubling instead.
838      * This may help applications with severe memory constraints.
839      */
840     if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
841         return 0;
842     return _PyMemoTable_ResizeTable(self,
843         (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
844 }
845 
846 #undef MT_MINSIZE
847 #undef PERTURB_SHIFT
848 
849 /*************************************************************************/
850 
851 
852 static int
_Pickler_ClearBuffer(PicklerObject * self)853 _Pickler_ClearBuffer(PicklerObject *self)
854 {
855     Py_XSETREF(self->output_buffer,
856               PyBytes_FromStringAndSize(NULL, self->max_output_len));
857     if (self->output_buffer == NULL)
858         return -1;
859     self->output_len = 0;
860     self->frame_start = -1;
861     return 0;
862 }
863 
/* Write `value` into out[0..7] as an 8-byte little-endian integer.  When
   size_t is narrower than 8 bytes, the high-order bytes are set to zero. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t))
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        else
            out[pos] = 0;
    }
}
878 
879 static void
_Pickler_WriteFrameHeader(PicklerObject * self,char * qdata,size_t frame_len)880 _Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
881 {
882     qdata[0] = FRAME;
883     _write_size64(qdata + 1, frame_len);
884 }
885 
886 static int
_Pickler_CommitFrame(PicklerObject * self)887 _Pickler_CommitFrame(PicklerObject *self)
888 {
889     size_t frame_len;
890     char *qdata;
891 
892     if (!self->framing || self->frame_start == -1)
893         return 0;
894     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
895     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
896     _Pickler_WriteFrameHeader(self, qdata, frame_len);
897     self->frame_start = -1;
898     return 0;
899 }
900 
901 static int
_Pickler_OpcodeBoundary(PicklerObject * self)902 _Pickler_OpcodeBoundary(PicklerObject *self)
903 {
904     Py_ssize_t frame_len;
905 
906     if (!self->framing || self->frame_start == -1)
907         return 0;
908     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
909     if (frame_len >= FRAME_SIZE_TARGET)
910         return _Pickler_CommitFrame(self);
911     else
912         return 0;
913 }
914 
915 static PyObject *
_Pickler_GetString(PicklerObject * self)916 _Pickler_GetString(PicklerObject *self)
917 {
918     PyObject *output_buffer = self->output_buffer;
919 
920     assert(self->output_buffer != NULL);
921 
922     if (_Pickler_CommitFrame(self))
923         return NULL;
924 
925     self->output_buffer = NULL;
926     /* Resize down to exact size */
927     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
928         return NULL;
929     return output_buffer;
930 }
931 
932 static int
_Pickler_FlushToFile(PicklerObject * self)933 _Pickler_FlushToFile(PicklerObject *self)
934 {
935     PyObject *output, *result;
936 
937     assert(self->write != NULL);
938 
939     /* This will commit the frame first */
940     output = _Pickler_GetString(self);
941     if (output == NULL)
942         return -1;
943 
944     result = _Pickle_FastCall(self->write, output);
945     Py_XDECREF(result);
946     return (result == NULL) ? -1 : 0;
947 }
948 
949 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)950 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
951 {
952     Py_ssize_t i, n, required;
953     char *buffer;
954     int need_new_frame;
955 
956     assert(s != NULL);
957     need_new_frame = (self->framing && self->frame_start == -1);
958 
959     if (need_new_frame)
960         n = data_len + FRAME_HEADER_SIZE;
961     else
962         n = data_len;
963 
964     required = self->output_len + n;
965     if (required > self->max_output_len) {
966         /* Make place in buffer for the pickle chunk */
967         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
968             PyErr_NoMemory();
969             return -1;
970         }
971         self->max_output_len = (self->output_len + n) / 2 * 3;
972         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
973             return -1;
974     }
975     buffer = PyBytes_AS_STRING(self->output_buffer);
976     if (need_new_frame) {
977         /* Setup new frame */
978         Py_ssize_t frame_start = self->output_len;
979         self->frame_start = frame_start;
980         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
981             /* Write an invalid value, for debugging */
982             buffer[frame_start + i] = 0xFE;
983         }
984         self->output_len += FRAME_HEADER_SIZE;
985     }
986     if (data_len < 8) {
987         /* This is faster than memcpy when the string is short. */
988         for (i = 0; i < data_len; i++) {
989             buffer[self->output_len + i] = s[i];
990         }
991     }
992     else {
993         memcpy(buffer + self->output_len, s, data_len);
994     }
995     self->output_len += data_len;
996     return data_len;
997 }
998 
999 static PicklerObject *
_Pickler_New(void)1000 _Pickler_New(void)
1001 {
1002     PicklerObject *self;
1003 
1004     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1005     if (self == NULL)
1006         return NULL;
1007 
1008     self->pers_func = NULL;
1009     self->dispatch_table = NULL;
1010     self->write = NULL;
1011     self->proto = 0;
1012     self->bin = 0;
1013     self->framing = 0;
1014     self->frame_start = -1;
1015     self->fast = 0;
1016     self->fast_nesting = 0;
1017     self->fix_imports = 0;
1018     self->fast_memo = NULL;
1019     self->max_output_len = WRITE_BUF_SIZE;
1020     self->output_len = 0;
1021 
1022     self->memo = PyMemoTable_New();
1023     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1024                                                     self->max_output_len);
1025 
1026     if (self->memo == NULL || self->output_buffer == NULL) {
1027         Py_DECREF(self);
1028         return NULL;
1029     }
1030     return self;
1031 }
1032 
1033 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1034 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1035 {
1036     long proto;
1037 
1038     if (protocol == NULL || protocol == Py_None) {
1039         proto = DEFAULT_PROTOCOL;
1040     }
1041     else {
1042         proto = PyLong_AsLong(protocol);
1043         if (proto < 0) {
1044             if (proto == -1 && PyErr_Occurred())
1045                 return -1;
1046             proto = HIGHEST_PROTOCOL;
1047         }
1048         else if (proto > HIGHEST_PROTOCOL) {
1049             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1050                          HIGHEST_PROTOCOL);
1051             return -1;
1052         }
1053     }
1054     self->proto = (int)proto;
1055     self->bin = proto > 0;
1056     self->fix_imports = fix_imports && proto < 3;
1057     return 0;
1058 }
1059 
1060 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1061    be called once on a freshly created Pickler. */
1062 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1063 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1064 {
1065     _Py_IDENTIFIER(write);
1066     assert(file != NULL);
1067     self->write = _PyObject_GetAttrId(file, &PyId_write);
1068     if (self->write == NULL) {
1069         if (PyErr_ExceptionMatches(PyExc_AttributeError))
1070             PyErr_SetString(PyExc_TypeError,
1071                             "file must have a 'write' attribute");
1072         return -1;
1073     }
1074 
1075     return 0;
1076 }
1077 
1078 /* Returns the size of the input on success, -1 on failure. This takes its
1079    own reference to `input`. */
1080 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1081 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1082 {
1083     if (self->buffer.buf != NULL)
1084         PyBuffer_Release(&self->buffer);
1085     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1086         return -1;
1087     self->input_buffer = self->buffer.buf;
1088     self->input_len = self->buffer.len;
1089     self->next_read_idx = 0;
1090     self->prefetched_idx = self->input_len;
1091     return self->input_len;
1092 }
1093 
1094 static int
bad_readline(void)1095 bad_readline(void)
1096 {
1097     PickleState *st = _Pickle_GetGlobalState();
1098     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1099     return -1;
1100 }
1101 
1102 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1103 _Unpickler_SkipConsumed(UnpicklerObject *self)
1104 {
1105     Py_ssize_t consumed;
1106     PyObject *r;
1107 
1108     consumed = self->next_read_idx - self->prefetched_idx;
1109     if (consumed <= 0)
1110         return 0;
1111 
1112     assert(self->peek);  /* otherwise we did something wrong */
1113     /* This makes a useless copy... */
1114     r = PyObject_CallFunction(self->read, "n", consumed);
1115     if (r == NULL)
1116         return -1;
1117     Py_DECREF(r);
1118 
1119     self->prefetched_idx = self->next_read_idx;
1120     return 0;
1121 }
1122 
1123 static const Py_ssize_t READ_WHOLE_LINE = -1;
1124 
1125 /* If reading from a file, we need to only pull the bytes we need, since there
1126    may be multiple pickle objects arranged contiguously in the same input
1127    buffer.
1128 
1129    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1130    bytes from the input stream/buffer.
1131 
1132    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1133    failure; on success, returns the number of bytes read from the file.
1134 
1135    On success, self->input_len will be 0; this is intentional so that when
1136    unpickling from a file, the "we've run out of data" code paths will trigger,
1137    causing the Unpickler to go back to the file for more data. Use the returned
1138    size to tell you how much data you can process. */
1139 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1140 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1141 {
1142     PyObject *data;
1143     Py_ssize_t read_size;
1144 
1145     assert(self->read != NULL);
1146 
1147     if (_Unpickler_SkipConsumed(self) < 0)
1148         return -1;
1149 
1150     if (n == READ_WHOLE_LINE) {
1151         data = _PyObject_CallNoArg(self->readline);
1152     }
1153     else {
1154         PyObject *len;
1155         /* Prefetch some data without advancing the file pointer, if possible */
1156         if (self->peek && n < PREFETCH) {
1157             len = PyLong_FromSsize_t(PREFETCH);
1158             if (len == NULL)
1159                 return -1;
1160             data = _Pickle_FastCall(self->peek, len);
1161             if (data == NULL) {
1162                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1163                     return -1;
1164                 /* peek() is probably not supported by the given file object */
1165                 PyErr_Clear();
1166                 Py_CLEAR(self->peek);
1167             }
1168             else {
1169                 read_size = _Unpickler_SetStringInput(self, data);
1170                 Py_DECREF(data);
1171                 self->prefetched_idx = 0;
1172                 if (n <= read_size)
1173                     return n;
1174             }
1175         }
1176         len = PyLong_FromSsize_t(n);
1177         if (len == NULL)
1178             return -1;
1179         data = _Pickle_FastCall(self->read, len);
1180     }
1181     if (data == NULL)
1182         return -1;
1183 
1184     read_size = _Unpickler_SetStringInput(self, data);
1185     Py_DECREF(data);
1186     return read_size;
1187 }
1188 
1189 /* Don't call it directly: use _Unpickler_Read() */
1190 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1191 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1192 {
1193     Py_ssize_t num_read;
1194 
1195     *s = NULL;
1196     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1197         PickleState *st = _Pickle_GetGlobalState();
1198         PyErr_SetString(st->UnpicklingError,
1199                         "read would overflow (invalid bytecode)");
1200         return -1;
1201     }
1202 
1203     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1204     assert(self->next_read_idx + n > self->input_len);
1205 
1206     if (!self->read)
1207         return bad_readline();
1208 
1209     num_read = _Unpickler_ReadFromFile(self, n);
1210     if (num_read < 0)
1211         return -1;
1212     if (num_read < n)
1213         return bad_readline();
1214     *s = self->input_buffer;
1215     self->next_read_idx = n;
1216     return n;
1217 }
1218 
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly. This method deals correctly with
   reading from input streams, which the input buffer doesn't deal with.

   Fast path: when at least `n` unread bytes are already in the input buffer,
   `*s` is pointed at the current read position and self->next_read_idx is
   advanced by `n`. Otherwise the request is forwarded to
   _Unpickler_ReadImpl(), which refills the buffer from the file.

   This is a macro: `self` and `n` are expanded more than once, so neither
   argument may have side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1238 
1239 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1240 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1241                     char **result)
1242 {
1243     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1244     if (input_line == NULL) {
1245         PyErr_NoMemory();
1246         return -1;
1247     }
1248 
1249     memcpy(input_line, line, len);
1250     input_line[len] = '\0';
1251     self->input_line = input_line;
1252     *result = self->input_line;
1253     return len;
1254 }
1255 
1256 /* Read a line from the input stream/buffer. If we run off the end of the input
1257    before hitting \n, raise an error.
1258 
1259    Returns the number of chars read, or -1 on failure. */
1260 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1261 _Unpickler_Readline(UnpicklerObject *self, char **result)
1262 {
1263     Py_ssize_t i, num_read;
1264 
1265     for (i = self->next_read_idx; i < self->input_len; i++) {
1266         if (self->input_buffer[i] == '\n') {
1267             char *line_start = self->input_buffer + self->next_read_idx;
1268             num_read = i - self->next_read_idx + 1;
1269             self->next_read_idx = i + 1;
1270             return _Unpickler_CopyLine(self, line_start, num_read, result);
1271         }
1272     }
1273     if (!self->read)
1274         return bad_readline();
1275 
1276     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1277     if (num_read < 0)
1278         return -1;
1279     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1280         return bad_readline();
1281     self->next_read_idx = num_read;
1282     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1283 }
1284 
1285 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1286    will be modified in place. */
1287 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,Py_ssize_t new_size)1288 _Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1289 {
1290     Py_ssize_t i;
1291 
1292     assert(new_size > self->memo_size);
1293 
1294     PyMem_RESIZE(self->memo, PyObject *, new_size);
1295     if (self->memo == NULL) {
1296         PyErr_NoMemory();
1297         return -1;
1298     }
1299     for (i = self->memo_size; i < new_size; i++)
1300         self->memo[i] = NULL;
1301     self->memo_size = new_size;
1302     return 0;
1303 }
1304 
1305 /* Returns NULL if idx is out of bounds. */
1306 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,Py_ssize_t idx)1307 _Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1308 {
1309     if (idx < 0 || idx >= self->memo_size)
1310         return NULL;
1311 
1312     return self->memo[idx];
1313 }
1314 
1315 /* Returns -1 (with an exception set) on failure, 0 on success.
1316    This takes its own reference to `value`. */
1317 static int
_Unpickler_MemoPut(UnpicklerObject * self,Py_ssize_t idx,PyObject * value)1318 _Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1319 {
1320     PyObject *old_item;
1321 
1322     if (idx >= self->memo_size) {
1323         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1324             return -1;
1325         assert(idx < self->memo_size);
1326     }
1327     Py_INCREF(value);
1328     old_item = self->memo[idx];
1329     self->memo[idx] = value;
1330     if (old_item != NULL) {
1331         Py_DECREF(old_item);
1332     }
1333     else {
1334         self->memo_len++;
1335     }
1336     return 0;
1337 }
1338 
1339 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1340 _Unpickler_NewMemo(Py_ssize_t new_size)
1341 {
1342     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1343     if (memo == NULL) {
1344         PyErr_NoMemory();
1345         return NULL;
1346     }
1347     memset(memo, 0, new_size * sizeof(PyObject *));
1348     return memo;
1349 }
1350 
1351 /* Free the unpickler's memo, taking care to decref any items left in it. */
1352 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1353 _Unpickler_MemoCleanup(UnpicklerObject *self)
1354 {
1355     Py_ssize_t i;
1356     PyObject **memo = self->memo;
1357 
1358     if (self->memo == NULL)
1359         return;
1360     self->memo = NULL;
1361     i = self->memo_size;
1362     while (--i >= 0) {
1363         Py_XDECREF(memo[i]);
1364     }
1365     PyMem_FREE(memo);
1366 }
1367 
1368 static UnpicklerObject *
_Unpickler_New(void)1369 _Unpickler_New(void)
1370 {
1371     UnpicklerObject *self;
1372 
1373     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1374     if (self == NULL)
1375         return NULL;
1376 
1377     self->pers_func = NULL;
1378     self->input_buffer = NULL;
1379     self->input_line = NULL;
1380     self->input_len = 0;
1381     self->next_read_idx = 0;
1382     self->prefetched_idx = 0;
1383     self->read = NULL;
1384     self->readline = NULL;
1385     self->peek = NULL;
1386     self->encoding = NULL;
1387     self->errors = NULL;
1388     self->marks = NULL;
1389     self->num_marks = 0;
1390     self->marks_size = 0;
1391     self->proto = 0;
1392     self->fix_imports = 0;
1393     memset(&self->buffer, 0, sizeof(Py_buffer));
1394     self->memo_size = 32;
1395     self->memo_len = 0;
1396     self->memo = _Unpickler_NewMemo(self->memo_size);
1397     self->stack = (Pdata *)Pdata_New();
1398 
1399     if (self->memo == NULL || self->stack == NULL) {
1400         Py_DECREF(self);
1401         return NULL;
1402     }
1403 
1404     return self;
1405 }
1406 
1407 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1408    be called once on a freshly created Pickler. */
1409 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1410 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1411 {
1412     _Py_IDENTIFIER(peek);
1413     _Py_IDENTIFIER(read);
1414     _Py_IDENTIFIER(readline);
1415 
1416     self->peek = _PyObject_GetAttrId(file, &PyId_peek);
1417     if (self->peek == NULL) {
1418         if (PyErr_ExceptionMatches(PyExc_AttributeError))
1419             PyErr_Clear();
1420         else
1421             return -1;
1422     }
1423     self->read = _PyObject_GetAttrId(file, &PyId_read);
1424     self->readline = _PyObject_GetAttrId(file, &PyId_readline);
1425     if (self->readline == NULL || self->read == NULL) {
1426         if (PyErr_ExceptionMatches(PyExc_AttributeError))
1427             PyErr_SetString(PyExc_TypeError,
1428                             "file must have 'read' and 'readline' attributes");
1429         Py_CLEAR(self->read);
1430         Py_CLEAR(self->readline);
1431         Py_CLEAR(self->peek);
1432         return -1;
1433     }
1434     return 0;
1435 }
1436 
1437 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1438    be called once on a freshly created Pickler. */
1439 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1440 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1441                             const char *encoding,
1442                             const char *errors)
1443 {
1444     if (encoding == NULL)
1445         encoding = "ASCII";
1446     if (errors == NULL)
1447         errors = "strict";
1448 
1449     self->encoding = _PyMem_Strdup(encoding);
1450     self->errors = _PyMem_Strdup(errors);
1451     if (self->encoding == NULL || self->errors == NULL) {
1452         PyErr_NoMemory();
1453         return -1;
1454     }
1455     return 0;
1456 }
1457 
1458 /* Generate a GET opcode for an object stored in the memo. */
1459 static int
memo_get(PicklerObject * self,PyObject * key)1460 memo_get(PicklerObject *self, PyObject *key)
1461 {
1462     Py_ssize_t *value;
1463     char pdata[30];
1464     Py_ssize_t len;
1465 
1466     value = PyMemoTable_Get(self->memo, key);
1467     if (value == NULL)  {
1468         PyErr_SetObject(PyExc_KeyError, key);
1469         return -1;
1470     }
1471 
1472     if (!self->bin) {
1473         pdata[0] = GET;
1474         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1475                       "%" PY_FORMAT_SIZE_T "d\n", *value);
1476         len = strlen(pdata);
1477     }
1478     else {
1479         if (*value < 256) {
1480             pdata[0] = BINGET;
1481             pdata[1] = (unsigned char)(*value & 0xff);
1482             len = 2;
1483         }
1484         else if ((size_t)*value <= 0xffffffffUL) {
1485             pdata[0] = LONG_BINGET;
1486             pdata[1] = (unsigned char)(*value & 0xff);
1487             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1488             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1489             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1490             len = 5;
1491         }
1492         else { /* unlikely */
1493             PickleState *st = _Pickle_GetGlobalState();
1494             PyErr_SetString(st->PicklingError,
1495                             "memo id too large for LONG_BINGET");
1496             return -1;
1497         }
1498     }
1499 
1500     if (_Pickler_Write(self, pdata, len) < 0)
1501         return -1;
1502 
1503     return 0;
1504 }
1505 
1506 /* Store an object in the memo, assign it a new unique ID based on the number
1507    of objects currently stored in the memo and generate a PUT opcode. */
1508 static int
memo_put(PicklerObject * self,PyObject * obj)1509 memo_put(PicklerObject *self, PyObject *obj)
1510 {
1511     char pdata[30];
1512     Py_ssize_t len;
1513     Py_ssize_t idx;
1514 
1515     const char memoize_op = MEMOIZE;
1516 
1517     if (self->fast)
1518         return 0;
1519 
1520     idx = PyMemoTable_Size(self->memo);
1521     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1522         return -1;
1523 
1524     if (self->proto >= 4) {
1525         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1526             return -1;
1527         return 0;
1528     }
1529     else if (!self->bin) {
1530         pdata[0] = PUT;
1531         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1532                       "%" PY_FORMAT_SIZE_T "d\n", idx);
1533         len = strlen(pdata);
1534     }
1535     else {
1536         if (idx < 256) {
1537             pdata[0] = BINPUT;
1538             pdata[1] = (unsigned char)idx;
1539             len = 2;
1540         }
1541         else if ((size_t)idx <= 0xffffffffUL) {
1542             pdata[0] = LONG_BINPUT;
1543             pdata[1] = (unsigned char)(idx & 0xff);
1544             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1545             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1546             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1547             len = 5;
1548         }
1549         else { /* unlikely */
1550             PickleState *st = _Pickle_GetGlobalState();
1551             PyErr_SetString(st->PicklingError,
1552                             "memo id too large for LONG_BINPUT");
1553             return -1;
1554         }
1555     }
1556     if (_Pickler_Write(self, pdata, len) < 0)
1557         return -1;
1558 
1559     return 0;
1560 }
1561 
1562 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1563 get_dotted_path(PyObject *obj, PyObject *name)
1564 {
1565     _Py_static_string(PyId_dot, ".");
1566     PyObject *dotted_path;
1567     Py_ssize_t i, n;
1568 
1569     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1570     if (dotted_path == NULL)
1571         return NULL;
1572     n = PyList_GET_SIZE(dotted_path);
1573     assert(n >= 1);
1574     for (i = 0; i < n; i++) {
1575         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1576         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1577             if (obj == NULL)
1578                 PyErr_Format(PyExc_AttributeError,
1579                              "Can't pickle local object %R", name);
1580             else
1581                 PyErr_Format(PyExc_AttributeError,
1582                              "Can't pickle local attribute %R on %R", name, obj);
1583             Py_DECREF(dotted_path);
1584             return NULL;
1585         }
1586     }
1587     return dotted_path;
1588 }
1589 
1590 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1591 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1592 {
1593     Py_ssize_t i, n;
1594     PyObject *parent = NULL;
1595 
1596     assert(PyList_CheckExact(names));
1597     Py_INCREF(obj);
1598     n = PyList_GET_SIZE(names);
1599     for (i = 0; i < n; i++) {
1600         PyObject *name = PyList_GET_ITEM(names, i);
1601         Py_XDECREF(parent);
1602         parent = obj;
1603         obj = PyObject_GetAttr(parent, name);
1604         if (obj == NULL) {
1605             Py_DECREF(parent);
1606             return NULL;
1607         }
1608     }
1609     if (pparent != NULL)
1610         *pparent = parent;
1611     else
1612         Py_XDECREF(parent);
1613     return obj;
1614 }
1615 
1616 static void
reformat_attribute_error(PyObject * obj,PyObject * name)1617 reformat_attribute_error(PyObject *obj, PyObject *name)
1618 {
1619     if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
1620         PyErr_Clear();
1621         PyErr_Format(PyExc_AttributeError,
1622                      "Can't get attribute %R on %R", name, obj);
1623     }
1624 }
1625 
1626 
1627 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1628 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1629 {
1630     PyObject *dotted_path, *attr;
1631 
1632     if (allow_qualname) {
1633         dotted_path = get_dotted_path(obj, name);
1634         if (dotted_path == NULL)
1635             return NULL;
1636         attr = get_deep_attribute(obj, dotted_path, NULL);
1637         Py_DECREF(dotted_path);
1638     }
1639     else
1640         attr = PyObject_GetAttr(obj, name);
1641     if (attr == NULL)
1642         reformat_attribute_error(obj, name);
1643     return attr;
1644 }
1645 
1646 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1647 whichmodule(PyObject *global, PyObject *dotted_path)
1648 {
1649     PyObject *module_name;
1650     PyObject *modules_dict;
1651     PyObject *module;
1652     Py_ssize_t i;
1653     _Py_IDENTIFIER(__module__);
1654     _Py_IDENTIFIER(modules);
1655     _Py_IDENTIFIER(__main__);
1656 
1657     module_name = _PyObject_GetAttrId(global, &PyId___module__);
1658 
1659     if (module_name == NULL) {
1660         if (!PyErr_ExceptionMatches(PyExc_AttributeError))
1661             return NULL;
1662         PyErr_Clear();
1663     }
1664     else {
1665         /* In some rare cases (e.g., bound methods of extension types),
1666            __module__ can be None. If it is so, then search sys.modules for
1667            the module of global. */
1668         if (module_name != Py_None)
1669             return module_name;
1670         Py_CLEAR(module_name);
1671     }
1672     assert(module_name == NULL);
1673 
1674     /* Fallback on walking sys.modules */
1675     modules_dict = _PySys_GetObjectId(&PyId_modules);
1676     if (modules_dict == NULL) {
1677         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1678         return NULL;
1679     }
1680 
1681     i = 0;
1682     while (PyDict_Next(modules_dict, &i, &module_name, &module)) {
1683         PyObject *candidate;
1684         if (PyUnicode_Check(module_name) &&
1685             _PyUnicode_EqualToASCIIString(module_name, "__main__"))
1686             continue;
1687         if (module == Py_None)
1688             continue;
1689 
1690         candidate = get_deep_attribute(module, dotted_path, NULL);
1691         if (candidate == NULL) {
1692             if (!PyErr_ExceptionMatches(PyExc_AttributeError))
1693                 return NULL;
1694             PyErr_Clear();
1695             continue;
1696         }
1697 
1698         if (candidate == global) {
1699             Py_INCREF(module_name);
1700             Py_DECREF(candidate);
1701             return module_name;
1702         }
1703         Py_DECREF(candidate);
1704     }
1705 
1706     /* If no module is found, use __main__. */
1707     module_name = _PyUnicode_FromId(&PyId___main__);
1708     Py_INCREF(module_name);
1709     return module_name;
1710 }
1711 
1712 /* fast_save_enter() and fast_save_leave() are guards against recursive
1713    objects when Pickler is used with the "fast mode" (i.e., with object
1714    memoization disabled). If the nesting of a list or dict object exceed
1715    FAST_NESTING_LIMIT, these guards will start keeping an internal
1716    reference to the seen list or dict objects and check whether these objects
1717    are recursive. These are not strictly necessary, since save() has a
1718    hard-coded recursion limit, but they give a nicer error message than the
1719    typical RuntimeError. */
1720 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1721 fast_save_enter(PicklerObject *self, PyObject *obj)
1722 {
1723     /* if fast_nesting < 0, we're doing an error exit. */
1724     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1725         PyObject *key = NULL;
1726         if (self->fast_memo == NULL) {
1727             self->fast_memo = PyDict_New();
1728             if (self->fast_memo == NULL) {
1729                 self->fast_nesting = -1;
1730                 return 0;
1731             }
1732         }
1733         key = PyLong_FromVoidPtr(obj);
1734         if (key == NULL)
1735             return 0;
1736         if (PyDict_GetItemWithError(self->fast_memo, key)) {
1737             Py_DECREF(key);
1738             PyErr_Format(PyExc_ValueError,
1739                          "fast mode: can't pickle cyclic objects "
1740                          "including object type %.200s at %p",
1741                          obj->ob_type->tp_name, obj);
1742             self->fast_nesting = -1;
1743             return 0;
1744         }
1745         if (PyErr_Occurred()) {
1746             return 0;
1747         }
1748         if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1749             Py_DECREF(key);
1750             self->fast_nesting = -1;
1751             return 0;
1752         }
1753         Py_DECREF(key);
1754     }
1755     return 1;
1756 }
1757 
1758 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1759 fast_save_leave(PicklerObject *self, PyObject *obj)
1760 {
1761     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1762         PyObject *key = PyLong_FromVoidPtr(obj);
1763         if (key == NULL)
1764             return 0;
1765         if (PyDict_DelItem(self->fast_memo, key) < 0) {
1766             Py_DECREF(key);
1767             return 0;
1768         }
1769         Py_DECREF(key);
1770     }
1771     return 1;
1772 }
1773 
1774 static int
save_none(PicklerObject * self,PyObject * obj)1775 save_none(PicklerObject *self, PyObject *obj)
1776 {
1777     const char none_op = NONE;
1778     if (_Pickler_Write(self, &none_op, 1) < 0)
1779         return -1;
1780 
1781     return 0;
1782 }
1783 
1784 static int
save_bool(PicklerObject * self,PyObject * obj)1785 save_bool(PicklerObject *self, PyObject *obj)
1786 {
1787     if (self->proto >= 2) {
1788         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
1789         if (_Pickler_Write(self, &bool_op, 1) < 0)
1790             return -1;
1791     }
1792     else {
1793         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1794          * so that unpicklers written before bools were introduced unpickle them
1795          * as ints, but unpicklers after can recognize that bools were intended.
1796          * Note that protocol 2 added direct ways to pickle bools.
1797          */
1798         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1799         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1800             return -1;
1801     }
1802     return 0;
1803 }
1804 
1805 static int
save_long(PicklerObject * self,PyObject * obj)1806 save_long(PicklerObject *self, PyObject *obj)
1807 {
1808     PyObject *repr = NULL;
1809     Py_ssize_t size;
1810     long val;
1811     int status = 0;
1812 
1813     const char long_op = LONG;
1814 
1815     val= PyLong_AsLong(obj);
1816     if (val == -1 && PyErr_Occurred()) {
1817         /* out of range for int pickling */
1818         PyErr_Clear();
1819     }
1820     else if (self->bin &&
1821              (sizeof(long) <= 4 ||
1822               (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
1823         /* result fits in a signed 4-byte integer.
1824 
1825            Note: we can't use -0x80000000L in the above condition because some
1826            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1827            before applying the unary minus when sizeof(long) <= 4. The
1828            resulting value stays unsigned which is commonly not what we want,
1829            so MSVC happily warns us about it.  However, that result would have
1830            been fine because we guard for sizeof(long) <= 4 which turns the
1831            condition true in that particular case. */
1832         char pdata[32];
1833         Py_ssize_t len = 0;
1834 
1835         pdata[1] = (unsigned char)(val & 0xff);
1836         pdata[2] = (unsigned char)((val >> 8) & 0xff);
1837         pdata[3] = (unsigned char)((val >> 16) & 0xff);
1838         pdata[4] = (unsigned char)((val >> 24) & 0xff);
1839 
1840         if ((pdata[4] == 0) && (pdata[3] == 0)) {
1841             if (pdata[2] == 0) {
1842                 pdata[0] = BININT1;
1843                 len = 2;
1844             }
1845             else {
1846                 pdata[0] = BININT2;
1847                 len = 3;
1848             }
1849         }
1850         else {
1851             pdata[0] = BININT;
1852             len = 5;
1853         }
1854 
1855         if (_Pickler_Write(self, pdata, len) < 0)
1856             return -1;
1857 
1858         return 0;
1859     }
1860 
1861     if (self->proto >= 2) {
1862         /* Linear-time pickling. */
1863         size_t nbits;
1864         size_t nbytes;
1865         unsigned char *pdata;
1866         char header[5];
1867         int i;
1868         int sign = _PyLong_Sign(obj);
1869 
1870         if (sign == 0) {
1871             header[0] = LONG1;
1872             header[1] = 0;      /* It's 0 -- an empty bytestring. */
1873             if (_Pickler_Write(self, header, 2) < 0)
1874                 goto error;
1875             return 0;
1876         }
1877         nbits = _PyLong_NumBits(obj);
1878         if (nbits == (size_t)-1 && PyErr_Occurred())
1879             goto error;
1880         /* How many bytes do we need?  There are nbits >> 3 full
1881          * bytes of data, and nbits & 7 leftover bits.  If there
1882          * are any leftover bits, then we clearly need another
1883          * byte.  Wnat's not so obvious is that we *probably*
1884          * need another byte even if there aren't any leftovers:
1885          * the most-significant bit of the most-significant byte
1886          * acts like a sign bit, and it's usually got a sense
1887          * opposite of the one we need.  The exception is ints
1888          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
1889          * its own 256's-complement, so has the right sign bit
1890          * even without the extra byte.  That's a pain to check
1891          * for in advance, though, so we always grab an extra
1892          * byte at the start, and cut it back later if possible.
1893          */
1894         nbytes = (nbits >> 3) + 1;
1895         if (nbytes > 0x7fffffffL) {
1896             PyErr_SetString(PyExc_OverflowError,
1897                             "int too large to pickle");
1898             goto error;
1899         }
1900         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
1901         if (repr == NULL)
1902             goto error;
1903         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
1904         i = _PyLong_AsByteArray((PyLongObject *)obj,
1905                                 pdata, nbytes,
1906                                 1 /* little endian */ , 1 /* signed */ );
1907         if (i < 0)
1908             goto error;
1909         /* If the int is negative, this may be a byte more than
1910          * needed.  This is so iff the MSB is all redundant sign
1911          * bits.
1912          */
1913         if (sign < 0 &&
1914             nbytes > 1 &&
1915             pdata[nbytes - 1] == 0xff &&
1916             (pdata[nbytes - 2] & 0x80) != 0) {
1917             nbytes--;
1918         }
1919 
1920         if (nbytes < 256) {
1921             header[0] = LONG1;
1922             header[1] = (unsigned char)nbytes;
1923             size = 2;
1924         }
1925         else {
1926             header[0] = LONG4;
1927             size = (Py_ssize_t) nbytes;
1928             for (i = 1; i < 5; i++) {
1929                 header[i] = (unsigned char)(size & 0xff);
1930                 size >>= 8;
1931             }
1932             size = 5;
1933         }
1934         if (_Pickler_Write(self, header, size) < 0 ||
1935             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
1936             goto error;
1937     }
1938     else {
1939         char *string;
1940 
1941         /* proto < 2: write the repr and newline.  This is quadratic-time (in
1942            the number of digits), in both directions.  We add a trailing 'L'
1943            to the repr, for compatibility with Python 2.x. */
1944 
1945         repr = PyObject_Repr(obj);
1946         if (repr == NULL)
1947             goto error;
1948 
1949         string = PyUnicode_AsUTF8AndSize(repr, &size);
1950         if (string == NULL)
1951             goto error;
1952 
1953         if (_Pickler_Write(self, &long_op, 1) < 0 ||
1954             _Pickler_Write(self, string, size) < 0 ||
1955             _Pickler_Write(self, "L\n", 2) < 0)
1956             goto error;
1957     }
1958 
1959     if (0) {
1960   error:
1961       status = -1;
1962     }
1963     Py_XDECREF(repr);
1964 
1965     return status;
1966 }
1967 
1968 static int
save_float(PicklerObject * self,PyObject * obj)1969 save_float(PicklerObject *self, PyObject *obj)
1970 {
1971     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1972 
1973     if (self->bin) {
1974         char pdata[9];
1975         pdata[0] = BINFLOAT;
1976         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1977             return -1;
1978         if (_Pickler_Write(self, pdata, 9) < 0)
1979             return -1;
1980    }
1981     else {
1982         int result = -1;
1983         char *buf = NULL;
1984         char op = FLOAT;
1985 
1986         if (_Pickler_Write(self, &op, 1) < 0)
1987             goto done;
1988 
1989         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
1990         if (!buf) {
1991             PyErr_NoMemory();
1992             goto done;
1993         }
1994 
1995         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
1996             goto done;
1997 
1998         if (_Pickler_Write(self, "\n", 1) < 0)
1999             goto done;
2000 
2001         result = 0;
2002 done:
2003         PyMem_Free(buf);
2004         return result;
2005     }
2006 
2007     return 0;
2008 }
2009 
2010 static int
save_bytes(PicklerObject * self,PyObject * obj)2011 save_bytes(PicklerObject *self, PyObject *obj)
2012 {
2013     if (self->proto < 3) {
2014         /* Older pickle protocols do not have an opcode for pickling bytes
2015            objects. Therefore, we need to fake the copy protocol (i.e.,
2016            the __reduce__ method) to permit bytes object unpickling.
2017 
2018            Here we use a hack to be compatible with Python 2. Since in Python
2019            2 'bytes' is just an alias for 'str' (which has different
2020            parameters than the actual bytes object), we use codecs.encode
2021            to create the appropriate 'str' object when unpickled using
2022            Python 2 *and* the appropriate 'bytes' object when unpickled
2023            using Python 3. Again this is a hack and we don't need to do this
2024            with newer protocols. */
2025         PyObject *reduce_value = NULL;
2026         int status;
2027 
2028         if (PyBytes_GET_SIZE(obj) == 0) {
2029             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2030         }
2031         else {
2032             PickleState *st = _Pickle_GetGlobalState();
2033             PyObject *unicode_str =
2034                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2035                                        PyBytes_GET_SIZE(obj),
2036                                        "strict");
2037             _Py_IDENTIFIER(latin1);
2038 
2039             if (unicode_str == NULL)
2040                 return -1;
2041             reduce_value = Py_BuildValue("(O(OO))",
2042                                          st->codecs_encode, unicode_str,
2043                                          _PyUnicode_FromId(&PyId_latin1));
2044             Py_DECREF(unicode_str);
2045         }
2046 
2047         if (reduce_value == NULL)
2048             return -1;
2049 
2050         /* save_reduce() will memoize the object automatically. */
2051         status = save_reduce(self, reduce_value, obj);
2052         Py_DECREF(reduce_value);
2053         return status;
2054     }
2055     else {
2056         Py_ssize_t size;
2057         char header[9];
2058         Py_ssize_t len;
2059 
2060         size = PyBytes_GET_SIZE(obj);
2061         if (size < 0)
2062             return -1;
2063 
2064         if (size <= 0xff) {
2065             header[0] = SHORT_BINBYTES;
2066             header[1] = (unsigned char)size;
2067             len = 2;
2068         }
2069         else if ((size_t)size <= 0xffffffffUL) {
2070             header[0] = BINBYTES;
2071             header[1] = (unsigned char)(size & 0xff);
2072             header[2] = (unsigned char)((size >> 8) & 0xff);
2073             header[3] = (unsigned char)((size >> 16) & 0xff);
2074             header[4] = (unsigned char)((size >> 24) & 0xff);
2075             len = 5;
2076         }
2077         else if (self->proto >= 4) {
2078             header[0] = BINBYTES8;
2079             _write_size64(header + 1, size);
2080             len = 9;
2081         }
2082         else {
2083             PyErr_SetString(PyExc_OverflowError,
2084                             "cannot serialize a bytes object larger than 4 GiB");
2085             return -1;          /* string too large */
2086         }
2087 
2088         if (_Pickler_Write(self, header, len) < 0)
2089             return -1;
2090 
2091         if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
2092             return -1;
2093 
2094         if (memo_put(self, obj) < 0)
2095             return -1;
2096 
2097         return 0;
2098     }
2099 }
2100 
2101 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2102    backslash and newline characters to \uXXXX escapes. */
2103 static PyObject *
raw_unicode_escape(PyObject * obj)2104 raw_unicode_escape(PyObject *obj)
2105 {
2106     char *p;
2107     Py_ssize_t i, size;
2108     void *data;
2109     unsigned int kind;
2110     _PyBytesWriter writer;
2111 
2112     if (PyUnicode_READY(obj))
2113         return NULL;
2114 
2115     _PyBytesWriter_Init(&writer);
2116 
2117     size = PyUnicode_GET_LENGTH(obj);
2118     data = PyUnicode_DATA(obj);
2119     kind = PyUnicode_KIND(obj);
2120 
2121     p = _PyBytesWriter_Alloc(&writer, size);
2122     if (p == NULL)
2123         goto error;
2124     writer.overallocate = 1;
2125 
2126     for (i=0; i < size; i++) {
2127         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2128         /* Map 32-bit characters to '\Uxxxxxxxx' */
2129         if (ch >= 0x10000) {
2130             /* -1: subtract 1 preallocated byte */
2131             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2132             if (p == NULL)
2133                 goto error;
2134 
2135             *p++ = '\\';
2136             *p++ = 'U';
2137             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2138             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2139             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2140             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2141             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2142             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2143             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2144             *p++ = Py_hexdigits[ch & 15];
2145         }
2146         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2147         else if (ch >= 256 || ch == '\\' || ch == '\n') {
2148             /* -1: subtract 1 preallocated byte */
2149             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2150             if (p == NULL)
2151                 goto error;
2152 
2153             *p++ = '\\';
2154             *p++ = 'u';
2155             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2156             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2157             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2158             *p++ = Py_hexdigits[ch & 15];
2159         }
2160         /* Copy everything else as-is */
2161         else
2162             *p++ = (char) ch;
2163     }
2164 
2165     return _PyBytesWriter_Finish(&writer, p);
2166 
2167 error:
2168     _PyBytesWriter_Dealloc(&writer);
2169     return NULL;
2170 }
2171 
2172 static int
write_utf8(PicklerObject * self,const char * data,Py_ssize_t size)2173 write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
2174 {
2175     char header[9];
2176     Py_ssize_t len;
2177 
2178     assert(size >= 0);
2179     if (size <= 0xff && self->proto >= 4) {
2180         header[0] = SHORT_BINUNICODE;
2181         header[1] = (unsigned char)(size & 0xff);
2182         len = 2;
2183     }
2184     else if ((size_t)size <= 0xffffffffUL) {
2185         header[0] = BINUNICODE;
2186         header[1] = (unsigned char)(size & 0xff);
2187         header[2] = (unsigned char)((size >> 8) & 0xff);
2188         header[3] = (unsigned char)((size >> 16) & 0xff);
2189         header[4] = (unsigned char)((size >> 24) & 0xff);
2190         len = 5;
2191     }
2192     else if (self->proto >= 4) {
2193         header[0] = BINUNICODE8;
2194         _write_size64(header + 1, size);
2195         len = 9;
2196     }
2197     else {
2198         PyErr_SetString(PyExc_OverflowError,
2199                         "cannot serialize a string larger than 4GiB");
2200         return -1;
2201     }
2202 
2203     if (_Pickler_Write(self, header, len) < 0)
2204         return -1;
2205     if (_Pickler_Write(self, data, size) < 0)
2206         return -1;
2207 
2208     return 0;
2209 }
2210 
2211 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2212 write_unicode_binary(PicklerObject *self, PyObject *obj)
2213 {
2214     PyObject *encoded = NULL;
2215     Py_ssize_t size;
2216     char *data;
2217     int r;
2218 
2219     if (PyUnicode_READY(obj))
2220         return -1;
2221 
2222     data = PyUnicode_AsUTF8AndSize(obj, &size);
2223     if (data != NULL)
2224         return write_utf8(self, data, size);
2225 
2226     /* Issue #8383: for strings with lone surrogates, fallback on the
2227        "surrogatepass" error handler. */
2228     PyErr_Clear();
2229     encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2230     if (encoded == NULL)
2231         return -1;
2232 
2233     r = write_utf8(self, PyBytes_AS_STRING(encoded),
2234                    PyBytes_GET_SIZE(encoded));
2235     Py_DECREF(encoded);
2236     return r;
2237 }
2238 
2239 static int
save_unicode(PicklerObject * self,PyObject * obj)2240 save_unicode(PicklerObject *self, PyObject *obj)
2241 {
2242     if (self->bin) {
2243         if (write_unicode_binary(self, obj) < 0)
2244             return -1;
2245     }
2246     else {
2247         PyObject *encoded;
2248         Py_ssize_t size;
2249         const char unicode_op = UNICODE;
2250 
2251         encoded = raw_unicode_escape(obj);
2252         if (encoded == NULL)
2253             return -1;
2254 
2255         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2256             Py_DECREF(encoded);
2257             return -1;
2258         }
2259 
2260         size = PyBytes_GET_SIZE(encoded);
2261         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2262             Py_DECREF(encoded);
2263             return -1;
2264         }
2265         Py_DECREF(encoded);
2266 
2267         if (_Pickler_Write(self, "\n", 1) < 0)
2268             return -1;
2269     }
2270     if (memo_put(self, obj) < 0)
2271         return -1;
2272 
2273     return 0;
2274 }
2275 
2276 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2277 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2278 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2279 {
2280     Py_ssize_t i;
2281 
2282     assert(PyTuple_Size(t) == len);
2283 
2284     for (i = 0; i < len; i++) {
2285         PyObject *element = PyTuple_GET_ITEM(t, i);
2286 
2287         if (element == NULL)
2288             return -1;
2289         if (save(self, element, 0) < 0)
2290             return -1;
2291     }
2292 
2293     return 0;
2294 }
2295 
2296 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2297  * used across protocols to minimize the space needed to pickle them.
2298  * Tuples are also the only builtin immutable type that can be recursive
2299  * (a tuple can be reached from itself), and that requires some subtle
2300  * magic so that it works in all cases.  IOW, this is a long routine.
2301  */
2302 static int
save_tuple(PicklerObject * self,PyObject * obj)2303 save_tuple(PicklerObject *self, PyObject *obj)
2304 {
2305     Py_ssize_t len, i;
2306 
2307     const char mark_op = MARK;
2308     const char tuple_op = TUPLE;
2309     const char pop_op = POP;
2310     const char pop_mark_op = POP_MARK;
2311     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2312 
2313     if ((len = PyTuple_Size(obj)) < 0)
2314         return -1;
2315 
2316     if (len == 0) {
2317         char pdata[2];
2318 
2319         if (self->proto) {
2320             pdata[0] = EMPTY_TUPLE;
2321             len = 1;
2322         }
2323         else {
2324             pdata[0] = MARK;
2325             pdata[1] = TUPLE;
2326             len = 2;
2327         }
2328         if (_Pickler_Write(self, pdata, len) < 0)
2329             return -1;
2330         return 0;
2331     }
2332 
2333     /* The tuple isn't in the memo now.  If it shows up there after
2334      * saving the tuple elements, the tuple must be recursive, in
2335      * which case we'll pop everything we put on the stack, and fetch
2336      * its value from the memo.
2337      */
2338     if (len <= 3 && self->proto >= 2) {
2339         /* Use TUPLE{1,2,3} opcodes. */
2340         if (store_tuple_elements(self, obj, len) < 0)
2341             return -1;
2342 
2343         if (PyMemoTable_Get(self->memo, obj)) {
2344             /* pop the len elements */
2345             for (i = 0; i < len; i++)
2346                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2347                     return -1;
2348             /* fetch from memo */
2349             if (memo_get(self, obj) < 0)
2350                 return -1;
2351 
2352             return 0;
2353         }
2354         else { /* Not recursive. */
2355             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2356                 return -1;
2357         }
2358         goto memoize;
2359     }
2360 
2361     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2362      * Generate MARK e1 e2 ... TUPLE
2363      */
2364     if (_Pickler_Write(self, &mark_op, 1) < 0)
2365         return -1;
2366 
2367     if (store_tuple_elements(self, obj, len) < 0)
2368         return -1;
2369 
2370     if (PyMemoTable_Get(self->memo, obj)) {
2371         /* pop the stack stuff we pushed */
2372         if (self->bin) {
2373             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2374                 return -1;
2375         }
2376         else {
2377             /* Note that we pop one more than len, to remove
2378              * the MARK too.
2379              */
2380             for (i = 0; i <= len; i++)
2381                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2382                     return -1;
2383         }
2384         /* fetch from memo */
2385         if (memo_get(self, obj) < 0)
2386             return -1;
2387 
2388         return 0;
2389     }
2390     else { /* Not recursive. */
2391         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2392             return -1;
2393     }
2394 
2395   memoize:
2396     if (memo_put(self, obj) < 0)
2397         return -1;
2398 
2399     return 0;
2400 }
2401 
2402 /* iter is an iterator giving items, and we batch up chunks of
2403  *     MARK item item ... item APPENDS
2404  * opcode sequences.  Calling code should have arranged to first create an
2405  * empty list, or list-like object, for the APPENDS to operate on.
2406  * Returns 0 on success, <0 on error.
2407  */
2408 static int
batch_list(PicklerObject * self,PyObject * iter)2409 batch_list(PicklerObject *self, PyObject *iter)
2410 {
2411     PyObject *obj = NULL;
2412     PyObject *firstitem = NULL;
2413     int i, n;
2414 
2415     const char mark_op = MARK;
2416     const char append_op = APPEND;
2417     const char appends_op = APPENDS;
2418 
2419     assert(iter != NULL);
2420 
2421     /* XXX: I think this function could be made faster by avoiding the
2422        iterator interface and fetching objects directly from list using
2423        PyList_GET_ITEM.
2424     */
2425 
2426     if (self->proto == 0) {
2427         /* APPENDS isn't available; do one at a time. */
2428         for (;;) {
2429             obj = PyIter_Next(iter);
2430             if (obj == NULL) {
2431                 if (PyErr_Occurred())
2432                     return -1;
2433                 break;
2434             }
2435             i = save(self, obj, 0);
2436             Py_DECREF(obj);
2437             if (i < 0)
2438                 return -1;
2439             if (_Pickler_Write(self, &append_op, 1) < 0)
2440                 return -1;
2441         }
2442         return 0;
2443     }
2444 
2445     /* proto > 0:  write in batches of BATCHSIZE. */
2446     do {
2447         /* Get first item */
2448         firstitem = PyIter_Next(iter);
2449         if (firstitem == NULL) {
2450             if (PyErr_Occurred())
2451                 goto error;
2452 
2453             /* nothing more to add */
2454             break;
2455         }
2456 
2457         /* Try to get a second item */
2458         obj = PyIter_Next(iter);
2459         if (obj == NULL) {
2460             if (PyErr_Occurred())
2461                 goto error;
2462 
2463             /* Only one item to write */
2464             if (save(self, firstitem, 0) < 0)
2465                 goto error;
2466             if (_Pickler_Write(self, &append_op, 1) < 0)
2467                 goto error;
2468             Py_CLEAR(firstitem);
2469             break;
2470         }
2471 
2472         /* More than one item to write */
2473 
2474         /* Pump out MARK, items, APPENDS. */
2475         if (_Pickler_Write(self, &mark_op, 1) < 0)
2476             goto error;
2477 
2478         if (save(self, firstitem, 0) < 0)
2479             goto error;
2480         Py_CLEAR(firstitem);
2481         n = 1;
2482 
2483         /* Fetch and save up to BATCHSIZE items */
2484         while (obj) {
2485             if (save(self, obj, 0) < 0)
2486                 goto error;
2487             Py_CLEAR(obj);
2488             n += 1;
2489 
2490             if (n == BATCHSIZE)
2491                 break;
2492 
2493             obj = PyIter_Next(iter);
2494             if (obj == NULL) {
2495                 if (PyErr_Occurred())
2496                     goto error;
2497                 break;
2498             }
2499         }
2500 
2501         if (_Pickler_Write(self, &appends_op, 1) < 0)
2502             goto error;
2503 
2504     } while (n == BATCHSIZE);
2505     return 0;
2506 
2507   error:
2508     Py_XDECREF(firstitem);
2509     Py_XDECREF(obj);
2510     return -1;
2511 }
2512 
2513 /* This is a variant of batch_list() above, specialized for lists (with no
2514  * support for list subclasses). Like batch_list(), we batch up chunks of
2515  *     MARK item item ... item APPENDS
2516  * opcode sequences.  Calling code should have arranged to first create an
2517  * empty list, or list-like object, for the APPENDS to operate on.
2518  * Returns 0 on success, -1 on error.
2519  *
2520  * This version is considerably faster than batch_list(), if less general.
2521  *
2522  * Note that this only works for protocols > 0.
2523  */
2524 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2525 batch_list_exact(PicklerObject *self, PyObject *obj)
2526 {
2527     PyObject *item = NULL;
2528     Py_ssize_t this_batch, total;
2529 
2530     const char append_op = APPEND;
2531     const char appends_op = APPENDS;
2532     const char mark_op = MARK;
2533 
2534     assert(obj != NULL);
2535     assert(self->proto > 0);
2536     assert(PyList_CheckExact(obj));
2537 
2538     if (PyList_GET_SIZE(obj) == 1) {
2539         item = PyList_GET_ITEM(obj, 0);
2540         if (save(self, item, 0) < 0)
2541             return -1;
2542         if (_Pickler_Write(self, &append_op, 1) < 0)
2543             return -1;
2544         return 0;
2545     }
2546 
2547     /* Write in batches of BATCHSIZE. */
2548     total = 0;
2549     do {
2550         this_batch = 0;
2551         if (_Pickler_Write(self, &mark_op, 1) < 0)
2552             return -1;
2553         while (total < PyList_GET_SIZE(obj)) {
2554             item = PyList_GET_ITEM(obj, total);
2555             if (save(self, item, 0) < 0)
2556                 return -1;
2557             total++;
2558             if (++this_batch == BATCHSIZE)
2559                 break;
2560         }
2561         if (_Pickler_Write(self, &appends_op, 1) < 0)
2562             return -1;
2563 
2564     } while (total < PyList_GET_SIZE(obj));
2565 
2566     return 0;
2567 }
2568 
2569 static int
save_list(PicklerObject * self,PyObject * obj)2570 save_list(PicklerObject *self, PyObject *obj)
2571 {
2572     char header[3];
2573     Py_ssize_t len;
2574     int status = 0;
2575 
2576     if (self->fast && !fast_save_enter(self, obj))
2577         goto error;
2578 
2579     /* Create an empty list. */
2580     if (self->bin) {
2581         header[0] = EMPTY_LIST;
2582         len = 1;
2583     }
2584     else {
2585         header[0] = MARK;
2586         header[1] = LIST;
2587         len = 2;
2588     }
2589 
2590     if (_Pickler_Write(self, header, len) < 0)
2591         goto error;
2592 
2593     /* Get list length, and bow out early if empty. */
2594     if ((len = PyList_Size(obj)) < 0)
2595         goto error;
2596 
2597     if (memo_put(self, obj) < 0)
2598         goto error;
2599 
2600     if (len != 0) {
2601         /* Materialize the list elements. */
2602         if (PyList_CheckExact(obj) && self->proto > 0) {
2603             if (Py_EnterRecursiveCall(" while pickling an object"))
2604                 goto error;
2605             status = batch_list_exact(self, obj);
2606             Py_LeaveRecursiveCall();
2607         } else {
2608             PyObject *iter = PyObject_GetIter(obj);
2609             if (iter == NULL)
2610                 goto error;
2611 
2612             if (Py_EnterRecursiveCall(" while pickling an object")) {
2613                 Py_DECREF(iter);
2614                 goto error;
2615             }
2616             status = batch_list(self, iter);
2617             Py_LeaveRecursiveCall();
2618             Py_DECREF(iter);
2619         }
2620     }
2621     if (0) {
2622   error:
2623         status = -1;
2624     }
2625 
2626     if (self->fast && !fast_save_leave(self, obj))
2627         status = -1;
2628 
2629     return status;
2630 }
2631 
2632 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2633  *     MARK key value ... key value SETITEMS
2634  * opcode sequences.  Calling code should have arranged to first create an
2635  * empty dict, or dict-like object, for the SETITEMS to operate on.
2636  * Returns 0 on success, <0 on error.
2637  *
2638  * This is very much like batch_list().  The difference between saving
2639  * elements directly, and picking apart two-tuples, is so long-winded at
2640  * the C level, though, that attempts to combine these routines were too
2641  * ugly to bear.
2642  */
2643 static int
batch_dict(PicklerObject * self,PyObject * iter)2644 batch_dict(PicklerObject *self, PyObject *iter)
2645 {
2646     PyObject *obj = NULL;
2647     PyObject *firstitem = NULL;
2648     int i, n;
2649 
2650     const char mark_op = MARK;
2651     const char setitem_op = SETITEM;
2652     const char setitems_op = SETITEMS;
2653 
2654     assert(iter != NULL);
2655 
2656     if (self->proto == 0) {
2657         /* SETITEMS isn't available; do one at a time. */
2658         for (;;) {
2659             obj = PyIter_Next(iter);
2660             if (obj == NULL) {
2661                 if (PyErr_Occurred())
2662                     return -1;
2663                 break;
2664             }
2665             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2666                 PyErr_SetString(PyExc_TypeError, "dict items "
2667                                 "iterator must return 2-tuples");
2668                 return -1;
2669             }
2670             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2671             if (i >= 0)
2672                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2673             Py_DECREF(obj);
2674             if (i < 0)
2675                 return -1;
2676             if (_Pickler_Write(self, &setitem_op, 1) < 0)
2677                 return -1;
2678         }
2679         return 0;
2680     }
2681 
2682     /* proto > 0:  write in batches of BATCHSIZE. */
2683     do {
2684         /* Get first item */
2685         firstitem = PyIter_Next(iter);
2686         if (firstitem == NULL) {
2687             if (PyErr_Occurred())
2688                 goto error;
2689 
2690             /* nothing more to add */
2691             break;
2692         }
2693         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2694             PyErr_SetString(PyExc_TypeError, "dict items "
2695                                 "iterator must return 2-tuples");
2696             goto error;
2697         }
2698 
2699         /* Try to get a second item */
2700         obj = PyIter_Next(iter);
2701         if (obj == NULL) {
2702             if (PyErr_Occurred())
2703                 goto error;
2704 
2705             /* Only one item to write */
2706             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2707                 goto error;
2708             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2709                 goto error;
2710             if (_Pickler_Write(self, &setitem_op, 1) < 0)
2711                 goto error;
2712             Py_CLEAR(firstitem);
2713             break;
2714         }
2715 
2716         /* More than one item to write */
2717 
2718         /* Pump out MARK, items, SETITEMS. */
2719         if (_Pickler_Write(self, &mark_op, 1) < 0)
2720             goto error;
2721 
2722         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2723             goto error;
2724         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2725             goto error;
2726         Py_CLEAR(firstitem);
2727         n = 1;
2728 
2729         /* Fetch and save up to BATCHSIZE items */
2730         while (obj) {
2731             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2732                 PyErr_SetString(PyExc_TypeError, "dict items "
2733                     "iterator must return 2-tuples");
2734                 goto error;
2735             }
2736             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2737                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2738                 goto error;
2739             Py_CLEAR(obj);
2740             n += 1;
2741 
2742             if (n == BATCHSIZE)
2743                 break;
2744 
2745             obj = PyIter_Next(iter);
2746             if (obj == NULL) {
2747                 if (PyErr_Occurred())
2748                     goto error;
2749                 break;
2750             }
2751         }
2752 
2753         if (_Pickler_Write(self, &setitems_op, 1) < 0)
2754             goto error;
2755 
2756     } while (n == BATCHSIZE);
2757     return 0;
2758 
2759   error:
2760     Py_XDECREF(firstitem);
2761     Py_XDECREF(obj);
2762     return -1;
2763 }
2764 
2765 /* This is a variant of batch_dict() above that specializes for dicts, with no
2766  * support for dict subclasses. Like batch_dict(), we batch up chunks of
2767  *     MARK key value ... key value SETITEMS
2768  * opcode sequences.  Calling code should have arranged to first create an
2769  * empty dict, or dict-like object, for the SETITEMS to operate on.
2770  * Returns 0 on success, -1 on error.
2771  *
2772  * Note that this currently doesn't work for protocol 0.
2773  */
2774 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)2775 batch_dict_exact(PicklerObject *self, PyObject *obj)
2776 {
2777     PyObject *key = NULL, *value = NULL;
2778     int i;
2779     Py_ssize_t dict_size, ppos = 0;
2780 
2781     const char mark_op = MARK;
2782     const char setitem_op = SETITEM;
2783     const char setitems_op = SETITEMS;
2784 
2785     assert(obj != NULL);
2786     assert(self->proto > 0);
2787 
2788     dict_size = PyDict_Size(obj);
2789 
2790     /* Special-case len(d) == 1 to save space. */
2791     if (dict_size == 1) {
2792         PyDict_Next(obj, &ppos, &key, &value);
2793         if (save(self, key, 0) < 0)
2794             return -1;
2795         if (save(self, value, 0) < 0)
2796             return -1;
2797         if (_Pickler_Write(self, &setitem_op, 1) < 0)
2798             return -1;
2799         return 0;
2800     }
2801 
2802     /* Write in batches of BATCHSIZE. */
2803     do {
2804         i = 0;
2805         if (_Pickler_Write(self, &mark_op, 1) < 0)
2806             return -1;
2807         while (PyDict_Next(obj, &ppos, &key, &value)) {
2808             if (save(self, key, 0) < 0)
2809                 return -1;
2810             if (save(self, value, 0) < 0)
2811                 return -1;
2812             if (++i == BATCHSIZE)
2813                 break;
2814         }
2815         if (_Pickler_Write(self, &setitems_op, 1) < 0)
2816             return -1;
2817         if (PyDict_Size(obj) != dict_size) {
2818             PyErr_Format(
2819                 PyExc_RuntimeError,
2820                 "dictionary changed size during iteration");
2821             return -1;
2822         }
2823 
2824     } while (i == BATCHSIZE);
2825     return 0;
2826 }
2827 
2828 static int
save_dict(PicklerObject * self,PyObject * obj)2829 save_dict(PicklerObject *self, PyObject *obj)
2830 {
2831     PyObject *items, *iter;
2832     char header[3];
2833     Py_ssize_t len;
2834     int status = 0;
2835 
2836     if (self->fast && !fast_save_enter(self, obj))
2837         goto error;
2838 
2839     /* Create an empty dict. */
2840     if (self->bin) {
2841         header[0] = EMPTY_DICT;
2842         len = 1;
2843     }
2844     else {
2845         header[0] = MARK;
2846         header[1] = DICT;
2847         len = 2;
2848     }
2849 
2850     if (_Pickler_Write(self, header, len) < 0)
2851         goto error;
2852 
2853     /* Get dict size, and bow out early if empty. */
2854     if ((len = PyDict_Size(obj)) < 0)
2855         goto error;
2856 
2857     if (memo_put(self, obj) < 0)
2858         goto error;
2859 
2860     if (len != 0) {
2861         /* Save the dict items. */
2862         if (PyDict_CheckExact(obj) && self->proto > 0) {
2863             /* We can take certain shortcuts if we know this is a dict and
2864                not a dict subclass. */
2865             if (Py_EnterRecursiveCall(" while pickling an object"))
2866                 goto error;
2867             status = batch_dict_exact(self, obj);
2868             Py_LeaveRecursiveCall();
2869         } else {
2870             _Py_IDENTIFIER(items);
2871 
2872             items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
2873             if (items == NULL)
2874                 goto error;
2875             iter = PyObject_GetIter(items);
2876             Py_DECREF(items);
2877             if (iter == NULL)
2878                 goto error;
2879             if (Py_EnterRecursiveCall(" while pickling an object")) {
2880                 Py_DECREF(iter);
2881                 goto error;
2882             }
2883             status = batch_dict(self, iter);
2884             Py_LeaveRecursiveCall();
2885             Py_DECREF(iter);
2886         }
2887     }
2888 
2889     if (0) {
2890   error:
2891         status = -1;
2892     }
2893 
2894     if (self->fast && !fast_save_leave(self, obj))
2895         status = -1;
2896 
2897     return status;
2898 }
2899 
2900 static int
save_set(PicklerObject * self,PyObject * obj)2901 save_set(PicklerObject *self, PyObject *obj)
2902 {
2903     PyObject *item;
2904     int i;
2905     Py_ssize_t set_size, ppos = 0;
2906     Py_hash_t hash;
2907 
2908     const char empty_set_op = EMPTY_SET;
2909     const char mark_op = MARK;
2910     const char additems_op = ADDITEMS;
2911 
2912     if (self->proto < 4) {
2913         PyObject *items;
2914         PyObject *reduce_value;
2915         int status;
2916 
2917         items = PySequence_List(obj);
2918         if (items == NULL) {
2919             return -1;
2920         }
2921         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
2922         Py_DECREF(items);
2923         if (reduce_value == NULL) {
2924             return -1;
2925         }
2926         /* save_reduce() will memoize the object automatically. */
2927         status = save_reduce(self, reduce_value, obj);
2928         Py_DECREF(reduce_value);
2929         return status;
2930     }
2931 
2932     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
2933         return -1;
2934 
2935     if (memo_put(self, obj) < 0)
2936         return -1;
2937 
2938     set_size = PySet_GET_SIZE(obj);
2939     if (set_size == 0)
2940         return 0;  /* nothing to do */
2941 
2942     /* Write in batches of BATCHSIZE. */
2943     do {
2944         i = 0;
2945         if (_Pickler_Write(self, &mark_op, 1) < 0)
2946             return -1;
2947         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
2948             if (save(self, item, 0) < 0)
2949                 return -1;
2950             if (++i == BATCHSIZE)
2951                 break;
2952         }
2953         if (_Pickler_Write(self, &additems_op, 1) < 0)
2954             return -1;
2955         if (PySet_GET_SIZE(obj) != set_size) {
2956             PyErr_Format(
2957                 PyExc_RuntimeError,
2958                 "set changed size during iteration");
2959             return -1;
2960         }
2961     } while (i == BATCHSIZE);
2962 
2963     return 0;
2964 }
2965 
2966 static int
save_frozenset(PicklerObject * self,PyObject * obj)2967 save_frozenset(PicklerObject *self, PyObject *obj)
2968 {
2969     PyObject *iter;
2970 
2971     const char mark_op = MARK;
2972     const char frozenset_op = FROZENSET;
2973 
2974     if (self->fast && !fast_save_enter(self, obj))
2975         return -1;
2976 
2977     if (self->proto < 4) {
2978         PyObject *items;
2979         PyObject *reduce_value;
2980         int status;
2981 
2982         items = PySequence_List(obj);
2983         if (items == NULL) {
2984             return -1;
2985         }
2986         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
2987                                      items);
2988         Py_DECREF(items);
2989         if (reduce_value == NULL) {
2990             return -1;
2991         }
2992         /* save_reduce() will memoize the object automatically. */
2993         status = save_reduce(self, reduce_value, obj);
2994         Py_DECREF(reduce_value);
2995         return status;
2996     }
2997 
2998     if (_Pickler_Write(self, &mark_op, 1) < 0)
2999         return -1;
3000 
3001     iter = PyObject_GetIter(obj);
3002     if (iter == NULL) {
3003         return -1;
3004     }
3005     for (;;) {
3006         PyObject *item;
3007 
3008         item = PyIter_Next(iter);
3009         if (item == NULL) {
3010             if (PyErr_Occurred()) {
3011                 Py_DECREF(iter);
3012                 return -1;
3013             }
3014             break;
3015         }
3016         if (save(self, item, 0) < 0) {
3017             Py_DECREF(item);
3018             Py_DECREF(iter);
3019             return -1;
3020         }
3021         Py_DECREF(item);
3022     }
3023     Py_DECREF(iter);
3024 
3025     /* If the object is already in the memo, this means it is
3026        recursive. In this case, throw away everything we put on the
3027        stack, and fetch the object back from the memo. */
3028     if (PyMemoTable_Get(self->memo, obj)) {
3029         const char pop_mark_op = POP_MARK;
3030 
3031         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3032             return -1;
3033         if (memo_get(self, obj) < 0)
3034             return -1;
3035         return 0;
3036     }
3037 
3038     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3039         return -1;
3040     if (memo_put(self, obj) < 0)
3041         return -1;
3042 
3043     return 0;
3044 }
3045 
3046 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3047 fix_imports(PyObject **module_name, PyObject **global_name)
3048 {
3049     PyObject *key;
3050     PyObject *item;
3051     PickleState *st = _Pickle_GetGlobalState();
3052 
3053     key = PyTuple_Pack(2, *module_name, *global_name);
3054     if (key == NULL)
3055         return -1;
3056     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3057     Py_DECREF(key);
3058     if (item) {
3059         PyObject *fixed_module_name;
3060         PyObject *fixed_global_name;
3061 
3062         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3063             PyErr_Format(PyExc_RuntimeError,
3064                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3065                          "should be 2-tuples, not %.200s",
3066                          Py_TYPE(item)->tp_name);
3067             return -1;
3068         }
3069         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3070         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3071         if (!PyUnicode_Check(fixed_module_name) ||
3072             !PyUnicode_Check(fixed_global_name)) {
3073             PyErr_Format(PyExc_RuntimeError,
3074                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3075                          "should be pairs of str, not (%.200s, %.200s)",
3076                          Py_TYPE(fixed_module_name)->tp_name,
3077                          Py_TYPE(fixed_global_name)->tp_name);
3078             return -1;
3079         }
3080 
3081         Py_CLEAR(*module_name);
3082         Py_CLEAR(*global_name);
3083         Py_INCREF(fixed_module_name);
3084         Py_INCREF(fixed_global_name);
3085         *module_name = fixed_module_name;
3086         *global_name = fixed_global_name;
3087         return 0;
3088     }
3089     else if (PyErr_Occurred()) {
3090         return -1;
3091     }
3092 
3093     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3094     if (item) {
3095         if (!PyUnicode_Check(item)) {
3096             PyErr_Format(PyExc_RuntimeError,
3097                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3098                          "should be strings, not %.200s",
3099                          Py_TYPE(item)->tp_name);
3100             return -1;
3101         }
3102         Py_INCREF(item);
3103         Py_XSETREF(*module_name, item);
3104     }
3105     else if (PyErr_Occurred()) {
3106         return -1;
3107     }
3108 
3109     return 0;
3110 }
3111 
3112 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3113 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3114 {
3115     PyObject *global_name = NULL;
3116     PyObject *module_name = NULL;
3117     PyObject *module = NULL;
3118     PyObject *parent = NULL;
3119     PyObject *dotted_path = NULL;
3120     PyObject *lastname = NULL;
3121     PyObject *cls;
3122     PickleState *st = _Pickle_GetGlobalState();
3123     int status = 0;
3124     _Py_IDENTIFIER(__name__);
3125     _Py_IDENTIFIER(__qualname__);
3126 
3127     const char global_op = GLOBAL;
3128 
3129     if (name) {
3130         Py_INCREF(name);
3131         global_name = name;
3132     }
3133     else {
3134         global_name = _PyObject_GetAttrId(obj, &PyId___qualname__);
3135         if (global_name == NULL) {
3136             if (!PyErr_ExceptionMatches(PyExc_AttributeError))
3137                 goto error;
3138             PyErr_Clear();
3139         }
3140         if (global_name == NULL) {
3141             global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3142             if (global_name == NULL)
3143                 goto error;
3144         }
3145     }
3146 
3147     dotted_path = get_dotted_path(module, global_name);
3148     if (dotted_path == NULL)
3149         goto error;
3150     module_name = whichmodule(obj, dotted_path);
3151     if (module_name == NULL)
3152         goto error;
3153 
3154     /* XXX: Change to use the import C API directly with level=0 to disallow
3155        relative imports.
3156 
3157        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3158        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3159        custom import functions (IMHO, this would be a nice security
3160        feature). The import C API would need to be extended to support the
3161        extra parameters of __import__ to fix that. */
3162     module = PyImport_Import(module_name);
3163     if (module == NULL) {
3164         PyErr_Format(st->PicklingError,
3165                      "Can't pickle %R: import of module %R failed",
3166                      obj, module_name);
3167         goto error;
3168     }
3169     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3170     Py_INCREF(lastname);
3171     cls = get_deep_attribute(module, dotted_path, &parent);
3172     Py_CLEAR(dotted_path);
3173     if (cls == NULL) {
3174         PyErr_Format(st->PicklingError,
3175                      "Can't pickle %R: attribute lookup %S on %S failed",
3176                      obj, global_name, module_name);
3177         goto error;
3178     }
3179     if (cls != obj) {
3180         Py_DECREF(cls);
3181         PyErr_Format(st->PicklingError,
3182                      "Can't pickle %R: it's not the same object as %S.%S",
3183                      obj, module_name, global_name);
3184         goto error;
3185     }
3186     Py_DECREF(cls);
3187 
3188     if (self->proto >= 2) {
3189         /* See whether this is in the extension registry, and if
3190          * so generate an EXT opcode.
3191          */
3192         PyObject *extension_key;
3193         PyObject *code_obj;      /* extension code as Python object */
3194         long code;               /* extension code as C value */
3195         char pdata[5];
3196         Py_ssize_t n;
3197 
3198         extension_key = PyTuple_Pack(2, module_name, global_name);
3199         if (extension_key == NULL) {
3200             goto error;
3201         }
3202         code_obj = PyDict_GetItemWithError(st->extension_registry,
3203                                            extension_key);
3204         Py_DECREF(extension_key);
3205         /* The object is not registered in the extension registry.
3206            This is the most likely code path. */
3207         if (code_obj == NULL) {
3208             if (PyErr_Occurred()) {
3209                 goto error;
3210             }
3211             goto gen_global;
3212         }
3213 
3214         /* XXX: pickle.py doesn't check neither the type, nor the range
3215            of the value returned by the extension_registry. It should for
3216            consistency. */
3217 
3218         /* Verify code_obj has the right type and value. */
3219         if (!PyLong_Check(code_obj)) {
3220             PyErr_Format(st->PicklingError,
3221                          "Can't pickle %R: extension code %R isn't an integer",
3222                          obj, code_obj);
3223             goto error;
3224         }
3225         code = PyLong_AS_LONG(code_obj);
3226         if (code <= 0 || code > 0x7fffffffL) {
3227             if (!PyErr_Occurred())
3228                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3229                              "code %ld is out of range", obj, code);
3230             goto error;
3231         }
3232 
3233         /* Generate an EXT opcode. */
3234         if (code <= 0xff) {
3235             pdata[0] = EXT1;
3236             pdata[1] = (unsigned char)code;
3237             n = 2;
3238         }
3239         else if (code <= 0xffff) {
3240             pdata[0] = EXT2;
3241             pdata[1] = (unsigned char)(code & 0xff);
3242             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3243             n = 3;
3244         }
3245         else {
3246             pdata[0] = EXT4;
3247             pdata[1] = (unsigned char)(code & 0xff);
3248             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3249             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3250             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3251             n = 5;
3252         }
3253 
3254         if (_Pickler_Write(self, pdata, n) < 0)
3255             goto error;
3256     }
3257     else {
3258   gen_global:
3259         if (parent == module) {
3260             Py_INCREF(lastname);
3261             Py_DECREF(global_name);
3262             global_name = lastname;
3263         }
3264         if (self->proto >= 4) {
3265             const char stack_global_op = STACK_GLOBAL;
3266 
3267             if (save(self, module_name, 0) < 0)
3268                 goto error;
3269             if (save(self, global_name, 0) < 0)
3270                 goto error;
3271 
3272             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3273                 goto error;
3274         }
3275         else if (parent != module) {
3276             PickleState *st = _Pickle_GetGlobalState();
3277             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3278                                         st->getattr, parent, lastname);
3279             status = save_reduce(self, reduce_value, NULL);
3280             Py_DECREF(reduce_value);
3281             if (status < 0)
3282                 goto error;
3283         }
3284         else {
3285             /* Generate a normal global opcode if we are using a pickle
3286                protocol < 4, or if the object is not registered in the
3287                extension registry. */
3288             PyObject *encoded;
3289             PyObject *(*unicode_encoder)(PyObject *);
3290 
3291             if (_Pickler_Write(self, &global_op, 1) < 0)
3292                 goto error;
3293 
3294             /* For protocol < 3 and if the user didn't request against doing
3295                so, we convert module names to the old 2.x module names. */
3296             if (self->proto < 3 && self->fix_imports) {
3297                 if (fix_imports(&module_name, &global_name) < 0) {
3298                     goto error;
3299                 }
3300             }
3301 
3302             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3303                both the module name and the global name using UTF-8. We do so
3304                only when we are using the pickle protocol newer than version
3305                3. This is to ensure compatibility with older Unpickler running
3306                on Python 2.x. */
3307             if (self->proto == 3) {
3308                 unicode_encoder = PyUnicode_AsUTF8String;
3309             }
3310             else {
3311                 unicode_encoder = PyUnicode_AsASCIIString;
3312             }
3313             encoded = unicode_encoder(module_name);
3314             if (encoded == NULL) {
3315                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3316                     PyErr_Format(st->PicklingError,
3317                                  "can't pickle module identifier '%S' using "
3318                                  "pickle protocol %i",
3319                                  module_name, self->proto);
3320                 goto error;
3321             }
3322             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3323                                PyBytes_GET_SIZE(encoded)) < 0) {
3324                 Py_DECREF(encoded);
3325                 goto error;
3326             }
3327             Py_DECREF(encoded);
3328             if(_Pickler_Write(self, "\n", 1) < 0)
3329                 goto error;
3330 
3331             /* Save the name of the module. */
3332             encoded = unicode_encoder(global_name);
3333             if (encoded == NULL) {
3334                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3335                     PyErr_Format(st->PicklingError,
3336                                  "can't pickle global identifier '%S' using "
3337                                  "pickle protocol %i",
3338                                  global_name, self->proto);
3339                 goto error;
3340             }
3341             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3342                                PyBytes_GET_SIZE(encoded)) < 0) {
3343                 Py_DECREF(encoded);
3344                 goto error;
3345             }
3346             Py_DECREF(encoded);
3347             if (_Pickler_Write(self, "\n", 1) < 0)
3348                 goto error;
3349         }
3350         /* Memoize the object. */
3351         if (memo_put(self, obj) < 0)
3352             goto error;
3353     }
3354 
3355     if (0) {
3356   error:
3357         status = -1;
3358     }
3359     Py_XDECREF(module_name);
3360     Py_XDECREF(global_name);
3361     Py_XDECREF(module);
3362     Py_XDECREF(parent);
3363     Py_XDECREF(dotted_path);
3364     Py_XDECREF(lastname);
3365 
3366     return status;
3367 }
3368 
3369 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3370 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3371 {
3372     PyObject *reduce_value;
3373     int status;
3374 
3375     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3376     if (reduce_value == NULL) {
3377         return -1;
3378     }
3379     status = save_reduce(self, reduce_value, obj);
3380     Py_DECREF(reduce_value);
3381     return status;
3382 }
3383 
3384 static int
save_type(PicklerObject * self,PyObject * obj)3385 save_type(PicklerObject *self, PyObject *obj)
3386 {
3387     if (obj == (PyObject *)&_PyNone_Type) {
3388         return save_singleton_type(self, obj, Py_None);
3389     }
3390     else if (obj == (PyObject *)&PyEllipsis_Type) {
3391         return save_singleton_type(self, obj, Py_Ellipsis);
3392     }
3393     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3394         return save_singleton_type(self, obj, Py_NotImplemented);
3395     }
3396     return save_global(self, obj, NULL);
3397 }
3398 
3399 static int
save_pers(PicklerObject * self,PyObject * obj,PyObject * func)3400 save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
3401 {
3402     PyObject *pid = NULL;
3403     int status = 0;
3404 
3405     const char persid_op = PERSID;
3406     const char binpersid_op = BINPERSID;
3407 
3408     Py_INCREF(obj);
3409     pid = _Pickle_FastCall(func, obj);
3410     if (pid == NULL)
3411         return -1;
3412 
3413     if (pid != Py_None) {
3414         if (self->bin) {
3415             if (save(self, pid, 1) < 0 ||
3416                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3417                 goto error;
3418         }
3419         else {
3420             PyObject *pid_str;
3421 
3422             pid_str = PyObject_Str(pid);
3423             if (pid_str == NULL)
3424                 goto error;
3425 
3426             /* XXX: Should it check whether the pid contains embedded
3427                newlines? */
3428             if (!PyUnicode_IS_ASCII(pid_str)) {
3429                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3430                                 "persistent IDs in protocol 0 must be "
3431                                 "ASCII strings");
3432                 Py_DECREF(pid_str);
3433                 goto error;
3434             }
3435 
3436             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3437                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3438                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3439                 _Pickler_Write(self, "\n", 1) < 0) {
3440                 Py_DECREF(pid_str);
3441                 goto error;
3442             }
3443             Py_DECREF(pid_str);
3444         }
3445         status = 1;
3446     }
3447 
3448     if (0) {
3449   error:
3450         status = -1;
3451     }
3452     Py_XDECREF(pid);
3453 
3454     return status;
3455 }
3456 
3457 static PyObject *
get_class(PyObject * obj)3458 get_class(PyObject *obj)
3459 {
3460     PyObject *cls;
3461     _Py_IDENTIFIER(__class__);
3462 
3463     cls = _PyObject_GetAttrId(obj, &PyId___class__);
3464     if (cls == NULL) {
3465         if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
3466             PyErr_Clear();
3467             cls = (PyObject *) Py_TYPE(obj);
3468             Py_INCREF(cls);
3469         }
3470     }
3471     return cls;
3472 }
3473 
3474 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3475  * appropriate __reduce__ method for obj.
3476  */
3477 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3478 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3479 {
3480     PyObject *callable;
3481     PyObject *argtup;
3482     PyObject *state = NULL;
3483     PyObject *listitems = Py_None;
3484     PyObject *dictitems = Py_None;
3485     PickleState *st = _Pickle_GetGlobalState();
3486     Py_ssize_t size;
3487     int use_newobj = 0, use_newobj_ex = 0;
3488 
3489     const char reduce_op = REDUCE;
3490     const char build_op = BUILD;
3491     const char newobj_op = NEWOBJ;
3492     const char newobj_ex_op = NEWOBJ_EX;
3493 
3494     size = PyTuple_Size(args);
3495     if (size < 2 || size > 5) {
3496         PyErr_SetString(st->PicklingError, "tuple returned by "
3497                         "__reduce__ must contain 2 through 5 elements");
3498         return -1;
3499     }
3500 
3501     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3502                            &callable, &argtup, &state, &listitems, &dictitems))
3503         return -1;
3504 
3505     if (!PyCallable_Check(callable)) {
3506         PyErr_SetString(st->PicklingError, "first item of the tuple "
3507                         "returned by __reduce__ must be callable");
3508         return -1;
3509     }
3510     if (!PyTuple_Check(argtup)) {
3511         PyErr_SetString(st->PicklingError, "second item of the tuple "
3512                         "returned by __reduce__ must be a tuple");
3513         return -1;
3514     }
3515 
3516     if (state == Py_None)
3517         state = NULL;
3518 
3519     if (listitems == Py_None)
3520         listitems = NULL;
3521     else if (!PyIter_Check(listitems)) {
3522         PyErr_Format(st->PicklingError, "fourth element of the tuple "
3523                      "returned by __reduce__ must be an iterator, not %s",
3524                      Py_TYPE(listitems)->tp_name);
3525         return -1;
3526     }
3527 
3528     if (dictitems == Py_None)
3529         dictitems = NULL;
3530     else if (!PyIter_Check(dictitems)) {
3531         PyErr_Format(st->PicklingError, "fifth element of the tuple "
3532                      "returned by __reduce__ must be an iterator, not %s",
3533                      Py_TYPE(dictitems)->tp_name);
3534         return -1;
3535     }
3536 
3537     if (self->proto >= 2) {
3538         PyObject *name;
3539         _Py_IDENTIFIER(__name__);
3540 
3541         name = _PyObject_GetAttrId(callable, &PyId___name__);
3542         if (name == NULL) {
3543             if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3544                 return -1;
3545             }
3546             PyErr_Clear();
3547         }
3548         else if (PyUnicode_Check(name)) {
3549             _Py_IDENTIFIER(__newobj_ex__);
3550             use_newobj_ex = _PyUnicode_EqualToASCIIId(
3551                     name, &PyId___newobj_ex__);
3552             if (!use_newobj_ex) {
3553                 _Py_IDENTIFIER(__newobj__);
3554                 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
3555             }
3556         }
3557         Py_XDECREF(name);
3558     }
3559 
3560     if (use_newobj_ex) {
3561         PyObject *cls;
3562         PyObject *args;
3563         PyObject *kwargs;
3564 
3565         if (Py_SIZE(argtup) != 3) {
3566             PyErr_Format(st->PicklingError,
3567                          "length of the NEWOBJ_EX argument tuple must be "
3568                          "exactly 3, not %zd", Py_SIZE(argtup));
3569             return -1;
3570         }
3571 
3572         cls = PyTuple_GET_ITEM(argtup, 0);
3573         if (!PyType_Check(cls)) {
3574             PyErr_Format(st->PicklingError,
3575                          "first item from NEWOBJ_EX argument tuple must "
3576                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3577             return -1;
3578         }
3579         args = PyTuple_GET_ITEM(argtup, 1);
3580         if (!PyTuple_Check(args)) {
3581             PyErr_Format(st->PicklingError,
3582                          "second item from NEWOBJ_EX argument tuple must "
3583                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3584             return -1;
3585         }
3586         kwargs = PyTuple_GET_ITEM(argtup, 2);
3587         if (!PyDict_Check(kwargs)) {
3588             PyErr_Format(st->PicklingError,
3589                          "third item from NEWOBJ_EX argument tuple must "
3590                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3591             return -1;
3592         }
3593 
3594         if (self->proto >= 4) {
3595             if (save(self, cls, 0) < 0 ||
3596                 save(self, args, 0) < 0 ||
3597                 save(self, kwargs, 0) < 0 ||
3598                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3599                 return -1;
3600             }
3601         }
3602         else {
3603             PyObject *newargs;
3604             PyObject *cls_new;
3605             Py_ssize_t i;
3606             _Py_IDENTIFIER(__new__);
3607 
3608             newargs = PyTuple_New(Py_SIZE(args) + 2);
3609             if (newargs == NULL)
3610                 return -1;
3611 
3612             cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
3613             if (cls_new == NULL) {
3614                 Py_DECREF(newargs);
3615                 return -1;
3616             }
3617             PyTuple_SET_ITEM(newargs, 0, cls_new);
3618             Py_INCREF(cls);
3619             PyTuple_SET_ITEM(newargs, 1, cls);
3620             for (i = 0; i < Py_SIZE(args); i++) {
3621                 PyObject *item = PyTuple_GET_ITEM(args, i);
3622                 Py_INCREF(item);
3623                 PyTuple_SET_ITEM(newargs, i + 2, item);
3624             }
3625 
3626             callable = PyObject_Call(st->partial, newargs, kwargs);
3627             Py_DECREF(newargs);
3628             if (callable == NULL)
3629                 return -1;
3630 
3631             newargs = PyTuple_New(0);
3632             if (newargs == NULL) {
3633                 Py_DECREF(callable);
3634                 return -1;
3635             }
3636 
3637             if (save(self, callable, 0) < 0 ||
3638                 save(self, newargs, 0) < 0 ||
3639                 _Pickler_Write(self, &reduce_op, 1) < 0) {
3640                 Py_DECREF(newargs);
3641                 Py_DECREF(callable);
3642                 return -1;
3643             }
3644             Py_DECREF(newargs);
3645             Py_DECREF(callable);
3646         }
3647     }
3648     else if (use_newobj) {
3649         PyObject *cls;
3650         PyObject *newargtup;
3651         PyObject *obj_class;
3652         int p;
3653 
3654         /* Sanity checks. */
3655         if (Py_SIZE(argtup) < 1) {
3656             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
3657             return -1;
3658         }
3659 
3660         cls = PyTuple_GET_ITEM(argtup, 0);
3661         if (!PyType_Check(cls)) {
3662             PyErr_SetString(st->PicklingError, "args[0] from "
3663                             "__newobj__ args is not a type");
3664             return -1;
3665         }
3666 
3667         if (obj != NULL) {
3668             obj_class = get_class(obj);
3669             p = obj_class != cls;    /* true iff a problem */
3670             Py_DECREF(obj_class);
3671             if (p) {
3672                 PyErr_SetString(st->PicklingError, "args[0] from "
3673                                 "__newobj__ args has the wrong class");
3674                 return -1;
3675             }
3676         }
3677         /* XXX: These calls save() are prone to infinite recursion. Imagine
3678            what happen if the value returned by the __reduce__() method of
3679            some extension type contains another object of the same type. Ouch!
3680 
3681            Here is a quick example, that I ran into, to illustrate what I
3682            mean:
3683 
3684              >>> import pickle, copyreg
3685              >>> copyreg.dispatch_table.pop(complex)
3686              >>> pickle.dumps(1+2j)
3687              Traceback (most recent call last):
3688                ...
3689              RecursionError: maximum recursion depth exceeded
3690 
3691            Removing the complex class from copyreg.dispatch_table made the
3692            __reduce_ex__() method emit another complex object:
3693 
3694              >>> (1+1j).__reduce_ex__(2)
3695              (<function __newobj__ at 0xb7b71c3c>,
3696                (<class 'complex'>, (1+1j)), None, None, None)
3697 
3698            Thus when save() was called on newargstup (the 2nd item) recursion
3699            ensued. Of course, the bug was in the complex class which had a
3700            broken __getnewargs__() that emitted another complex object. But,
3701            the point, here, is it is quite easy to end up with a broken reduce
3702            function. */
3703 
3704         /* Save the class and its __new__ arguments. */
3705         if (save(self, cls, 0) < 0)
3706             return -1;
3707 
3708         newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3709         if (newargtup == NULL)
3710             return -1;
3711 
3712         p = save(self, newargtup, 0);
3713         Py_DECREF(newargtup);
3714         if (p < 0)
3715             return -1;
3716 
3717         /* Add NEWOBJ opcode. */
3718         if (_Pickler_Write(self, &newobj_op, 1) < 0)
3719             return -1;
3720     }
3721     else { /* Not using NEWOBJ. */
3722         if (save(self, callable, 0) < 0 ||
3723             save(self, argtup, 0) < 0 ||
3724             _Pickler_Write(self, &reduce_op, 1) < 0)
3725             return -1;
3726     }
3727 
3728     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3729        the caller do not want to memoize the object. Not particularly useful,
3730        but that is to mimic the behavior save_reduce() in pickle.py when
3731        obj is None. */
3732     if (obj != NULL) {
3733         /* If the object is already in the memo, this means it is
3734            recursive. In this case, throw away everything we put on the
3735            stack, and fetch the object back from the memo. */
3736         if (PyMemoTable_Get(self->memo, obj)) {
3737             const char pop_op = POP;
3738 
3739             if (_Pickler_Write(self, &pop_op, 1) < 0)
3740                 return -1;
3741             if (memo_get(self, obj) < 0)
3742                 return -1;
3743 
3744             return 0;
3745         }
3746         else if (memo_put(self, obj) < 0)
3747             return -1;
3748     }
3749 
3750     if (listitems && batch_list(self, listitems) < 0)
3751         return -1;
3752 
3753     if (dictitems && batch_dict(self, dictitems) < 0)
3754         return -1;
3755 
3756     if (state) {
3757         if (save(self, state, 0) < 0 ||
3758             _Pickler_Write(self, &build_op, 1) < 0)
3759             return -1;
3760     }
3761 
3762     return 0;
3763 }
3764 
/* Pickle a single object; this is the pickler's central dispatcher.
 *
 * In order, it tries: the persistent-id hook (skipped when pers_save is
 * non-zero), the atom types that are never memoized, the memo, the exact
 * built-in types that have a dedicated save_*() routine, and finally the
 * reduce protocol (dispatch tables, __reduce_ex__, __reduce__).
 *
 * Returns -1 on failure.  Otherwise the status of the routine that handled
 * the object is returned as-is; note that a non-zero save_pers() result is
 * passed straight through to the caller.
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    /* Both are released at the `done` label; they must stay NULL (or hold an
       owned reference) on every path that reaches it. */
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (_Pickler_OpcodeBoundary(self) < 0)
        return -1;

    /* Everything after this point must leave through `done` (or `error`) so
       that Py_LeaveRecursiveCall() is always paired with this call. */
    if (Py_EnterRecursiveCall(" while pickling an object"))
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj, self->pers_func)) != 0)
            goto done;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        status = save_none(self, obj);
        goto done;
    }
    else if (obj == Py_False || obj == Py_True) {
        status = save_bool(self, obj);
        goto done;
    }
    else if (type == &PyLong_Type) {
        status = save_long(self, obj);
        goto done;
    }
    else if (type == &PyFloat_Type) {
        status = save_float(self, obj);
        goto done;
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        if (memo_get(self, obj) < 0)
            goto error;
        goto done;
    }

    /* Exact type matches only: instances of subclasses fall through to the
       reduce protocol below. */
    if (type == &PyBytes_Type) {
        status = save_bytes(self, obj);
        goto done;
    }
    else if (type == &PyUnicode_Type) {
        status = save_unicode(self, obj);
        goto done;
    }
    else if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PySet_Type) {
        status = save_set(self, obj);
        goto done;
    }
    else if (type == &PyFrozenSet_Type) {
        status = save_frozenset(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyType_Type) {
        status = save_type(self, obj);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * self.dispatch_table, copyreg.dispatch_table, the object's
     * __reduce_ex__ method, or the object's __reduce__ method.
     */
    if (self->dispatch_table == NULL) {
        /* No per-pickler table: consult the module-wide one, which is
           looked up with the dict API. */
        PickleState *st = _Pickle_GetGlobalState();
        reduce_func = PyDict_GetItemWithError(st->dispatch_table,
                                              (PyObject *)type);
        if (reduce_func == NULL) {
            /* NULL without an exception just means "no entry". */
            if (PyErr_Occurred()) {
                goto error;
            }
        } else {
            /* PyDict_GetItemWithError() returns a borrowed reference.
               Increase the reference count to be consistent with
               PyObject_GetItem and _PyObject_GetAttrId used below. */
            Py_INCREF(reduce_func);
        }
    } else {
        /* The per-pickler table is accessed through the generic item
           protocol; a KeyError means "no entry" and is swallowed. */
        reduce_func = PyObject_GetItem(self->dispatch_table,
                                       (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_ExceptionMatches(PyExc_KeyError))
                PyErr_Clear();
            else
                goto error;
        }
    }
    if (reduce_func != NULL) {
        /* NOTE(review): the extra reference taken here suggests that
           _Pickle_FastCall() consumes a reference to its argument --
           confirm against its definition. */
        Py_INCREF(obj);
        reduce_value = _Pickle_FastCall(reduce_func, obj);
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        /* obj is an instance of a metaclass (a class): pickle by name. */
        status = save_global(self, obj, NULL);
        goto done;
    }
    else {
        _Py_IDENTIFIER(__reduce__);
        _Py_IDENTIFIER(__reduce_ex__);


        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient, this
           makes it impossible to know which method was actually called. Of
           course, this is not a big deal. But still, it would be nice to let
           the user know which method was called when something goes
           wrong. Incidentally, this means if __reduce_ex__ is not defined, we
           don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__);
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* If proto could not be created, reduce_value stays NULL and the
               check after this if/else chain routes to `error`.  proto is not
               released here; see the review note on _Pickle_FastCall()
               above. */
            if (proto != NULL) {
                reduce_value = _Pickle_FastCall(reduce_func, proto);
            }
        }
        else {
            PickleState *st = _Pickle_GetGlobalState();

            /* Only a missing attribute falls back to __reduce__; any other
               lookup failure is propagated. */
            if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
                PyErr_Clear();
            }
            else {
                goto error;
            }
            /* Check for a __reduce__ method. */
            reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
            if (reduce_func != NULL) {
                reduce_value = _PyObject_CallNoArg(reduce_func);
            }
            else {
                PyErr_Format(st->PicklingError,
                             "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

    /* A string result names a global to be pickled by reference. */
    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PickleState *st = _Pickle_GetGlobalState();
        PyErr_SetString(st->PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    /* The `error` label lives inside a never-taken branch so that normal
       fall-through skips it; it is reachable only through `goto error`. */
    if (0) {
  error:
        status = -1;
    }
  done:

    Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}
3975 
3976 static int
dump(PicklerObject * self,PyObject * obj)3977 dump(PicklerObject *self, PyObject *obj)
3978 {
3979     const char stop_op = STOP;
3980 
3981     if (self->proto >= 2) {
3982         char header[2];
3983 
3984         header[0] = PROTO;
3985         assert(self->proto >= 0 && self->proto < 256);
3986         header[1] = (unsigned char)self->proto;
3987         if (_Pickler_Write(self, header, 2) < 0)
3988             return -1;
3989         if (self->proto >= 4)
3990             self->framing = 1;
3991     }
3992 
3993     if (save(self, obj, 0) < 0 ||
3994         _Pickler_Write(self, &stop_op, 1) < 0)
3995         return -1;
3996 
3997     return 0;
3998 }
3999 
4000 /*[clinic input]
4001 
4002 _pickle.Pickler.clear_memo
4003 
4004 Clears the pickler's "memo".
4005 
4006 The memo is the data structure that remembers which objects the
4007 pickler has already seen, so that shared or recursive objects are
4008 pickled by reference and not by value.  This method is useful when
4009 re-using picklers.
4010 [clinic start generated code]*/
4011 
4012 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4013 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4014 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4015 {
4016     if (self->memo)
4017         PyMemoTable_Clear(self->memo);
4018 
4019     Py_RETURN_NONE;
4020 }
4021 
4022 /*[clinic input]
4023 
4024 _pickle.Pickler.dump
4025 
4026   obj: object
4027   /
4028 
4029 Write a pickled representation of the given object to the open file.
4030 [clinic start generated code]*/
4031 
4032 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4033 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4034 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4035 {
4036     /* Check whether the Pickler was initialized correctly (issue3664).
4037        Developers often forget to call __init__() in their subclasses, which
4038        would trigger a segfault without this check. */
4039     if (self->write == NULL) {
4040         PickleState *st = _Pickle_GetGlobalState();
4041         PyErr_Format(st->PicklingError,
4042                      "Pickler.__init__() was not called by %s.__init__()",
4043                      Py_TYPE(self)->tp_name);
4044         return NULL;
4045     }
4046 
4047     if (_Pickler_ClearBuffer(self) < 0)
4048         return NULL;
4049 
4050     if (dump(self, obj) < 0)
4051         return NULL;
4052 
4053     if (_Pickler_FlushToFile(self) < 0)
4054         return NULL;
4055 
4056     Py_RETURN_NONE;
4057 }
4058 
4059 /*[clinic input]
4060 
4061 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4062 
4063 Returns size in memory, in bytes.
4064 [clinic start generated code]*/
4065 
4066 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4067 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4068 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4069 {
4070     Py_ssize_t res, s;
4071 
4072     res = _PyObject_SIZE(Py_TYPE(self));
4073     if (self->memo != NULL) {
4074         res += sizeof(PyMemoTable);
4075         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4076     }
4077     if (self->output_buffer != NULL) {
4078         s = _PySys_GetSizeOf(self->output_buffer);
4079         if (s == -1)
4080             return -1;
4081         res += s;
4082     }
4083     return res;
4084 }
4085 
/* Method table for the Pickler type.  Each entry is a *_METHODDEF macro
   matching one of the _pickle.Pickler implementations defined above
   (dump, clear_memo, __sizeof__); the table ends with a NULL sentinel. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4092 
4093 static void
Pickler_dealloc(PicklerObject * self)4094 Pickler_dealloc(PicklerObject *self)
4095 {
4096     PyObject_GC_UnTrack(self);
4097 
4098     Py_XDECREF(self->output_buffer);
4099     Py_XDECREF(self->write);
4100     Py_XDECREF(self->pers_func);
4101     Py_XDECREF(self->dispatch_table);
4102     Py_XDECREF(self->fast_memo);
4103 
4104     PyMemoTable_Del(self->memo);
4105 
4106     Py_TYPE(self)->tp_free((PyObject *)self);
4107 }
4108 
/* GC traversal for Pickler objects.  Reports the write callable, the
   persistent-id callable, the dispatch table and the fast-mode memo to the
   collector.  output_buffer and the memo table are not visited here.
   Py_VISIT relies on the parameters being named `visit` and `arg`. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    return 0;
}
4118 
4119 static int
Pickler_clear(PicklerObject * self)4120 Pickler_clear(PicklerObject *self)
4121 {
4122     Py_CLEAR(self->output_buffer);
4123     Py_CLEAR(self->write);
4124     Py_CLEAR(self->pers_func);
4125     Py_CLEAR(self->dispatch_table);
4126     Py_CLEAR(self->fast_memo);
4127 
4128     if (self->memo != NULL) {
4129         PyMemoTable *memo = self->memo;
4130         self->memo = NULL;
4131         PyMemoTable_Del(memo);
4132     }
4133     return 0;
4134 }
4135 
4136 
4137 /*[clinic input]
4138 
4139 _pickle.Pickler.__init__
4140 
4141   file: object
4142   protocol: object = NULL
4143   fix_imports: bool = True
4144 
4145 This takes a binary file for writing a pickle data stream.
4146 
4147 The optional *protocol* argument tells the pickler to use the given
4148 protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
4149 protocol is 3; a backward-incompatible protocol designed for Python 3.
4150 
4151 Specifying a negative protocol version selects the highest protocol
4152 version supported.  The higher the protocol used, the more recent the
4153 version of Python needed to read the pickle produced.
4154 
4155 The *file* argument must have a write() method that accepts a single
4156 bytes argument. It can thus be a file object opened for binary
4157 writing, an io.BytesIO instance, or any other custom object that meets
4158 this interface.
4159 
4160 If *fix_imports* is True and protocol is less than 3, pickle will try
4161 to map the new Python 3 names to the old module names used in Python
4162 2, so that the pickle data stream is readable with Python 2.
4163 [clinic start generated code]*/
4164 
4165 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports)4166 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4167                               PyObject *protocol, int fix_imports)
4168 /*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
4169 {
4170     _Py_IDENTIFIER(persistent_id);
4171     _Py_IDENTIFIER(dispatch_table);
4172 
4173     /* In case of multiple __init__() calls, clear previous content. */
4174     if (self->write != NULL)
4175         (void)Pickler_clear(self);
4176 
4177     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4178         return -1;
4179 
4180     if (_Pickler_SetOutputStream(self, file) < 0)
4181         return -1;
4182 
4183     /* memo and output_buffer may have already been created in _Pickler_New */
4184     if (self->memo == NULL) {
4185         self->memo = PyMemoTable_New();
4186         if (self->memo == NULL)
4187             return -1;
4188     }
4189     self->output_len = 0;
4190     if (self->output_buffer == NULL) {
4191         self->max_output_len = WRITE_BUF_SIZE;
4192         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4193                                                         self->max_output_len);
4194         if (self->output_buffer == NULL)
4195             return -1;
4196     }
4197 
4198     self->fast = 0;
4199     self->fast_nesting = 0;
4200     self->fast_memo = NULL;
4201     self->pers_func = NULL;
4202     if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
4203         self->pers_func = _PyObject_GetAttrId((PyObject *)self,
4204                                               &PyId_persistent_id);
4205         if (self->pers_func == NULL)
4206             return -1;
4207     }
4208     self->dispatch_table = NULL;
4209     if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
4210         self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
4211                                                    &PyId_dispatch_table);
4212         if (self->dispatch_table == NULL)
4213             return -1;
4214     }
4215 
4216     return 0;
4217 }
4218 
4219 
4220 /* Define a proxy object for the Pickler's internal memo object. This is to
4221  * avoid breaking code like:
4222  *  pickler.memo.clear()
4223  * and
4224  *  pickler.memo = saved_memo
4225  * Is this a good idea? Not really, but we don't want to break code that uses
4226  * it. Note that we don't implement the entire mapping API here. This is
4227  * intentional, as these should be treated as black-box implementation details.
4228  */
4229 
4230 /*[clinic input]
4231 _pickle.PicklerMemoProxy.clear
4232 
4233 Remove all items from memo.
4234 [clinic start generated code]*/
4235 
4236 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4237 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4238 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4239 {
4240     if (self->pickler->memo)
4241         PyMemoTable_Clear(self->pickler->memo);
4242     Py_RETURN_NONE;
4243 }
4244 
4245 /*[clinic input]
4246 _pickle.PicklerMemoProxy.copy
4247 
4248 Copy the memo to a new object.
4249 [clinic start generated code]*/
4250 
4251 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4252 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4253 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4254 {
4255     Py_ssize_t i;
4256     PyMemoTable *memo;
4257     PyObject *new_memo = PyDict_New();
4258     if (new_memo == NULL)
4259         return NULL;
4260 
4261     memo = self->pickler->memo;
4262     for (i = 0; i < memo->mt_allocated; ++i) {
4263         PyMemoEntry entry = memo->mt_table[i];
4264         if (entry.me_key != NULL) {
4265             int status;
4266             PyObject *key, *value;
4267 
4268             key = PyLong_FromVoidPtr(entry.me_key);
4269             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4270 
4271             if (key == NULL || value == NULL) {
4272                 Py_XDECREF(key);
4273                 Py_XDECREF(value);
4274                 goto error;
4275             }
4276             status = PyDict_SetItem(new_memo, key, value);
4277             Py_DECREF(key);
4278             Py_DECREF(value);
4279             if (status < 0)
4280                 goto error;
4281         }
4282     }
4283     return new_memo;
4284 
4285   error:
4286     Py_XDECREF(new_memo);
4287     return NULL;
4288 }
4289 
4290 /*[clinic input]
4291 _pickle.PicklerMemoProxy.__reduce__
4292 
4293 Implement pickle support.
4294 [clinic start generated code]*/
4295 
4296 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4297 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4298 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4299 {
4300     PyObject *reduce_value, *dict_args;
4301     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4302     if (contents == NULL)
4303         return NULL;
4304 
4305     reduce_value = PyTuple_New(2);
4306     if (reduce_value == NULL) {
4307         Py_DECREF(contents);
4308         return NULL;
4309     }
4310     dict_args = PyTuple_New(1);
4311     if (dict_args == NULL) {
4312         Py_DECREF(contents);
4313         Py_DECREF(reduce_value);
4314         return NULL;
4315     }
4316     PyTuple_SET_ITEM(dict_args, 0, contents);
4317     Py_INCREF((PyObject *)&PyDict_Type);
4318     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4319     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4320     return reduce_value;
4321 }
4322 
/* Method table for PicklerMemoProxy (referenced from the tp_methods slot of
   PicklerMemoProxyType): clear, copy and __reduce__, via their *_METHODDEF
   macros, followed by a NULL sentinel. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4329 
4330 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4331 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4332 {
4333     PyObject_GC_UnTrack(self);
4334     Py_XDECREF(self->pickler);
4335     PyObject_GC_Del((PyObject *)self);
4336 }
4337 
/* GC traversal for memo proxies: the only object a proxy references is the
   pickler it was created for.  Py_VISIT relies on the parameters being
   named `visit` and `arg`. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4345 
/* Drop the proxy's reference to its pickler, leaving self->pickler NULL.
   Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4352 
/* Type object for the proxy handed out by the Pickler "memo" attribute.
   The slots are filled positionally, so every line is labelled with the
   slot it initializes; slots after tp_methods are not listed and are
   therefore zero-initialized. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /*tp_itemsize*/
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (formerly tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4383 
4384 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4385 PicklerMemoProxy_New(PicklerObject *pickler)
4386 {
4387     PicklerMemoProxyObject *self;
4388 
4389     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4390     if (self == NULL)
4391         return NULL;
4392     Py_INCREF(pickler);
4393     self->pickler = pickler;
4394     PyObject_GC_Track(self);
4395     return (PyObject *)self;
4396 }
4397 
4398 /*****************************************************************************/
4399 
/* Getter for the "memo" attribute: returns a new PicklerMemoProxy bound to
   this pickler (a fresh proxy object on every access), or NULL on failure. */
static PyObject *
Pickler_get_memo(PicklerObject *self)
{
    return PicklerMemoProxy_New(self);
}
4405 
4406 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj)4407 Pickler_set_memo(PicklerObject *self, PyObject *obj)
4408 {
4409     PyMemoTable *new_memo = NULL;
4410 
4411     if (obj == NULL) {
4412         PyErr_SetString(PyExc_TypeError,
4413                         "attribute deletion is not supported");
4414         return -1;
4415     }
4416 
4417     if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4418         PicklerObject *pickler =
4419             ((PicklerMemoProxyObject *)obj)->pickler;
4420 
4421         new_memo = PyMemoTable_Copy(pickler->memo);
4422         if (new_memo == NULL)
4423             return -1;
4424     }
4425     else if (PyDict_Check(obj)) {
4426         Py_ssize_t i = 0;
4427         PyObject *key, *value;
4428 
4429         new_memo = PyMemoTable_New();
4430         if (new_memo == NULL)
4431             return -1;
4432 
4433         while (PyDict_Next(obj, &i, &key, &value)) {
4434             Py_ssize_t memo_id;
4435             PyObject *memo_obj;
4436 
4437             if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
4438                 PyErr_SetString(PyExc_TypeError,
4439                                 "'memo' values must be 2-item tuples");
4440                 goto error;
4441             }
4442             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4443             if (memo_id == -1 && PyErr_Occurred())
4444                 goto error;
4445             memo_obj = PyTuple_GET_ITEM(value, 1);
4446             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4447                 goto error;
4448         }
4449     }
4450     else {
4451         PyErr_Format(PyExc_TypeError,
4452                      "'memo' attribute must be a PicklerMemoProxy object"
4453                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4454         return -1;
4455     }
4456 
4457     PyMemoTable_Del(self->memo);
4458     self->memo = new_memo;
4459 
4460     return 0;
4461 
4462   error:
4463     if (new_memo)
4464         PyMemoTable_Del(new_memo);
4465     return -1;
4466 }
4467 
4468 static PyObject *
Pickler_get_persid(PicklerObject * self)4469 Pickler_get_persid(PicklerObject *self)
4470 {
4471     if (self->pers_func == NULL)
4472         PyErr_SetString(PyExc_AttributeError, "persistent_id");
4473     else
4474         Py_INCREF(self->pers_func);
4475     return self->pers_func;
4476 }
4477 
4478 static int
Pickler_set_persid(PicklerObject * self,PyObject * value)4479 Pickler_set_persid(PicklerObject *self, PyObject *value)
4480 {
4481     if (value == NULL) {
4482         PyErr_SetString(PyExc_TypeError,
4483                         "attribute deletion is not supported");
4484         return -1;
4485     }
4486     if (!PyCallable_Check(value)) {
4487         PyErr_SetString(PyExc_TypeError,
4488                         "persistent_id must be a callable taking one argument");
4489         return -1;
4490     }
4491 
4492     Py_INCREF(value);
4493     Py_XSETREF(self->pers_func, value);
4494 
4495     return 0;
4496 }
4497 
4498 static PyMemberDef Pickler_members[] = {
4499     {"bin", T_INT, offsetof(PicklerObject, bin)},
4500     {"fast", T_INT, offsetof(PicklerObject, fast)},
4501     {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
4502     {NULL}
4503 };
4504 
4505 static PyGetSetDef Pickler_getsets[] = {
4506     {"memo",          (getter)Pickler_get_memo,
4507                       (setter)Pickler_set_memo},
4508     {"persistent_id", (getter)Pickler_get_persid,
4509                       (setter)Pickler_set_persid},
4510     {NULL}
4511 };
4512 
/* Type object for _pickle.Pickler.

   Slots are filled positionally, so their order must follow the
   PyTypeObject layout.  The type is subclassable (Py_TPFLAGS_BASETYPE) and
   takes part in cyclic garbage collection (Py_TPFLAGS_HAVE_GC), which is
   why tp_traverse, tp_clear and a GC-aware tp_free are provided. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4555 
4556 /* Temporary helper for calling self.find_class().
4557 
4558    XXX: It would be nice to able to avoid Python function call overhead, by
4559    using directly the C version of find_class(), when find_class() is not
4560    overridden by a subclass. Although, this could become rather hackish. A
4561    simpler optimization would be to call the C function when self is not a
4562    subclass instance. */
4563 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)4564 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4565 {
4566     _Py_IDENTIFIER(find_class);
4567 
4568     return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
4569                                   module_name, global_name);
4570 }
4571 
4572 static Py_ssize_t
marker(UnpicklerObject * self)4573 marker(UnpicklerObject *self)
4574 {
4575     Py_ssize_t mark;
4576 
4577     if (self->num_marks < 1) {
4578         PickleState *st = _Pickle_GetGlobalState();
4579         PyErr_SetString(st->UnpicklingError, "could not find MARK");
4580         return -1;
4581     }
4582 
4583     mark = self->marks[--self->num_marks];
4584     self->stack->mark_set = self->num_marks != 0;
4585     self->stack->fence = self->num_marks ?
4586             self->marks[self->num_marks - 1] : 0;
4587     return mark;
4588 }
4589 
4590 static int
load_none(UnpicklerObject * self)4591 load_none(UnpicklerObject *self)
4592 {
4593     PDATA_APPEND(self->stack, Py_None, -1);
4594     return 0;
4595 }
4596 
4597 static int
load_int(UnpicklerObject * self)4598 load_int(UnpicklerObject *self)
4599 {
4600     PyObject *value;
4601     char *endptr, *s;
4602     Py_ssize_t len;
4603     long x;
4604 
4605     if ((len = _Unpickler_Readline(self, &s)) < 0)
4606         return -1;
4607     if (len < 2)
4608         return bad_readline();
4609 
4610     errno = 0;
4611     /* XXX: Should the base argument of strtol() be explicitly set to 10?
4612        XXX(avassalotti): Should this uses PyOS_strtol()? */
4613     x = strtol(s, &endptr, 0);
4614 
4615     if (errno || (*endptr != '\n' && *endptr != '\0')) {
4616         /* Hm, maybe we've got something long.  Let's try reading
4617          * it as a Python int object. */
4618         errno = 0;
4619         /* XXX: Same thing about the base here. */
4620         value = PyLong_FromString(s, NULL, 0);
4621         if (value == NULL) {
4622             PyErr_SetString(PyExc_ValueError,
4623                             "could not convert string to int");
4624             return -1;
4625         }
4626     }
4627     else {
4628         if (len == 3 && (x == 0 || x == 1)) {
4629             if ((value = PyBool_FromLong(x)) == NULL)
4630                 return -1;
4631         }
4632         else {
4633             if ((value = PyLong_FromLong(x)) == NULL)
4634                 return -1;
4635         }
4636     }
4637 
4638     PDATA_PUSH(self->stack, value, -1);
4639     return 0;
4640 }
4641 
4642 static int
load_bool(UnpicklerObject * self,PyObject * boolean)4643 load_bool(UnpicklerObject *self, PyObject *boolean)
4644 {
4645     assert(boolean == Py_True || boolean == Py_False);
4646     PDATA_APPEND(self->stack, boolean, -1);
4647     return 0;
4648 }
4649 
4650 /* s contains x bytes of an unsigned little-endian integer.  Return its value
4651  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4652  */
4653 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)4654 calc_binsize(char *bytes, int nbytes)
4655 {
4656     unsigned char *s = (unsigned char *)bytes;
4657     int i;
4658     size_t x = 0;
4659 
4660     if (nbytes > (int)sizeof(size_t)) {
4661         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
4662          * have 64-bit size that can't be represented on 32-bit platform.
4663          */
4664         for (i = (int)sizeof(size_t); i < nbytes; i++) {
4665             if (s[i])
4666                 return -1;
4667         }
4668         nbytes = (int)sizeof(size_t);
4669     }
4670     for (i = 0; i < nbytes; i++) {
4671         x |= (size_t) s[i] << (8 * i);
4672     }
4673 
4674     if (x > PY_SSIZE_T_MAX)
4675         return -1;
4676     else
4677         return (Py_ssize_t) x;
4678 }
4679 
/* 'bytes' holds 'nbytes' bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed (two's-complement)
 * one.  This is a historical source of x-platform bugs.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a byte >= 0x80 into bit 31
     * of a signed 32-bit long would be undefined behaviour.
     */
    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is ux - 2**32, computed
     * here without leaving the range of a 32-bit long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
4706 
4707 static int
load_binintx(UnpicklerObject * self,char * s,int size)4708 load_binintx(UnpicklerObject *self, char *s, int size)
4709 {
4710     PyObject *value;
4711     long x;
4712 
4713     x = calc_binint(s, size);
4714 
4715     if ((value = PyLong_FromLong(x)) == NULL)
4716         return -1;
4717 
4718     PDATA_PUSH(self->stack, value, -1);
4719     return 0;
4720 }
4721 
4722 static int
load_binint(UnpicklerObject * self)4723 load_binint(UnpicklerObject *self)
4724 {
4725     char *s;
4726 
4727     if (_Unpickler_Read(self, &s, 4) < 0)
4728         return -1;
4729 
4730     return load_binintx(self, s, 4);
4731 }
4732 
4733 static int
load_binint1(UnpicklerObject * self)4734 load_binint1(UnpicklerObject *self)
4735 {
4736     char *s;
4737 
4738     if (_Unpickler_Read(self, &s, 1) < 0)
4739         return -1;
4740 
4741     return load_binintx(self, s, 1);
4742 }
4743 
4744 static int
load_binint2(UnpicklerObject * self)4745 load_binint2(UnpicklerObject *self)
4746 {
4747     char *s;
4748 
4749     if (_Unpickler_Read(self, &s, 2) < 0)
4750         return -1;
4751 
4752     return load_binintx(self, s, 2);
4753 }
4754 
4755 static int
load_long(UnpicklerObject * self)4756 load_long(UnpicklerObject *self)
4757 {
4758     PyObject *value;
4759     char *s;
4760     Py_ssize_t len;
4761 
4762     if ((len = _Unpickler_Readline(self, &s)) < 0)
4763         return -1;
4764     if (len < 2)
4765         return bad_readline();
4766 
4767     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4768        the 'L' before calling PyLong_FromString.  In order to maintain
4769        compatibility with Python 3.0.0, we don't actually *require*
4770        the 'L' to be present. */
4771     if (s[len-2] == 'L')
4772         s[len-2] = '\0';
4773     /* XXX: Should the base argument explicitly set to 10? */
4774     value = PyLong_FromString(s, NULL, 0);
4775     if (value == NULL)
4776         return -1;
4777 
4778     PDATA_PUSH(self->stack, value, -1);
4779     return 0;
4780 }
4781 
4782 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
4783  * data following.
4784  */
4785 static int
load_counted_long(UnpicklerObject * self,int size)4786 load_counted_long(UnpicklerObject *self, int size)
4787 {
4788     PyObject *value;
4789     char *nbytes;
4790     char *pdata;
4791 
4792     assert(size == 1 || size == 4);
4793     if (_Unpickler_Read(self, &nbytes, size) < 0)
4794         return -1;
4795 
4796     size = calc_binint(nbytes, size);
4797     if (size < 0) {
4798         PickleState *st = _Pickle_GetGlobalState();
4799         /* Corrupt or hostile pickle -- we never write one like this */
4800         PyErr_SetString(st->UnpicklingError,
4801                         "LONG pickle has negative byte count");
4802         return -1;
4803     }
4804 
4805     if (size == 0)
4806         value = PyLong_FromLong(0L);
4807     else {
4808         /* Read the raw little-endian bytes and convert. */
4809         if (_Unpickler_Read(self, &pdata, size) < 0)
4810             return -1;
4811         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4812                                       1 /* little endian */ , 1 /* signed */ );
4813     }
4814     if (value == NULL)
4815         return -1;
4816     PDATA_PUSH(self->stack, value, -1);
4817     return 0;
4818 }
4819 
4820 static int
load_float(UnpicklerObject * self)4821 load_float(UnpicklerObject *self)
4822 {
4823     PyObject *value;
4824     char *endptr, *s;
4825     Py_ssize_t len;
4826     double d;
4827 
4828     if ((len = _Unpickler_Readline(self, &s)) < 0)
4829         return -1;
4830     if (len < 2)
4831         return bad_readline();
4832 
4833     errno = 0;
4834     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4835     if (d == -1.0 && PyErr_Occurred())
4836         return -1;
4837     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
4838         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4839         return -1;
4840     }
4841     value = PyFloat_FromDouble(d);
4842     if (value == NULL)
4843         return -1;
4844 
4845     PDATA_PUSH(self->stack, value, -1);
4846     return 0;
4847 }
4848 
4849 static int
load_binfloat(UnpicklerObject * self)4850 load_binfloat(UnpicklerObject *self)
4851 {
4852     PyObject *value;
4853     double x;
4854     char *s;
4855 
4856     if (_Unpickler_Read(self, &s, 8) < 0)
4857         return -1;
4858 
4859     x = _PyFloat_Unpack8((unsigned char *)s, 0);
4860     if (x == -1.0 && PyErr_Occurred())
4861         return -1;
4862 
4863     if ((value = PyFloat_FromDouble(x)) == NULL)
4864         return -1;
4865 
4866     PDATA_PUSH(self->stack, value, -1);
4867     return 0;
4868 }
4869 
4870 static int
load_string(UnpicklerObject * self)4871 load_string(UnpicklerObject *self)
4872 {
4873     PyObject *bytes;
4874     PyObject *obj;
4875     Py_ssize_t len;
4876     char *s, *p;
4877 
4878     if ((len = _Unpickler_Readline(self, &s)) < 0)
4879         return -1;
4880     /* Strip the newline */
4881     len--;
4882     /* Strip outermost quotes */
4883     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
4884         p = s + 1;
4885         len -= 2;
4886     }
4887     else {
4888         PickleState *st = _Pickle_GetGlobalState();
4889         PyErr_SetString(st->UnpicklingError,
4890                         "the STRING opcode argument must be quoted");
4891         return -1;
4892     }
4893     assert(len >= 0);
4894 
4895     /* Use the PyBytes API to decode the string, since that is what is used
4896        to encode, and then coerce the result to Unicode. */
4897     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4898     if (bytes == NULL)
4899         return -1;
4900 
4901     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
4902        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
4903     if (strcmp(self->encoding, "bytes") == 0) {
4904         obj = bytes;
4905     }
4906     else {
4907         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4908         Py_DECREF(bytes);
4909         if (obj == NULL) {
4910             return -1;
4911         }
4912     }
4913 
4914     PDATA_PUSH(self->stack, obj, -1);
4915     return 0;
4916 }
4917 
4918 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)4919 load_counted_binstring(UnpicklerObject *self, int nbytes)
4920 {
4921     PyObject *obj;
4922     Py_ssize_t size;
4923     char *s;
4924 
4925     if (_Unpickler_Read(self, &s, nbytes) < 0)
4926         return -1;
4927 
4928     size = calc_binsize(s, nbytes);
4929     if (size < 0) {
4930         PickleState *st = _Pickle_GetGlobalState();
4931         PyErr_Format(st->UnpicklingError,
4932                      "BINSTRING exceeds system's maximum size of %zd bytes",
4933                      PY_SSIZE_T_MAX);
4934         return -1;
4935     }
4936 
4937     if (_Unpickler_Read(self, &s, size) < 0)
4938         return -1;
4939 
4940     /* Convert Python 2.x strings to bytes if the *encoding* given to the
4941        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
4942     if (strcmp(self->encoding, "bytes") == 0) {
4943         obj = PyBytes_FromStringAndSize(s, size);
4944     }
4945     else {
4946         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
4947     }
4948     if (obj == NULL) {
4949         return -1;
4950     }
4951 
4952     PDATA_PUSH(self->stack, obj, -1);
4953     return 0;
4954 }
4955 
4956 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)4957 load_counted_binbytes(UnpicklerObject *self, int nbytes)
4958 {
4959     PyObject *bytes;
4960     Py_ssize_t size;
4961     char *s;
4962 
4963     if (_Unpickler_Read(self, &s, nbytes) < 0)
4964         return -1;
4965 
4966     size = calc_binsize(s, nbytes);
4967     if (size < 0) {
4968         PyErr_Format(PyExc_OverflowError,
4969                      "BINBYTES exceeds system's maximum size of %zd bytes",
4970                      PY_SSIZE_T_MAX);
4971         return -1;
4972     }
4973 
4974     if (_Unpickler_Read(self, &s, size) < 0)
4975         return -1;
4976 
4977     bytes = PyBytes_FromStringAndSize(s, size);
4978     if (bytes == NULL)
4979         return -1;
4980 
4981     PDATA_PUSH(self->stack, bytes, -1);
4982     return 0;
4983 }
4984 
4985 static int
load_unicode(UnpicklerObject * self)4986 load_unicode(UnpicklerObject *self)
4987 {
4988     PyObject *str;
4989     Py_ssize_t len;
4990     char *s;
4991 
4992     if ((len = _Unpickler_Readline(self, &s)) < 0)
4993         return -1;
4994     if (len < 1)
4995         return bad_readline();
4996 
4997     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4998     if (str == NULL)
4999         return -1;
5000 
5001     PDATA_PUSH(self->stack, str, -1);
5002     return 0;
5003 }
5004 
5005 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5006 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5007 {
5008     PyObject *str;
5009     Py_ssize_t size;
5010     char *s;
5011 
5012     if (_Unpickler_Read(self, &s, nbytes) < 0)
5013         return -1;
5014 
5015     size = calc_binsize(s, nbytes);
5016     if (size < 0) {
5017         PyErr_Format(PyExc_OverflowError,
5018                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5019                      PY_SSIZE_T_MAX);
5020         return -1;
5021     }
5022 
5023     if (_Unpickler_Read(self, &s, size) < 0)
5024         return -1;
5025 
5026     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5027     if (str == NULL)
5028         return -1;
5029 
5030     PDATA_PUSH(self->stack, str, -1);
5031     return 0;
5032 }
5033 
5034 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5035 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5036 {
5037     PyObject *tuple;
5038 
5039     if (Py_SIZE(self->stack) < len)
5040         return Pdata_stack_underflow(self->stack);
5041 
5042     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5043     if (tuple == NULL)
5044         return -1;
5045     PDATA_PUSH(self->stack, tuple, -1);
5046     return 0;
5047 }
5048 
5049 static int
load_tuple(UnpicklerObject * self)5050 load_tuple(UnpicklerObject *self)
5051 {
5052     Py_ssize_t i;
5053 
5054     if ((i = marker(self)) < 0)
5055         return -1;
5056 
5057     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5058 }
5059 
5060 static int
load_empty_list(UnpicklerObject * self)5061 load_empty_list(UnpicklerObject *self)
5062 {
5063     PyObject *list;
5064 
5065     if ((list = PyList_New(0)) == NULL)
5066         return -1;
5067     PDATA_PUSH(self->stack, list, -1);
5068     return 0;
5069 }
5070 
5071 static int
load_empty_dict(UnpicklerObject * self)5072 load_empty_dict(UnpicklerObject *self)
5073 {
5074     PyObject *dict;
5075 
5076     if ((dict = PyDict_New()) == NULL)
5077         return -1;
5078     PDATA_PUSH(self->stack, dict, -1);
5079     return 0;
5080 }
5081 
5082 static int
load_empty_set(UnpicklerObject * self)5083 load_empty_set(UnpicklerObject *self)
5084 {
5085     PyObject *set;
5086 
5087     if ((set = PySet_New(NULL)) == NULL)
5088         return -1;
5089     PDATA_PUSH(self->stack, set, -1);
5090     return 0;
5091 }
5092 
5093 static int
load_list(UnpicklerObject * self)5094 load_list(UnpicklerObject *self)
5095 {
5096     PyObject *list;
5097     Py_ssize_t i;
5098 
5099     if ((i = marker(self)) < 0)
5100         return -1;
5101 
5102     list = Pdata_poplist(self->stack, i);
5103     if (list == NULL)
5104         return -1;
5105     PDATA_PUSH(self->stack, list, -1);
5106     return 0;
5107 }
5108 
5109 static int
load_dict(UnpicklerObject * self)5110 load_dict(UnpicklerObject *self)
5111 {
5112     PyObject *dict, *key, *value;
5113     Py_ssize_t i, j, k;
5114 
5115     if ((i = marker(self)) < 0)
5116         return -1;
5117     j = Py_SIZE(self->stack);
5118 
5119     if ((dict = PyDict_New()) == NULL)
5120         return -1;
5121 
5122     if ((j - i) % 2 != 0) {
5123         PickleState *st = _Pickle_GetGlobalState();
5124         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5125         Py_DECREF(dict);
5126         return -1;
5127     }
5128 
5129     for (k = i + 1; k < j; k += 2) {
5130         key = self->stack->data[k - 1];
5131         value = self->stack->data[k];
5132         if (PyDict_SetItem(dict, key, value) < 0) {
5133             Py_DECREF(dict);
5134             return -1;
5135         }
5136     }
5137     Pdata_clear(self->stack, i);
5138     PDATA_PUSH(self->stack, dict, -1);
5139     return 0;
5140 }
5141 
5142 static int
load_frozenset(UnpicklerObject * self)5143 load_frozenset(UnpicklerObject *self)
5144 {
5145     PyObject *items;
5146     PyObject *frozenset;
5147     Py_ssize_t i;
5148 
5149     if ((i = marker(self)) < 0)
5150         return -1;
5151 
5152     items = Pdata_poptuple(self->stack, i);
5153     if (items == NULL)
5154         return -1;
5155 
5156     frozenset = PyFrozenSet_New(items);
5157     Py_DECREF(items);
5158     if (frozenset == NULL)
5159         return -1;
5160 
5161     PDATA_PUSH(self->stack, frozenset, -1);
5162     return 0;
5163 }
5164 
5165 static PyObject *
instantiate(PyObject * cls,PyObject * args)5166 instantiate(PyObject *cls, PyObject *args)
5167 {
5168     PyObject *result = NULL;
5169     _Py_IDENTIFIER(__getinitargs__);
5170     /* Caller must assure args are a tuple.  Normally, args come from
5171        Pdata_poptuple which packs objects from the top of the stack
5172        into a newly created tuple. */
5173     assert(PyTuple_Check(args));
5174     if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
5175         _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
5176         result = PyObject_CallObject(cls, args);
5177     }
5178     else {
5179         _Py_IDENTIFIER(__new__);
5180 
5181         result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
5182     }
5183     return result;
5184 }
5185 
5186 static int
load_obj(UnpicklerObject * self)5187 load_obj(UnpicklerObject *self)
5188 {
5189     PyObject *cls, *args, *obj = NULL;
5190     Py_ssize_t i;
5191 
5192     if ((i = marker(self)) < 0)
5193         return -1;
5194 
5195     if (Py_SIZE(self->stack) - i < 1)
5196         return Pdata_stack_underflow(self->stack);
5197 
5198     args = Pdata_poptuple(self->stack, i + 1);
5199     if (args == NULL)
5200         return -1;
5201 
5202     PDATA_POP(self->stack, cls);
5203     if (cls) {
5204         obj = instantiate(cls, args);
5205         Py_DECREF(cls);
5206     }
5207     Py_DECREF(args);
5208     if (obj == NULL)
5209         return -1;
5210 
5211     PDATA_PUSH(self->stack, obj, -1);
5212     return 0;
5213 }
5214 
5215 static int
load_inst(UnpicklerObject * self)5216 load_inst(UnpicklerObject *self)
5217 {
5218     PyObject *cls = NULL;
5219     PyObject *args = NULL;
5220     PyObject *obj = NULL;
5221     PyObject *module_name;
5222     PyObject *class_name;
5223     Py_ssize_t len;
5224     Py_ssize_t i;
5225     char *s;
5226 
5227     if ((i = marker(self)) < 0)
5228         return -1;
5229     if ((len = _Unpickler_Readline(self, &s)) < 0)
5230         return -1;
5231     if (len < 2)
5232         return bad_readline();
5233 
5234     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5235        identifiers are permitted in Python 3.0, since the INST opcode is only
5236        supported by older protocols on Python 2.x. */
5237     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5238     if (module_name == NULL)
5239         return -1;
5240 
5241     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5242         if (len < 2) {
5243             Py_DECREF(module_name);
5244             return bad_readline();
5245         }
5246         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5247         if (class_name != NULL) {
5248             cls = find_class(self, module_name, class_name);
5249             Py_DECREF(class_name);
5250         }
5251     }
5252     Py_DECREF(module_name);
5253 
5254     if (cls == NULL)
5255         return -1;
5256 
5257     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5258         obj = instantiate(cls, args);
5259         Py_DECREF(args);
5260     }
5261     Py_DECREF(cls);
5262 
5263     if (obj == NULL)
5264         return -1;
5265 
5266     PDATA_PUSH(self->stack, obj, -1);
5267     return 0;
5268 }
5269 
5270 static int
load_newobj(UnpicklerObject * self)5271 load_newobj(UnpicklerObject *self)
5272 {
5273     PyObject *args = NULL;
5274     PyObject *clsraw = NULL;
5275     PyTypeObject *cls;          /* clsraw cast to its true type */
5276     PyObject *obj;
5277     PickleState *st = _Pickle_GetGlobalState();
5278 
5279     /* Stack is ... cls argtuple, and we want to call
5280      * cls.__new__(cls, *argtuple).
5281      */
5282     PDATA_POP(self->stack, args);
5283     if (args == NULL)
5284         goto error;
5285     if (!PyTuple_Check(args)) {
5286         PyErr_SetString(st->UnpicklingError,
5287                         "NEWOBJ expected an arg " "tuple.");
5288         goto error;
5289     }
5290 
5291     PDATA_POP(self->stack, clsraw);
5292     cls = (PyTypeObject *)clsraw;
5293     if (cls == NULL)
5294         goto error;
5295     if (!PyType_Check(cls)) {
5296         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5297                         "isn't a type object");
5298         goto error;
5299     }
5300     if (cls->tp_new == NULL) {
5301         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5302                         "has NULL tp_new");
5303         goto error;
5304     }
5305 
5306     /* Call __new__. */
5307     obj = cls->tp_new(cls, args, NULL);
5308     if (obj == NULL)
5309         goto error;
5310 
5311     Py_DECREF(args);
5312     Py_DECREF(clsraw);
5313     PDATA_PUSH(self->stack, obj, -1);
5314     return 0;
5315 
5316   error:
5317     Py_XDECREF(args);
5318     Py_XDECREF(clsraw);
5319     return -1;
5320 }
5321 
5322 static int
load_newobj_ex(UnpicklerObject * self)5323 load_newobj_ex(UnpicklerObject *self)
5324 {
5325     PyObject *cls, *args, *kwargs;
5326     PyObject *obj;
5327     PickleState *st = _Pickle_GetGlobalState();
5328 
5329     PDATA_POP(self->stack, kwargs);
5330     if (kwargs == NULL) {
5331         return -1;
5332     }
5333     PDATA_POP(self->stack, args);
5334     if (args == NULL) {
5335         Py_DECREF(kwargs);
5336         return -1;
5337     }
5338     PDATA_POP(self->stack, cls);
5339     if (cls == NULL) {
5340         Py_DECREF(kwargs);
5341         Py_DECREF(args);
5342         return -1;
5343     }
5344 
5345     if (!PyType_Check(cls)) {
5346         Py_DECREF(kwargs);
5347         Py_DECREF(args);
5348         PyErr_Format(st->UnpicklingError,
5349                      "NEWOBJ_EX class argument must be a type, not %.200s",
5350                      Py_TYPE(cls)->tp_name);
5351         Py_DECREF(cls);
5352         return -1;
5353     }
5354 
5355     if (((PyTypeObject *)cls)->tp_new == NULL) {
5356         Py_DECREF(kwargs);
5357         Py_DECREF(args);
5358         Py_DECREF(cls);
5359         PyErr_SetString(st->UnpicklingError,
5360                         "NEWOBJ_EX class argument doesn't have __new__");
5361         return -1;
5362     }
5363     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5364     Py_DECREF(kwargs);
5365     Py_DECREF(args);
5366     Py_DECREF(cls);
5367     if (obj == NULL) {
5368         return -1;
5369     }
5370     PDATA_PUSH(self->stack, obj, -1);
5371     return 0;
5372 }
5373 
5374 static int
load_global(UnpicklerObject * self)5375 load_global(UnpicklerObject *self)
5376 {
5377     PyObject *global = NULL;
5378     PyObject *module_name;
5379     PyObject *global_name;
5380     Py_ssize_t len;
5381     char *s;
5382 
5383     if ((len = _Unpickler_Readline(self, &s)) < 0)
5384         return -1;
5385     if (len < 2)
5386         return bad_readline();
5387     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5388     if (!module_name)
5389         return -1;
5390 
5391     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5392         if (len < 2) {
5393             Py_DECREF(module_name);
5394             return bad_readline();
5395         }
5396         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5397         if (global_name) {
5398             global = find_class(self, module_name, global_name);
5399             Py_DECREF(global_name);
5400         }
5401     }
5402     Py_DECREF(module_name);
5403 
5404     if (global == NULL)
5405         return -1;
5406     PDATA_PUSH(self->stack, global, -1);
5407     return 0;
5408 }
5409 
5410 static int
load_stack_global(UnpicklerObject * self)5411 load_stack_global(UnpicklerObject *self)
5412 {
5413     PyObject *global;
5414     PyObject *module_name;
5415     PyObject *global_name;
5416 
5417     PDATA_POP(self->stack, global_name);
5418     PDATA_POP(self->stack, module_name);
5419     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5420         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5421         PickleState *st = _Pickle_GetGlobalState();
5422         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
5423         Py_XDECREF(global_name);
5424         Py_XDECREF(module_name);
5425         return -1;
5426     }
5427     global = find_class(self, module_name, global_name);
5428     Py_DECREF(global_name);
5429     Py_DECREF(module_name);
5430     if (global == NULL)
5431         return -1;
5432     PDATA_PUSH(self->stack, global, -1);
5433     return 0;
5434 }
5435 
5436 static int
load_persid(UnpicklerObject * self)5437 load_persid(UnpicklerObject *self)
5438 {
5439     PyObject *pid;
5440     Py_ssize_t len;
5441     char *s;
5442 
5443     if (self->pers_func) {
5444         if ((len = _Unpickler_Readline(self, &s)) < 0)
5445             return -1;
5446         if (len < 1)
5447             return bad_readline();
5448 
5449         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
5450         if (pid == NULL) {
5451             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
5452                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
5453                                 "persistent IDs in protocol 0 must be "
5454                                 "ASCII strings");
5455             }
5456             return -1;
5457         }
5458 
5459         /* This does not leak since _Pickle_FastCall() steals the reference
5460            to pid first. */
5461         pid = _Pickle_FastCall(self->pers_func, pid);
5462         if (pid == NULL)
5463             return -1;
5464 
5465         PDATA_PUSH(self->stack, pid, -1);
5466         return 0;
5467     }
5468     else {
5469         PickleState *st = _Pickle_GetGlobalState();
5470         PyErr_SetString(st->UnpicklingError,
5471                         "A load persistent id instruction was encountered,\n"
5472                         "but no persistent_load function was specified.");
5473         return -1;
5474     }
5475 }
5476 
5477 static int
load_binpersid(UnpicklerObject * self)5478 load_binpersid(UnpicklerObject *self)
5479 {
5480     PyObject *pid;
5481 
5482     if (self->pers_func) {
5483         PDATA_POP(self->stack, pid);
5484         if (pid == NULL)
5485             return -1;
5486 
5487         /* This does not leak since _Pickle_FastCall() steals the
5488            reference to pid first. */
5489         pid = _Pickle_FastCall(self->pers_func, pid);
5490         if (pid == NULL)
5491             return -1;
5492 
5493         PDATA_PUSH(self->stack, pid, -1);
5494         return 0;
5495     }
5496     else {
5497         PickleState *st = _Pickle_GetGlobalState();
5498         PyErr_SetString(st->UnpicklingError,
5499                         "A load persistent id instruction was encountered,\n"
5500                         "but no persistent_load function was specified.");
5501         return -1;
5502     }
5503 }
5504 
5505 static int
load_pop(UnpicklerObject * self)5506 load_pop(UnpicklerObject *self)
5507 {
5508     Py_ssize_t len = Py_SIZE(self->stack);
5509 
5510     /* Note that we split the (pickle.py) stack into two stacks,
5511      * an object stack and a mark stack. We have to be clever and
5512      * pop the right one. We do this by looking at the top of the
5513      * mark stack first, and only signalling a stack underflow if
5514      * the object stack is empty and the mark stack doesn't match
5515      * our expectations.
5516      */
5517     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
5518         self->num_marks--;
5519         self->stack->mark_set = self->num_marks != 0;
5520         self->stack->fence = self->num_marks ?
5521                 self->marks[self->num_marks - 1] : 0;
5522     } else if (len <= self->stack->fence)
5523         return Pdata_stack_underflow(self->stack);
5524     else {
5525         len--;
5526         Py_DECREF(self->stack->data[len]);
5527         Py_SIZE(self->stack) = len;
5528     }
5529     return 0;
5530 }
5531 
5532 static int
load_pop_mark(UnpicklerObject * self)5533 load_pop_mark(UnpicklerObject *self)
5534 {
5535     Py_ssize_t i;
5536 
5537     if ((i = marker(self)) < 0)
5538         return -1;
5539 
5540     Pdata_clear(self->stack, i);
5541 
5542     return 0;
5543 }
5544 
5545 static int
load_dup(UnpicklerObject * self)5546 load_dup(UnpicklerObject *self)
5547 {
5548     PyObject *last;
5549     Py_ssize_t len = Py_SIZE(self->stack);
5550 
5551     if (len <= self->stack->fence)
5552         return Pdata_stack_underflow(self->stack);
5553     last = self->stack->data[len - 1];
5554     PDATA_APPEND(self->stack, last, -1);
5555     return 0;
5556 }
5557 
5558 static int
load_get(UnpicklerObject * self)5559 load_get(UnpicklerObject *self)
5560 {
5561     PyObject *key, *value;
5562     Py_ssize_t idx;
5563     Py_ssize_t len;
5564     char *s;
5565 
5566     if ((len = _Unpickler_Readline(self, &s)) < 0)
5567         return -1;
5568     if (len < 2)
5569         return bad_readline();
5570 
5571     key = PyLong_FromString(s, NULL, 10);
5572     if (key == NULL)
5573         return -1;
5574     idx = PyLong_AsSsize_t(key);
5575     if (idx == -1 && PyErr_Occurred()) {
5576         Py_DECREF(key);
5577         return -1;
5578     }
5579 
5580     value = _Unpickler_MemoGet(self, idx);
5581     if (value == NULL) {
5582         if (!PyErr_Occurred())
5583             PyErr_SetObject(PyExc_KeyError, key);
5584         Py_DECREF(key);
5585         return -1;
5586     }
5587     Py_DECREF(key);
5588 
5589     PDATA_APPEND(self->stack, value, -1);
5590     return 0;
5591 }
5592 
5593 static int
load_binget(UnpicklerObject * self)5594 load_binget(UnpicklerObject *self)
5595 {
5596     PyObject *value;
5597     Py_ssize_t idx;
5598     char *s;
5599 
5600     if (_Unpickler_Read(self, &s, 1) < 0)
5601         return -1;
5602 
5603     idx = Py_CHARMASK(s[0]);
5604 
5605     value = _Unpickler_MemoGet(self, idx);
5606     if (value == NULL) {
5607         PyObject *key = PyLong_FromSsize_t(idx);
5608         if (key != NULL) {
5609             PyErr_SetObject(PyExc_KeyError, key);
5610             Py_DECREF(key);
5611         }
5612         return -1;
5613     }
5614 
5615     PDATA_APPEND(self->stack, value, -1);
5616     return 0;
5617 }
5618 
5619 static int
load_long_binget(UnpicklerObject * self)5620 load_long_binget(UnpicklerObject *self)
5621 {
5622     PyObject *value;
5623     Py_ssize_t idx;
5624     char *s;
5625 
5626     if (_Unpickler_Read(self, &s, 4) < 0)
5627         return -1;
5628 
5629     idx = calc_binsize(s, 4);
5630 
5631     value = _Unpickler_MemoGet(self, idx);
5632     if (value == NULL) {
5633         PyObject *key = PyLong_FromSsize_t(idx);
5634         if (key != NULL) {
5635             PyErr_SetObject(PyExc_KeyError, key);
5636             Py_DECREF(key);
5637         }
5638         return -1;
5639     }
5640 
5641     PDATA_APPEND(self->stack, value, -1);
5642     return 0;
5643 }
5644 
5645 /* Push an object from the extension registry (EXT[124]).  nbytes is
5646  * the number of bytes following the opcode, holding the index (code) value.
5647  */
5648 static int
load_extension(UnpicklerObject * self,int nbytes)5649 load_extension(UnpicklerObject *self, int nbytes)
5650 {
5651     char *codebytes;            /* the nbytes bytes after the opcode */
5652     long code;                  /* calc_binint returns long */
5653     PyObject *py_code;          /* code as a Python int */
5654     PyObject *obj;              /* the object to push */
5655     PyObject *pair;             /* (module_name, class_name) */
5656     PyObject *module_name, *class_name;
5657     PickleState *st = _Pickle_GetGlobalState();
5658 
5659     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
5660     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
5661         return -1;
5662     code = calc_binint(codebytes, nbytes);
5663     if (code <= 0) {            /* note that 0 is forbidden */
5664         /* Corrupt or hostile pickle. */
5665         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
5666         return -1;
5667     }
5668 
5669     /* Look for the code in the cache. */
5670     py_code = PyLong_FromLong(code);
5671     if (py_code == NULL)
5672         return -1;
5673     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
5674     if (obj != NULL) {
5675         /* Bingo. */
5676         Py_DECREF(py_code);
5677         PDATA_APPEND(self->stack, obj, -1);
5678         return 0;
5679     }
5680     if (PyErr_Occurred()) {
5681         Py_DECREF(py_code);
5682         return -1;
5683     }
5684 
5685     /* Look up the (module_name, class_name) pair. */
5686     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
5687     if (pair == NULL) {
5688         Py_DECREF(py_code);
5689         if (!PyErr_Occurred()) {
5690             PyErr_Format(PyExc_ValueError, "unregistered extension "
5691                          "code %ld", code);
5692         }
5693         return -1;
5694     }
5695     /* Since the extension registry is manipulable via Python code,
5696      * confirm that pair is really a 2-tuple of strings.
5697      */
5698     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5699         !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5700         !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5701         Py_DECREF(py_code);
5702         PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5703                      "isn't a 2-tuple of strings", code);
5704         return -1;
5705     }
5706     /* Load the object. */
5707     obj = find_class(self, module_name, class_name);
5708     if (obj == NULL) {
5709         Py_DECREF(py_code);
5710         return -1;
5711     }
5712     /* Cache code -> obj. */
5713     code = PyDict_SetItem(st->extension_cache, py_code, obj);
5714     Py_DECREF(py_code);
5715     if (code < 0) {
5716         Py_DECREF(obj);
5717         return -1;
5718     }
5719     PDATA_PUSH(self->stack, obj, -1);
5720     return 0;
5721 }
5722 
5723 static int
load_put(UnpicklerObject * self)5724 load_put(UnpicklerObject *self)
5725 {
5726     PyObject *key, *value;
5727     Py_ssize_t idx;
5728     Py_ssize_t len;
5729     char *s;
5730 
5731     if ((len = _Unpickler_Readline(self, &s)) < 0)
5732         return -1;
5733     if (len < 2)
5734         return bad_readline();
5735     if (Py_SIZE(self->stack) <= self->stack->fence)
5736         return Pdata_stack_underflow(self->stack);
5737     value = self->stack->data[Py_SIZE(self->stack) - 1];
5738 
5739     key = PyLong_FromString(s, NULL, 10);
5740     if (key == NULL)
5741         return -1;
5742     idx = PyLong_AsSsize_t(key);
5743     Py_DECREF(key);
5744     if (idx < 0) {
5745         if (!PyErr_Occurred())
5746             PyErr_SetString(PyExc_ValueError,
5747                             "negative PUT argument");
5748         return -1;
5749     }
5750 
5751     return _Unpickler_MemoPut(self, idx, value);
5752 }
5753 
5754 static int
load_binput(UnpicklerObject * self)5755 load_binput(UnpicklerObject *self)
5756 {
5757     PyObject *value;
5758     Py_ssize_t idx;
5759     char *s;
5760 
5761     if (_Unpickler_Read(self, &s, 1) < 0)
5762         return -1;
5763 
5764     if (Py_SIZE(self->stack) <= self->stack->fence)
5765         return Pdata_stack_underflow(self->stack);
5766     value = self->stack->data[Py_SIZE(self->stack) - 1];
5767 
5768     idx = Py_CHARMASK(s[0]);
5769 
5770     return _Unpickler_MemoPut(self, idx, value);
5771 }
5772 
5773 static int
load_long_binput(UnpicklerObject * self)5774 load_long_binput(UnpicklerObject *self)
5775 {
5776     PyObject *value;
5777     Py_ssize_t idx;
5778     char *s;
5779 
5780     if (_Unpickler_Read(self, &s, 4) < 0)
5781         return -1;
5782 
5783     if (Py_SIZE(self->stack) <= self->stack->fence)
5784         return Pdata_stack_underflow(self->stack);
5785     value = self->stack->data[Py_SIZE(self->stack) - 1];
5786 
5787     idx = calc_binsize(s, 4);
5788     if (idx < 0) {
5789         PyErr_SetString(PyExc_ValueError,
5790                         "negative LONG_BINPUT argument");
5791         return -1;
5792     }
5793 
5794     return _Unpickler_MemoPut(self, idx, value);
5795 }
5796 
5797 static int
load_memoize(UnpicklerObject * self)5798 load_memoize(UnpicklerObject *self)
5799 {
5800     PyObject *value;
5801 
5802     if (Py_SIZE(self->stack) <= self->stack->fence)
5803         return Pdata_stack_underflow(self->stack);
5804     value = self->stack->data[Py_SIZE(self->stack) - 1];
5805 
5806     return _Unpickler_MemoPut(self, self->memo_len, value);
5807 }
5808 
5809 static int
do_append(UnpicklerObject * self,Py_ssize_t x)5810 do_append(UnpicklerObject *self, Py_ssize_t x)
5811 {
5812     PyObject *value;
5813     PyObject *list;
5814     Py_ssize_t len, i;
5815 
5816     len = Py_SIZE(self->stack);
5817     if (x > len || x <= self->stack->fence)
5818         return Pdata_stack_underflow(self->stack);
5819     if (len == x)  /* nothing to do */
5820         return 0;
5821 
5822     list = self->stack->data[x - 1];
5823 
5824     if (PyList_Check(list)) {
5825         PyObject *slice;
5826         Py_ssize_t list_len;
5827         int ret;
5828 
5829         slice = Pdata_poplist(self->stack, x);
5830         if (!slice)
5831             return -1;
5832         list_len = PyList_GET_SIZE(list);
5833         ret = PyList_SetSlice(list, list_len, list_len, slice);
5834         Py_DECREF(slice);
5835         return ret;
5836     }
5837     else {
5838         PyObject *append_func;
5839         _Py_IDENTIFIER(append);
5840 
5841         append_func = _PyObject_GetAttrId(list, &PyId_append);
5842         if (append_func == NULL)
5843             return -1;
5844         for (i = x; i < len; i++) {
5845             PyObject *result;
5846 
5847             value = self->stack->data[i];
5848             result = _Pickle_FastCall(append_func, value);
5849             if (result == NULL) {
5850                 Pdata_clear(self->stack, i + 1);
5851                 Py_SIZE(self->stack) = x;
5852                 Py_DECREF(append_func);
5853                 return -1;
5854             }
5855             Py_DECREF(result);
5856         }
5857         Py_SIZE(self->stack) = x;
5858         Py_DECREF(append_func);
5859     }
5860 
5861     return 0;
5862 }
5863 
5864 static int
load_append(UnpicklerObject * self)5865 load_append(UnpicklerObject *self)
5866 {
5867     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
5868         return Pdata_stack_underflow(self->stack);
5869     return do_append(self, Py_SIZE(self->stack) - 1);
5870 }
5871 
5872 static int
load_appends(UnpicklerObject * self)5873 load_appends(UnpicklerObject *self)
5874 {
5875     Py_ssize_t i = marker(self);
5876     if (i < 0)
5877         return -1;
5878     return do_append(self, i);
5879 }
5880 
5881 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)5882 do_setitems(UnpicklerObject *self, Py_ssize_t x)
5883 {
5884     PyObject *value, *key;
5885     PyObject *dict;
5886     Py_ssize_t len, i;
5887     int status = 0;
5888 
5889     len = Py_SIZE(self->stack);
5890     if (x > len || x <= self->stack->fence)
5891         return Pdata_stack_underflow(self->stack);
5892     if (len == x)  /* nothing to do */
5893         return 0;
5894     if ((len - x) % 2 != 0) {
5895         PickleState *st = _Pickle_GetGlobalState();
5896         /* Currupt or hostile pickle -- we never write one like this. */
5897         PyErr_SetString(st->UnpicklingError,
5898                         "odd number of items for SETITEMS");
5899         return -1;
5900     }
5901 
5902     /* Here, dict does not actually need to be a PyDict; it could be anything
5903        that supports the __setitem__ attribute. */
5904     dict = self->stack->data[x - 1];
5905 
5906     for (i = x + 1; i < len; i += 2) {
5907         key = self->stack->data[i - 1];
5908         value = self->stack->data[i];
5909         if (PyObject_SetItem(dict, key, value) < 0) {
5910             status = -1;
5911             break;
5912         }
5913     }
5914 
5915     Pdata_clear(self->stack, x);
5916     return status;
5917 }
5918 
5919 static int
load_setitem(UnpicklerObject * self)5920 load_setitem(UnpicklerObject *self)
5921 {
5922     return do_setitems(self, Py_SIZE(self->stack) - 2);
5923 }
5924 
5925 static int
load_setitems(UnpicklerObject * self)5926 load_setitems(UnpicklerObject *self)
5927 {
5928     Py_ssize_t i = marker(self);
5929     if (i < 0)
5930         return -1;
5931     return do_setitems(self, i);
5932 }
5933 
5934 static int
load_additems(UnpicklerObject * self)5935 load_additems(UnpicklerObject *self)
5936 {
5937     PyObject *set;
5938     Py_ssize_t mark, len, i;
5939 
5940     mark =  marker(self);
5941     if (mark < 0)
5942         return -1;
5943     len = Py_SIZE(self->stack);
5944     if (mark > len || mark <= self->stack->fence)
5945         return Pdata_stack_underflow(self->stack);
5946     if (len == mark)  /* nothing to do */
5947         return 0;
5948 
5949     set = self->stack->data[mark - 1];
5950 
5951     if (PySet_Check(set)) {
5952         PyObject *items;
5953         int status;
5954 
5955         items = Pdata_poptuple(self->stack, mark);
5956         if (items == NULL)
5957             return -1;
5958 
5959         status = _PySet_Update(set, items);
5960         Py_DECREF(items);
5961         return status;
5962     }
5963     else {
5964         PyObject *add_func;
5965         _Py_IDENTIFIER(add);
5966 
5967         add_func = _PyObject_GetAttrId(set, &PyId_add);
5968         if (add_func == NULL)
5969             return -1;
5970         for (i = mark; i < len; i++) {
5971             PyObject *result;
5972             PyObject *item;
5973 
5974             item = self->stack->data[i];
5975             result = _Pickle_FastCall(add_func, item);
5976             if (result == NULL) {
5977                 Pdata_clear(self->stack, i + 1);
5978                 Py_SIZE(self->stack) = mark;
5979                 return -1;
5980             }
5981             Py_DECREF(result);
5982         }
5983         Py_SIZE(self->stack) = mark;
5984     }
5985 
5986     return 0;
5987 }
5988 
5989 static int
load_build(UnpicklerObject * self)5990 load_build(UnpicklerObject *self)
5991 {
5992     PyObject *state, *inst, *slotstate;
5993     PyObject *setstate;
5994     int status = 0;
5995     _Py_IDENTIFIER(__setstate__);
5996 
5997     /* Stack is ... instance, state.  We want to leave instance at
5998      * the stack top, possibly mutated via instance.__setstate__(state).
5999      */
6000     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6001         return Pdata_stack_underflow(self->stack);
6002 
6003     PDATA_POP(self->stack, state);
6004     if (state == NULL)
6005         return -1;
6006 
6007     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6008 
6009     setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
6010     if (setstate == NULL) {
6011         if (PyErr_ExceptionMatches(PyExc_AttributeError))
6012             PyErr_Clear();
6013         else {
6014             Py_DECREF(state);
6015             return -1;
6016         }
6017     }
6018     else {
6019         PyObject *result;
6020 
6021         /* The explicit __setstate__ is responsible for everything. */
6022         result = _Pickle_FastCall(setstate, state);
6023         Py_DECREF(setstate);
6024         if (result == NULL)
6025             return -1;
6026         Py_DECREF(result);
6027         return 0;
6028     }
6029 
6030     /* A default __setstate__.  First see whether state embeds a
6031      * slot state dict too (a proto 2 addition).
6032      */
6033     if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
6034         PyObject *tmp = state;
6035 
6036         state = PyTuple_GET_ITEM(tmp, 0);
6037         slotstate = PyTuple_GET_ITEM(tmp, 1);
6038         Py_INCREF(state);
6039         Py_INCREF(slotstate);
6040         Py_DECREF(tmp);
6041     }
6042     else
6043         slotstate = NULL;
6044 
6045     /* Set inst.__dict__ from the state dict (if any). */
6046     if (state != Py_None) {
6047         PyObject *dict;
6048         PyObject *d_key, *d_value;
6049         Py_ssize_t i;
6050         _Py_IDENTIFIER(__dict__);
6051 
6052         if (!PyDict_Check(state)) {
6053             PickleState *st = _Pickle_GetGlobalState();
6054             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6055             goto error;
6056         }
6057         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6058         if (dict == NULL)
6059             goto error;
6060 
6061         i = 0;
6062         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6063             /* normally the keys for instance attributes are
6064                interned.  we should try to do that here. */
6065             Py_INCREF(d_key);
6066             if (PyUnicode_CheckExact(d_key))
6067                 PyUnicode_InternInPlace(&d_key);
6068             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6069                 Py_DECREF(d_key);
6070                 goto error;
6071             }
6072             Py_DECREF(d_key);
6073         }
6074         Py_DECREF(dict);
6075     }
6076 
6077     /* Also set instance attributes from the slotstate dict (if any). */
6078     if (slotstate != NULL) {
6079         PyObject *d_key, *d_value;
6080         Py_ssize_t i;
6081 
6082         if (!PyDict_Check(slotstate)) {
6083             PickleState *st = _Pickle_GetGlobalState();
6084             PyErr_SetString(st->UnpicklingError,
6085                             "slot state is not a dictionary");
6086             goto error;
6087         }
6088         i = 0;
6089         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6090             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6091                 goto error;
6092         }
6093     }
6094 
6095     if (0) {
6096   error:
6097         status = -1;
6098     }
6099 
6100     Py_DECREF(state);
6101     Py_XDECREF(slotstate);
6102     return status;
6103 }
6104 
6105 static int
load_mark(UnpicklerObject * self)6106 load_mark(UnpicklerObject *self)
6107 {
6108 
6109     /* Note that we split the (pickle.py) stack into two stacks, an
6110      * object stack and a mark stack. Here we push a mark onto the
6111      * mark stack.
6112      */
6113 
6114     if ((self->num_marks + 1) >= self->marks_size) {
6115         size_t alloc;
6116 
6117         /* Use the size_t type to check for overflow. */
6118         alloc = ((size_t)self->num_marks << 1) + 20;
6119         if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
6120             alloc <= ((size_t)self->num_marks + 1)) {
6121             PyErr_NoMemory();
6122             return -1;
6123         }
6124 
6125         if (self->marks == NULL)
6126             self->marks = PyMem_NEW(Py_ssize_t, alloc);
6127         else
6128             PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
6129         if (self->marks == NULL) {
6130             self->marks_size = 0;
6131             PyErr_NoMemory();
6132             return -1;
6133         }
6134         self->marks_size = (Py_ssize_t)alloc;
6135     }
6136 
6137     self->stack->mark_set = 1;
6138     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6139 
6140     return 0;
6141 }
6142 
6143 static int
load_reduce(UnpicklerObject * self)6144 load_reduce(UnpicklerObject *self)
6145 {
6146     PyObject *callable = NULL;
6147     PyObject *argtup = NULL;
6148     PyObject *obj = NULL;
6149 
6150     PDATA_POP(self->stack, argtup);
6151     if (argtup == NULL)
6152         return -1;
6153     PDATA_POP(self->stack, callable);
6154     if (callable) {
6155         obj = PyObject_CallObject(callable, argtup);
6156         Py_DECREF(callable);
6157     }
6158     Py_DECREF(argtup);
6159 
6160     if (obj == NULL)
6161         return -1;
6162 
6163     PDATA_PUSH(self->stack, obj, -1);
6164     return 0;
6165 }
6166 
6167 /* Just raises an error if we don't know the protocol specified.  PROTO
6168  * is the first opcode for protocols >= 2.
6169  */
6170 static int
load_proto(UnpicklerObject * self)6171 load_proto(UnpicklerObject *self)
6172 {
6173     char *s;
6174     int i;
6175 
6176     if (_Unpickler_Read(self, &s, 1) < 0)
6177         return -1;
6178 
6179     i = (unsigned char)s[0];
6180     if (i <= HIGHEST_PROTOCOL) {
6181         self->proto = i;
6182         return 0;
6183     }
6184 
6185     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6186     return -1;
6187 }
6188 
6189 static int
load_frame(UnpicklerObject * self)6190 load_frame(UnpicklerObject *self)
6191 {
6192     char *s;
6193     Py_ssize_t frame_len;
6194 
6195     if (_Unpickler_Read(self, &s, 8) < 0)
6196         return -1;
6197 
6198     frame_len = calc_binsize(s, 8);
6199     if (frame_len < 0) {
6200         PyErr_Format(PyExc_OverflowError,
6201                      "FRAME length exceeds system's maximum of %zd bytes",
6202                      PY_SSIZE_T_MAX);
6203         return -1;
6204     }
6205 
6206     if (_Unpickler_Read(self, &s, frame_len) < 0)
6207         return -1;
6208 
6209     /* Rewind to start of frame */
6210     self->next_read_idx -= frame_len;
6211     return 0;
6212 }
6213 
6214 static PyObject *
load(UnpicklerObject * self)6215 load(UnpicklerObject *self)
6216 {
6217     PyObject *value = NULL;
6218     char *s = NULL;
6219 
6220     self->num_marks = 0;
6221     self->stack->mark_set = 0;
6222     self->stack->fence = 0;
6223     self->proto = 0;
6224     if (Py_SIZE(self->stack))
6225         Pdata_clear(self->stack, 0);
6226 
6227     /* Convenient macros for the dispatch while-switch loop just below. */
6228 #define OP(opcode, load_func) \
6229     case opcode: if (load_func(self) < 0) break; continue;
6230 
6231 #define OP_ARG(opcode, load_func, arg) \
6232     case opcode: if (load_func(self, (arg)) < 0) break; continue;
6233 
6234     while (1) {
6235         if (_Unpickler_Read(self, &s, 1) < 0) {
6236             PickleState *st = _Pickle_GetGlobalState();
6237             if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6238                 PyErr_Format(PyExc_EOFError, "Ran out of input");
6239             }
6240             return NULL;
6241         }
6242 
6243         switch ((enum opcode)s[0]) {
6244         OP(NONE, load_none)
6245         OP(BININT, load_binint)
6246         OP(BININT1, load_binint1)
6247         OP(BININT2, load_binint2)
6248         OP(INT, load_int)
6249         OP(LONG, load_long)
6250         OP_ARG(LONG1, load_counted_long, 1)
6251         OP_ARG(LONG4, load_counted_long, 4)
6252         OP(FLOAT, load_float)
6253         OP(BINFLOAT, load_binfloat)
6254         OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6255         OP_ARG(BINBYTES, load_counted_binbytes, 4)
6256         OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6257         OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6258         OP_ARG(BINSTRING, load_counted_binstring, 4)
6259         OP(STRING, load_string)
6260         OP(UNICODE, load_unicode)
6261         OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6262         OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6263         OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6264         OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6265         OP_ARG(TUPLE1, load_counted_tuple, 1)
6266         OP_ARG(TUPLE2, load_counted_tuple, 2)
6267         OP_ARG(TUPLE3, load_counted_tuple, 3)
6268         OP(TUPLE, load_tuple)
6269         OP(EMPTY_LIST, load_empty_list)
6270         OP(LIST, load_list)
6271         OP(EMPTY_DICT, load_empty_dict)
6272         OP(DICT, load_dict)
6273         OP(EMPTY_SET, load_empty_set)
6274         OP(ADDITEMS, load_additems)
6275         OP(FROZENSET, load_frozenset)
6276         OP(OBJ, load_obj)
6277         OP(INST, load_inst)
6278         OP(NEWOBJ, load_newobj)
6279         OP(NEWOBJ_EX, load_newobj_ex)
6280         OP(GLOBAL, load_global)
6281         OP(STACK_GLOBAL, load_stack_global)
6282         OP(APPEND, load_append)
6283         OP(APPENDS, load_appends)
6284         OP(BUILD, load_build)
6285         OP(DUP, load_dup)
6286         OP(BINGET, load_binget)
6287         OP(LONG_BINGET, load_long_binget)
6288         OP(GET, load_get)
6289         OP(MARK, load_mark)
6290         OP(BINPUT, load_binput)
6291         OP(LONG_BINPUT, load_long_binput)
6292         OP(PUT, load_put)
6293         OP(MEMOIZE, load_memoize)
6294         OP(POP, load_pop)
6295         OP(POP_MARK, load_pop_mark)
6296         OP(SETITEM, load_setitem)
6297         OP(SETITEMS, load_setitems)
6298         OP(PERSID, load_persid)
6299         OP(BINPERSID, load_binpersid)
6300         OP(REDUCE, load_reduce)
6301         OP(PROTO, load_proto)
6302         OP(FRAME, load_frame)
6303         OP_ARG(EXT1, load_extension, 1)
6304         OP_ARG(EXT2, load_extension, 2)
6305         OP_ARG(EXT4, load_extension, 4)
6306         OP_ARG(NEWTRUE, load_bool, Py_True)
6307         OP_ARG(NEWFALSE, load_bool, Py_False)
6308 
6309         case STOP:
6310             break;
6311 
6312         default:
6313             {
6314                 PickleState *st = _Pickle_GetGlobalState();
6315                 unsigned char c = (unsigned char) *s;
6316                 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6317                     PyErr_Format(st->UnpicklingError,
6318                                  "invalid load key, '%c'.", c);
6319                 }
6320                 else {
6321                     PyErr_Format(st->UnpicklingError,
6322                                  "invalid load key, '\\x%02x'.", c);
6323                 }
6324                 return NULL;
6325             }
6326         }
6327 
6328         break;                  /* and we are done! */
6329     }
6330 
6331     if (PyErr_Occurred()) {
6332         return NULL;
6333     }
6334 
6335     if (_Unpickler_SkipConsumed(self) < 0)
6336         return NULL;
6337 
6338     PDATA_POP(self->stack, value);
6339     return value;
6340 }
6341 
6342 /*[clinic input]
6343 
6344 _pickle.Unpickler.load
6345 
6346 Load a pickle.
6347 
6348 Read a pickled object representation from the open file object given
6349 in the constructor, and return the reconstituted object hierarchy
6350 specified therein.
6351 [clinic start generated code]*/
6352 
6353 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6354 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6355 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6356 {
6357     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6358 
6359     /* Check whether the Unpickler was initialized correctly. This prevents
6360        segfaulting if a subclass overridden __init__ with a function that does
6361        not call Unpickler.__init__(). Here, we simply ensure that self->read
6362        is not NULL. */
6363     if (unpickler->read == NULL) {
6364         PickleState *st = _Pickle_GetGlobalState();
6365         PyErr_Format(st->UnpicklingError,
6366                      "Unpickler.__init__() was not called by %s.__init__()",
6367                      Py_TYPE(unpickler)->tp_name);
6368         return NULL;
6369     }
6370 
6371     return load(unpickler);
6372 }
6373 
6374 /* The name of find_class() is misleading. In newer pickle protocols, this
6375    function is used for loading any global (i.e., functions), not just
6376    classes. The name is kept only for backward compatibility. */
6377 
6378 /*[clinic input]
6379 
6380 _pickle.Unpickler.find_class
6381 
6382   module_name: object
6383   global_name: object
6384   /
6385 
6386 Return an object from a specified module.
6387 
6388 If necessary, the module will be imported. Subclasses may override
6389 this method (e.g. to restrict unpickling of arbitrary classes and
6390 functions).
6391 
6392 This method is called whenever a class or a function object is
6393 needed.  Both arguments passed are str objects.
6394 [clinic start generated code]*/
6395 
6396 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)6397 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
6398                                   PyObject *module_name,
6399                                   PyObject *global_name)
6400 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
6401 {
6402     PyObject *global;
6403     PyObject *modules_dict;
6404     PyObject *module;
6405     _Py_IDENTIFIER(modules);
6406 
6407     /* Try to map the old names used in Python 2.x to the new ones used in
6408        Python 3.x.  We do this only with old pickle protocols and when the
6409        user has not disabled the feature. */
6410     if (self->proto < 3 && self->fix_imports) {
6411         PyObject *key;
6412         PyObject *item;
6413         PickleState *st = _Pickle_GetGlobalState();
6414 
6415         /* Check if the global (i.e., a function or a class) was renamed
6416            or moved to another module. */
6417         key = PyTuple_Pack(2, module_name, global_name);
6418         if (key == NULL)
6419             return NULL;
6420         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
6421         Py_DECREF(key);
6422         if (item) {
6423             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6424                 PyErr_Format(PyExc_RuntimeError,
6425                              "_compat_pickle.NAME_MAPPING values should be "
6426                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6427                 return NULL;
6428             }
6429             module_name = PyTuple_GET_ITEM(item, 0);
6430             global_name = PyTuple_GET_ITEM(item, 1);
6431             if (!PyUnicode_Check(module_name) ||
6432                 !PyUnicode_Check(global_name)) {
6433                 PyErr_Format(PyExc_RuntimeError,
6434                              "_compat_pickle.NAME_MAPPING values should be "
6435                              "pairs of str, not (%.200s, %.200s)",
6436                              Py_TYPE(module_name)->tp_name,
6437                              Py_TYPE(global_name)->tp_name);
6438                 return NULL;
6439             }
6440         }
6441         else if (PyErr_Occurred()) {
6442             return NULL;
6443         }
6444         else {
6445             /* Check if the module was renamed. */
6446             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
6447             if (item) {
6448                 if (!PyUnicode_Check(item)) {
6449                     PyErr_Format(PyExc_RuntimeError,
6450                                 "_compat_pickle.IMPORT_MAPPING values should be "
6451                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
6452                     return NULL;
6453                 }
6454                 module_name = item;
6455             }
6456             else if (PyErr_Occurred()) {
6457                 return NULL;
6458             }
6459         }
6460     }
6461 
6462     modules_dict = _PySys_GetObjectId(&PyId_modules);
6463     if (modules_dict == NULL) {
6464         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
6465         return NULL;
6466     }
6467 
6468     module = PyDict_GetItemWithError(modules_dict, module_name);
6469     if (module == NULL) {
6470         if (PyErr_Occurred())
6471             return NULL;
6472         module = PyImport_Import(module_name);
6473         if (module == NULL)
6474             return NULL;
6475         global = getattribute(module, global_name, self->proto >= 4);
6476         Py_DECREF(module);
6477     }
6478     else {
6479         global = getattribute(module, global_name, self->proto >= 4);
6480     }
6481     return global;
6482 }
6483 
6484 /*[clinic input]
6485 
6486 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
6487 
6488 Returns size in memory, in bytes.
6489 [clinic start generated code]*/
6490 
6491 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)6492 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
6493 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
6494 {
6495     Py_ssize_t res;
6496 
6497     res = _PyObject_SIZE(Py_TYPE(self));
6498     if (self->memo != NULL)
6499         res += self->memo_size * sizeof(PyObject *);
6500     if (self->marks != NULL)
6501         res += self->marks_size * sizeof(Py_ssize_t);
6502     if (self->input_line != NULL)
6503         res += strlen(self->input_line) + 1;
6504     if (self->encoding != NULL)
6505         res += strlen(self->encoding) + 1;
6506     if (self->errors != NULL)
6507         res += strlen(self->errors) + 1;
6508     return res;
6509 }
6510 
/* Method table installed as tp_methods of Unpickler_Type.  The *_METHODDEF
   macros are defined outside this section (presumably in the Argument
   Clinic generated header that accompanies the *_impl functions -- confirm).
   The all-NULL entry terminates the table. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
6517 
6518 static void
Unpickler_dealloc(UnpicklerObject * self)6519 Unpickler_dealloc(UnpicklerObject *self)
6520 {
6521     PyObject_GC_UnTrack((PyObject *)self);
6522     Py_XDECREF(self->readline);
6523     Py_XDECREF(self->read);
6524     Py_XDECREF(self->peek);
6525     Py_XDECREF(self->stack);
6526     Py_XDECREF(self->pers_func);
6527     if (self->buffer.buf != NULL) {
6528         PyBuffer_Release(&self->buffer);
6529         self->buffer.buf = NULL;
6530     }
6531 
6532     _Unpickler_MemoCleanup(self);
6533     PyMem_Free(self->marks);
6534     PyMem_Free(self->input_line);
6535     PyMem_Free(self->encoding);
6536     PyMem_Free(self->errors);
6537 
6538     Py_TYPE(self)->tp_free((PyObject *)self);
6539 }
6540 
6541 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)6542 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6543 {
6544     Py_VISIT(self->readline);
6545     Py_VISIT(self->read);
6546     Py_VISIT(self->peek);
6547     Py_VISIT(self->stack);
6548     Py_VISIT(self->pers_func);
6549     return 0;
6550 }
6551 
6552 static int
Unpickler_clear(UnpicklerObject * self)6553 Unpickler_clear(UnpicklerObject *self)
6554 {
6555     Py_CLEAR(self->readline);
6556     Py_CLEAR(self->read);
6557     Py_CLEAR(self->peek);
6558     Py_CLEAR(self->stack);
6559     Py_CLEAR(self->pers_func);
6560     if (self->buffer.buf != NULL) {
6561         PyBuffer_Release(&self->buffer);
6562         self->buffer.buf = NULL;
6563     }
6564 
6565     _Unpickler_MemoCleanup(self);
6566     PyMem_Free(self->marks);
6567     self->marks = NULL;
6568     PyMem_Free(self->input_line);
6569     self->input_line = NULL;
6570     PyMem_Free(self->encoding);
6571     self->encoding = NULL;
6572     PyMem_Free(self->errors);
6573     self->errors = NULL;
6574 
6575     return 0;
6576 }
6577 
6578 /*[clinic input]
6579 
6580 _pickle.Unpickler.__init__
6581 
6582   file: object
6583   *
6584   fix_imports: bool = True
6585   encoding: str = 'ASCII'
6586   errors: str = 'strict'
6587 
6588 This takes a binary file for reading a pickle data stream.
6589 
6590 The protocol version of the pickle is detected automatically, so no
6591 protocol argument is needed.  Bytes past the pickled object's
6592 representation are ignored.
6593 
6594 The argument *file* must have two methods, a read() method that takes
6595 an integer argument, and a readline() method that requires no
6596 arguments.  Both methods should return bytes.  Thus *file* can be a
6597 binary file object opened for reading, an io.BytesIO object, or any
6598 other custom object that meets this interface.
6599 
6600 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
6601 which are used to control compatibility support for pickle stream
6602 generated by Python 2.  If *fix_imports* is True, pickle will try to
6603 map the old Python 2 names to the new names used in Python 3.  The
6604 *encoding* and *errors* tell pickle how to decode 8-bit string
6605 instances pickled by Python 2; these default to 'ASCII' and 'strict',
6606 respectively.  The *encoding* can be 'bytes' to read these 8-bit
6607 string instances as bytes objects.
6608 [clinic start generated code]*/
6609 
6610 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors)6611 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
6612                                 int fix_imports, const char *encoding,
6613                                 const char *errors)
6614 /*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
6615 {
6616     _Py_IDENTIFIER(persistent_load);
6617 
6618     /* In case of multiple __init__() calls, clear previous content. */
6619     if (self->read != NULL)
6620         (void)Unpickler_clear(self);
6621 
6622     if (_Unpickler_SetInputStream(self, file) < 0)
6623         return -1;
6624 
6625     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
6626         return -1;
6627 
6628     self->fix_imports = fix_imports;
6629     if (self->fix_imports == -1)
6630         return -1;
6631 
6632     if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
6633         self->pers_func = _PyObject_GetAttrId((PyObject *)self,
6634                                               &PyId_persistent_load);
6635         if (self->pers_func == NULL)
6636             return 1;
6637     }
6638     else {
6639         self->pers_func = NULL;
6640     }
6641 
6642     self->stack = (Pdata *)Pdata_New();
6643     if (self->stack == NULL)
6644         return 1;
6645 
6646     self->memo_size = 32;
6647     self->memo = _Unpickler_NewMemo(self->memo_size);
6648     if (self->memo == NULL)
6649         return -1;
6650 
6651     self->proto = 0;
6652 
6653     return 0;
6654 }
6655 
6656 
6657 /* Define a proxy object for the Unpickler's internal memo object. This is to
6658  * avoid breaking code like:
6659  *  unpickler.memo.clear()
6660  * and
6661  *  unpickler.memo = saved_memo
6662  * Is this a good idea? Not really, but we don't want to break code that uses
6663  * it. Note that we don't implement the entire mapping API here. This is
6664  * intentional, as these should be treated as black-box implementation details.
6665  *
6666  * We do, however, have to implement pickling/unpickling support because of
6667  * real-world code like cvs2svn.
6668  */
6669 
6670 /*[clinic input]
6671 _pickle.UnpicklerMemoProxy.clear
6672 
6673 Remove all items from memo.
6674 [clinic start generated code]*/
6675 
6676 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)6677 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
6678 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
6679 {
6680     _Unpickler_MemoCleanup(self->unpickler);
6681     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6682     if (self->unpickler->memo == NULL)
6683         return NULL;
6684     Py_RETURN_NONE;
6685 }
6686 
6687 /*[clinic input]
6688 _pickle.UnpicklerMemoProxy.copy
6689 
6690 Copy the memo to a new object.
6691 [clinic start generated code]*/
6692 
6693 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)6694 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
6695 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
6696 {
6697     Py_ssize_t i;
6698     PyObject *new_memo = PyDict_New();
6699     if (new_memo == NULL)
6700         return NULL;
6701 
6702     for (i = 0; i < self->unpickler->memo_size; i++) {
6703         int status;
6704         PyObject *key, *value;
6705 
6706         value = self->unpickler->memo[i];
6707         if (value == NULL)
6708             continue;
6709 
6710         key = PyLong_FromSsize_t(i);
6711         if (key == NULL)
6712             goto error;
6713         status = PyDict_SetItem(new_memo, key, value);
6714         Py_DECREF(key);
6715         if (status < 0)
6716             goto error;
6717     }
6718     return new_memo;
6719 
6720 error:
6721     Py_DECREF(new_memo);
6722     return NULL;
6723 }
6724 
6725 /*[clinic input]
6726 _pickle.UnpicklerMemoProxy.__reduce__
6727 
6728 Implement pickling support.
6729 [clinic start generated code]*/
6730 
6731 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)6732 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
6733 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
6734 {
6735     PyObject *reduce_value;
6736     PyObject *constructor_args;
6737     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
6738     if (contents == NULL)
6739         return NULL;
6740 
6741     reduce_value = PyTuple_New(2);
6742     if (reduce_value == NULL) {
6743         Py_DECREF(contents);
6744         return NULL;
6745     }
6746     constructor_args = PyTuple_New(1);
6747     if (constructor_args == NULL) {
6748         Py_DECREF(contents);
6749         Py_DECREF(reduce_value);
6750         return NULL;
6751     }
6752     PyTuple_SET_ITEM(constructor_args, 0, contents);
6753     Py_INCREF((PyObject *)&PyDict_Type);
6754     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6755     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6756     return reduce_value;
6757 }
6758 
/* Method table installed as tp_methods of UnpicklerMemoProxyType: clear(),
   copy() and __reduce__().  The *_METHODDEF macros are defined outside this
   section (presumably Argument Clinic output -- confirm).  The all-NULL
   entry terminates the table. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
6765 
6766 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)6767 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
6768 {
6769     PyObject_GC_UnTrack(self);
6770     Py_XDECREF(self->unpickler);
6771     PyObject_GC_Del((PyObject *)self);
6772 }
6773 
/* tp_traverse for the memo proxy.  The only object reference a proxy holds
   is the unpickler it wraps, so that is the only thing visited.
   Returns 0, or the non-zero value produced by `visit`. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
6781 
/* tp_clear for the memo proxy: release the reference to the wrapped
   unpickler and leave the field NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
6788 
/* Type object for _pickle.UnpicklerMemoProxy, the object handed out by the
   Unpickler.memo getter.  Instances are GC-tracked and unhashable.  The
   initializer is positional, so entries must stay in PyTypeObject slot
   order; slots after tp_methods are left zero-initialised. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (labelled tp_compare in old code) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
6819 
6820 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)6821 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6822 {
6823     UnpicklerMemoProxyObject *self;
6824 
6825     self = PyObject_GC_New(UnpicklerMemoProxyObject,
6826                            &UnpicklerMemoProxyType);
6827     if (self == NULL)
6828         return NULL;
6829     Py_INCREF(unpickler);
6830     self->unpickler = unpickler;
6831     PyObject_GC_Track(self);
6832     return (PyObject *)self;
6833 }
6834 
6835 /*****************************************************************************/
6836 
6837 
6838 static PyObject *
Unpickler_get_memo(UnpicklerObject * self)6839 Unpickler_get_memo(UnpicklerObject *self)
6840 {
6841     return UnpicklerMemoProxy_New(self);
6842 }
6843 
6844 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj)6845 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
6846 {
6847     PyObject **new_memo;
6848     Py_ssize_t new_memo_size = 0;
6849     Py_ssize_t i;
6850 
6851     if (obj == NULL) {
6852         PyErr_SetString(PyExc_TypeError,
6853                         "attribute deletion is not supported");
6854         return -1;
6855     }
6856 
6857     if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
6858         UnpicklerObject *unpickler =
6859             ((UnpicklerMemoProxyObject *)obj)->unpickler;
6860 
6861         new_memo_size = unpickler->memo_size;
6862         new_memo = _Unpickler_NewMemo(new_memo_size);
6863         if (new_memo == NULL)
6864             return -1;
6865 
6866         for (i = 0; i < new_memo_size; i++) {
6867             Py_XINCREF(unpickler->memo[i]);
6868             new_memo[i] = unpickler->memo[i];
6869         }
6870     }
6871     else if (PyDict_Check(obj)) {
6872         Py_ssize_t i = 0;
6873         PyObject *key, *value;
6874 
6875         new_memo_size = PyDict_Size(obj);
6876         new_memo = _Unpickler_NewMemo(new_memo_size);
6877         if (new_memo == NULL)
6878             return -1;
6879 
6880         while (PyDict_Next(obj, &i, &key, &value)) {
6881             Py_ssize_t idx;
6882             if (!PyLong_Check(key)) {
6883                 PyErr_SetString(PyExc_TypeError,
6884                                 "memo key must be integers");
6885                 goto error;
6886             }
6887             idx = PyLong_AsSsize_t(key);
6888             if (idx == -1 && PyErr_Occurred())
6889                 goto error;
6890             if (idx < 0) {
6891                 PyErr_SetString(PyExc_ValueError,
6892                                 "memo key must be positive integers.");
6893                 goto error;
6894             }
6895             if (_Unpickler_MemoPut(self, idx, value) < 0)
6896                 goto error;
6897         }
6898     }
6899     else {
6900         PyErr_Format(PyExc_TypeError,
6901                      "'memo' attribute must be an UnpicklerMemoProxy object"
6902                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
6903         return -1;
6904     }
6905 
6906     _Unpickler_MemoCleanup(self);
6907     self->memo_size = new_memo_size;
6908     self->memo = new_memo;
6909 
6910     return 0;
6911 
6912   error:
6913     if (new_memo_size) {
6914         i = new_memo_size;
6915         while (--i >= 0) {
6916             Py_XDECREF(new_memo[i]);
6917         }
6918         PyMem_FREE(new_memo);
6919     }
6920     return -1;
6921 }
6922 
6923 static PyObject *
Unpickler_get_persload(UnpicklerObject * self)6924 Unpickler_get_persload(UnpicklerObject *self)
6925 {
6926     if (self->pers_func == NULL)
6927         PyErr_SetString(PyExc_AttributeError, "persistent_load");
6928     else
6929         Py_INCREF(self->pers_func);
6930     return self->pers_func;
6931 }
6932 
6933 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value)6934 Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6935 {
6936     if (value == NULL) {
6937         PyErr_SetString(PyExc_TypeError,
6938                         "attribute deletion is not supported");
6939         return -1;
6940     }
6941     if (!PyCallable_Check(value)) {
6942         PyErr_SetString(PyExc_TypeError,
6943                         "persistent_load must be a callable taking "
6944                         "one argument");
6945         return -1;
6946     }
6947 
6948     Py_INCREF(value);
6949     Py_XSETREF(self->pers_func, value);
6950 
6951     return 0;
6952 }
6953 
/* Computed attributes of _pickle.Unpickler, installed as tp_getset:
     memo            - read returns a fresh memo proxy; write accepts a
                       proxy or a dict
     persistent_load - the stored callable; write requires a callable
   The {NULL} entry terminates the table. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}
};
6960 
/* Type object for _pickle.Unpickler.  Instances are GC-tracked and the type
   may be subclassed.  The initializer is positional, so entries must stay
   in PyTypeObject slot order. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7003 
7004 /*[clinic input]
7005 
7006 _pickle.dump
7007 
7008   obj: object
7009   file: object
7010   protocol: object = NULL
7011   *
7012   fix_imports: bool = True
7013 
7014 Write a pickled representation of obj to the open file object file.
7015 
7016 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7017 be more efficient.
7018 
7019 The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
7021 protocol is 3; a backward-incompatible protocol designed for Python 3.
7022 
7023 Specifying a negative protocol version selects the highest protocol
7024 version supported.  The higher the protocol used, the more recent the
7025 version of Python needed to read the pickle produced.
7026 
7027 The *file* argument must have a write() method that accepts a single
7028 bytes argument.  It can thus be a file object opened for binary
7029 writing, an io.BytesIO instance, or any other custom object that meets
7030 this interface.
7031 
7032 If *fix_imports* is True and protocol is less than 3, pickle will try
7033 to map the new Python 3 names to the old module names used in Python
7034 2, so that the pickle data stream is readable with Python 2.
7035 [clinic start generated code]*/
7036 
7037 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports)7038 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7039                   PyObject *protocol, int fix_imports)
7040 /*[clinic end generated code: output=a4774d5fde7d34de input=830f8a64cef6f042]*/
7041 {
7042     PicklerObject *pickler = _Pickler_New();
7043 
7044     if (pickler == NULL)
7045         return NULL;
7046 
7047     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7048         goto error;
7049 
7050     if (_Pickler_SetOutputStream(pickler, file) < 0)
7051         goto error;
7052 
7053     if (dump(pickler, obj) < 0)
7054         goto error;
7055 
7056     if (_Pickler_FlushToFile(pickler) < 0)
7057         goto error;
7058 
7059     Py_DECREF(pickler);
7060     Py_RETURN_NONE;
7061 
7062   error:
7063     Py_XDECREF(pickler);
7064     return NULL;
7065 }
7066 
7067 /*[clinic input]
7068 
7069 _pickle.dumps
7070 
7071   obj: object
7072   protocol: object = NULL
7073   *
7074   fix_imports: bool = True
7075 
7076 Return the pickled representation of the object as a bytes object.
7077 
7078 The optional *protocol* argument tells the pickler to use the given
7079 protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
7080 protocol is 3; a backward-incompatible protocol designed for Python 3.
7081 
7082 Specifying a negative protocol version selects the highest protocol
7083 version supported.  The higher the protocol used, the more recent the
7084 version of Python needed to read the pickle produced.
7085 
7086 If *fix_imports* is True and *protocol* is less than 3, pickle will
7087 try to map the new Python 3 names to the old module names used in
7088 Python 2, so that the pickle data stream is readable with Python 2.
7089 [clinic start generated code]*/
7090 
7091 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports)7092 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7093                    int fix_imports)
7094 /*[clinic end generated code: output=d75d5cda456fd261 input=293dbeda181580b7]*/
7095 {
7096     PyObject *result;
7097     PicklerObject *pickler = _Pickler_New();
7098 
7099     if (pickler == NULL)
7100         return NULL;
7101 
7102     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7103         goto error;
7104 
7105     if (dump(pickler, obj) < 0)
7106         goto error;
7107 
7108     result = _Pickler_GetString(pickler);
7109     Py_DECREF(pickler);
7110     return result;
7111 
7112   error:
7113     Py_XDECREF(pickler);
7114     return NULL;
7115 }
7116 
7117 /*[clinic input]
7118 
7119 _pickle.load
7120 
7121   file: object
7122   *
7123   fix_imports: bool = True
7124   encoding: str = 'ASCII'
7125   errors: str = 'strict'
7126 
7127 Read and return an object from the pickle data stored in a file.
7128 
7129 This is equivalent to ``Unpickler(file).load()``, but may be more
7130 efficient.
7131 
7132 The protocol version of the pickle is detected automatically, so no
7133 protocol argument is needed.  Bytes past the pickled object's
7134 representation are ignored.
7135 
7136 The argument *file* must have two methods, a read() method that takes
7137 an integer argument, and a readline() method that requires no
7138 arguments.  Both methods should return bytes.  Thus *file* can be a
7139 binary file object opened for reading, an io.BytesIO object, or any
7140 other custom object that meets this interface.
7141 
7142 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7143 which are used to control compatibility support for pickle stream
7144 generated by Python 2.  If *fix_imports* is True, pickle will try to
7145 map the old Python 2 names to the new names used in Python 3.  The
7146 *encoding* and *errors* tell pickle how to decode 8-bit string
7147 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7148 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7149 string instances as bytes objects.
7150 [clinic start generated code]*/
7151 
7152 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors)7153 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7154                   const char *encoding, const char *errors)
7155 /*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/
7156 {
7157     PyObject *result;
7158     UnpicklerObject *unpickler = _Unpickler_New();
7159 
7160     if (unpickler == NULL)
7161         return NULL;
7162 
7163     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7164         goto error;
7165 
7166     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7167         goto error;
7168 
7169     unpickler->fix_imports = fix_imports;
7170 
7171     result = load(unpickler);
7172     Py_DECREF(unpickler);
7173     return result;
7174 
7175   error:
7176     Py_XDECREF(unpickler);
7177     return NULL;
7178 }
7179 
7180 /*[clinic input]
7181 
7182 _pickle.loads
7183 
7184   data: object
7185   *
7186   fix_imports: bool = True
7187   encoding: str = 'ASCII'
7188   errors: str = 'strict'
7189 
7190 Read and return an object from the given pickle data.
7191 
7192 The protocol version of the pickle is detected automatically, so no
7193 protocol argument is needed.  Bytes past the pickled object's
7194 representation are ignored.
7195 
7196 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7197 which are used to control compatibility support for pickle stream
7198 generated by Python 2.  If *fix_imports* is True, pickle will try to
7199 map the old Python 2 names to the new names used in Python 3.  The
7200 *encoding* and *errors* tell pickle how to decode 8-bit string
7201 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7202 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7203 string instances as bytes objects.
7204 [clinic start generated code]*/
7205 
7206 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors)7207 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7208                    const char *encoding, const char *errors)
7209 /*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/
7210 {
7211     PyObject *result;
7212     UnpicklerObject *unpickler = _Unpickler_New();
7213 
7214     if (unpickler == NULL)
7215         return NULL;
7216 
7217     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7218         goto error;
7219 
7220     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7221         goto error;
7222 
7223     unpickler->fix_imports = fix_imports;
7224 
7225     result = load(unpickler);
7226     Py_DECREF(unpickler);
7227     return result;
7228 
7229   error:
7230     Py_XDECREF(unpickler);
7231     return NULL;
7232 }
7233 
/* Module-level functions of _pickle (dump, dumps, load, loads), referenced
   from _picklemodule as m_methods.  The *_METHODDEF macros are defined
   outside this section (presumably Argument Clinic output -- confirm).
   The all-NULL entry terminates the table. */
static struct PyMethodDef pickle_methods[] = {
    _PICKLE_DUMP_METHODDEF
    _PICKLE_DUMPS_METHODDEF
    _PICKLE_LOAD_METHODDEF
    _PICKLE_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
7241 
7242 static int
pickle_clear(PyObject * m)7243 pickle_clear(PyObject *m)
7244 {
7245     _Pickle_ClearState(_Pickle_GetState(m));
7246     return 0;
7247 }
7248 
7249 static void
pickle_free(PyObject * m)7250 pickle_free(PyObject *m)
7251 {
7252     _Pickle_ClearState(_Pickle_GetState(m));
7253 }
7254 
/* m_traverse hook of the _pickle module: visit each object reference kept
   in the per-module PickleState so the cyclic GC can see them.
   Returns 0, or the first non-zero value produced by `visit`. */
static int
pickle_traverse(PyObject *m, visitproc visit, void *arg)
{
    PickleState *st = _Pickle_GetState(m);
    /* Exception classes. */
    Py_VISIT(st->PickleError);
    Py_VISIT(st->PicklingError);
    Py_VISIT(st->UnpicklingError);
    /* Remaining state objects. */
    Py_VISIT(st->dispatch_table);
    Py_VISIT(st->extension_registry);
    Py_VISIT(st->extension_cache);
    Py_VISIT(st->inverted_registry);
    Py_VISIT(st->name_mapping_2to3);
    Py_VISIT(st->import_mapping_2to3);
    Py_VISIT(st->name_mapping_3to2);
    Py_VISIT(st->import_mapping_3to2);
    Py_VISIT(st->codecs_encode);
    Py_VISIT(st->getattr);
    return 0;
}
7274 
/* Module definition for _pickle.  m_size reserves a PickleState per module
   object; the traverse/clear/free hooks below manage the references stored
   in that state. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    sizeof(PickleState),  /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    pickle_traverse,      /* m_traverse */
    pickle_clear,         /* m_clear */
    (freefunc)pickle_free /* m_free */
};
7286 
7287 PyMODINIT_FUNC
PyInit__pickle(void)7288 PyInit__pickle(void)
7289 {
7290     PyObject *m;
7291     PickleState *st;
7292 
7293     m = PyState_FindModule(&_picklemodule);
7294     if (m) {
7295         Py_INCREF(m);
7296         return m;
7297     }
7298 
7299     if (PyType_Ready(&Unpickler_Type) < 0)
7300         return NULL;
7301     if (PyType_Ready(&Pickler_Type) < 0)
7302         return NULL;
7303     if (PyType_Ready(&Pdata_Type) < 0)
7304         return NULL;
7305     if (PyType_Ready(&PicklerMemoProxyType) < 0)
7306         return NULL;
7307     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7308         return NULL;
7309 
7310     /* Create the module and add the functions. */
7311     m = PyModule_Create(&_picklemodule);
7312     if (m == NULL)
7313         return NULL;
7314 
7315     Py_INCREF(&Pickler_Type);
7316     if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7317         return NULL;
7318     Py_INCREF(&Unpickler_Type);
7319     if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7320         return NULL;
7321 
7322     st = _Pickle_GetState(m);
7323 
7324     /* Initialize the exceptions. */
7325     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7326     if (st->PickleError == NULL)
7327         return NULL;
7328     st->PicklingError = \
7329         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7330     if (st->PicklingError == NULL)
7331         return NULL;
7332     st->UnpicklingError = \
7333         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7334     if (st->UnpicklingError == NULL)
7335         return NULL;
7336 
7337     Py_INCREF(st->PickleError);
7338     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7339         return NULL;
7340     Py_INCREF(st->PicklingError);
7341     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7342         return NULL;
7343     Py_INCREF(st->UnpicklingError);
7344     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7345         return NULL;
7346 
7347     if (_Pickle_InitState(st) < 0)
7348         return NULL;
7349 
7350     return m;
7351 }
7352