/* pickle accelerator C extension: _pickle module.
2 *
3 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5 * platforms. */
6
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 # error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10
11 #include "Python.h"
12 #include "pycore_moduleobject.h" // _PyModule_GetState()
13 #include "structmember.h" // PyMemberDef
14
15 PyDoc_STRVAR(pickle_module_doc,
16 "Optimized C implementation for the Python pickle module.");
17
18 /*[clinic input]
19 module _pickle
20 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
21 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
22 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
23 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
24 [clinic start generated code]*/
25 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
26
/* Protocol version numbers.
   HIGHEST_PROTOCOL must be raised whenever new opcodes are added to the
   pickle protocol.  DEFAULT_PROTOCOL may only be raised once the oldest
   still-supported version of Python already understands it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};
34
/* Pickle opcodes.  This list has to stay in sync with pickle.py; the
   detailed documentation of every opcode lives in pickletools.py.
   Each enumerator is the character that represents the opcode in the
   pickle stream. */
enum opcode {
    MARK             = '(',
    STOP             = '.',
    POP              = '0',
    POP_MARK         = '1',
    DUP              = '2',
    FLOAT            = 'F',
    INT              = 'I',
    BININT           = 'J',
    BININT1          = 'K',
    LONG             = 'L',
    BININT2          = 'M',
    NONE             = 'N',
    PERSID           = 'P',
    BINPERSID        = 'Q',
    REDUCE           = 'R',
    STRING           = 'S',
    BINSTRING        = 'T',
    SHORT_BINSTRING  = 'U',
    UNICODE          = 'V',
    BINUNICODE       = 'X',
    APPEND           = 'a',
    BUILD            = 'b',
    GLOBAL           = 'c',
    DICT             = 'd',
    EMPTY_DICT       = '}',
    APPENDS          = 'e',
    GET              = 'g',
    BINGET           = 'h',
    INST             = 'i',
    LONG_BINGET      = 'j',
    LIST             = 'l',
    EMPTY_LIST       = ']',
    OBJ              = 'o',
    PUT              = 'p',
    BINPUT           = 'q',
    LONG_BINPUT      = 'r',
    SETITEM          = 's',
    TUPLE            = 't',
    EMPTY_TUPLE      = ')',
    SETITEMS         = 'u',
    BINFLOAT         = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO            = '\x80',
    NEWOBJ           = '\x81',
    EXT1             = '\x82',
    EXT2             = '\x83',
    EXT4             = '\x84',
    TUPLE1           = '\x85',
    TUPLE2           = '\x86',
    TUPLE3           = '\x87',
    NEWTRUE          = '\x88',
    NEWFALSE         = '\x89',
    LONG1            = '\x8a',
    LONG4            = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES         = 'B',
    SHORT_BINBYTES   = 'C',

    /* Opcodes introduced by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Opcodes introduced by protocol 5. */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
115
/* Tunable sizes and limits used by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list()/batch_dict() emit before issuing an
       APPENDS/SETITEMS opcode.  Meant to mirror pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but there is no known benefit
       in letting them differ either. */
    BATCHSIZE = 1000,

    /* In "fast mode", the nesting depth after which the Pickler starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* How much to prefetch while unpickling (prefetching is disabled on
       streams that cannot be peeked). */
    PREFETCH = 8192 * 16,

    /* Framing parameters: a frame whose payload is shorter than
       FRAME_SIZE_MIN is written without a header, a frame is committed once
       its payload reaches FRAME_SIZE_TARGET, and FRAME_HEADER_SIZE is the
       space reserved for the FRAME opcode plus its 8-byte length. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
137
138 /*************************************************************************/
139
140 /* State of the pickle module, per PEP 3121. */
141 typedef struct {
142 /* Exception classes for pickle. */
143 PyObject *PickleError;
144 PyObject *PicklingError;
145 PyObject *UnpicklingError;
146
147 /* copyreg.dispatch_table, {type_object: pickling_function} */
148 PyObject *dispatch_table;
149
150 /* For the extension opcodes EXT1, EXT2 and EXT4. */
151
152 /* copyreg._extension_registry, {(module_name, function_name): code} */
153 PyObject *extension_registry;
154 /* copyreg._extension_cache, {code: object} */
155 PyObject *extension_cache;
156 /* copyreg._inverted_registry, {code: (module_name, function_name)} */
157 PyObject *inverted_registry;
158
159 /* Import mappings for compatibility with Python 2.x */
160
161 /* _compat_pickle.NAME_MAPPING,
162 {(oldmodule, oldname): (newmodule, newname)} */
163 PyObject *name_mapping_2to3;
164 /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
165 PyObject *import_mapping_2to3;
166 /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
167 PyObject *name_mapping_3to2;
168 PyObject *import_mapping_3to2;
169
170 /* codecs.encode, used for saving bytes in older protocols */
171 PyObject *codecs_encode;
172 /* builtins.getattr, used for saving nested names with protocol < 4 */
173 PyObject *getattr;
174 /* functools.partial, used for implementing __newobj_ex__ with protocols
175 2 and 3 */
176 PyObject *partial;
177 } PickleState;
178
179 /* Forward declaration of the _pickle module definition. */
180 static struct PyModuleDef _picklemodule;
181
182 /* Given a module object, get its per-module state. */
183 static PickleState *
_Pickle_GetState(PyObject * module)184 _Pickle_GetState(PyObject *module)
185 {
186 return (PickleState *)_PyModule_GetState(module);
187 }
188
189 /* Find the module instance imported in the currently running sub-interpreter
190 and get its state. */
191 static PickleState *
_Pickle_GetGlobalState(void)192 _Pickle_GetGlobalState(void)
193 {
194 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
195 }
196
197 /* Clear the given pickle module state. */
198 static void
_Pickle_ClearState(PickleState * st)199 _Pickle_ClearState(PickleState *st)
200 {
201 Py_CLEAR(st->PickleError);
202 Py_CLEAR(st->PicklingError);
203 Py_CLEAR(st->UnpicklingError);
204 Py_CLEAR(st->dispatch_table);
205 Py_CLEAR(st->extension_registry);
206 Py_CLEAR(st->extension_cache);
207 Py_CLEAR(st->inverted_registry);
208 Py_CLEAR(st->name_mapping_2to3);
209 Py_CLEAR(st->import_mapping_2to3);
210 Py_CLEAR(st->name_mapping_3to2);
211 Py_CLEAR(st->import_mapping_3to2);
212 Py_CLEAR(st->codecs_encode);
213 Py_CLEAR(st->getattr);
214 Py_CLEAR(st->partial);
215 }
216
217 /* Initialize the given pickle module state. */
218 static int
_Pickle_InitState(PickleState * st)219 _Pickle_InitState(PickleState *st)
220 {
221 PyObject *copyreg = NULL;
222 PyObject *compat_pickle = NULL;
223 PyObject *codecs = NULL;
224 PyObject *functools = NULL;
225 _Py_IDENTIFIER(getattr);
226
227 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
228 if (st->getattr == NULL)
229 goto error;
230
231 copyreg = PyImport_ImportModule("copyreg");
232 if (!copyreg)
233 goto error;
234 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
235 if (!st->dispatch_table)
236 goto error;
237 if (!PyDict_CheckExact(st->dispatch_table)) {
238 PyErr_Format(PyExc_RuntimeError,
239 "copyreg.dispatch_table should be a dict, not %.200s",
240 Py_TYPE(st->dispatch_table)->tp_name);
241 goto error;
242 }
243 st->extension_registry = \
244 PyObject_GetAttrString(copyreg, "_extension_registry");
245 if (!st->extension_registry)
246 goto error;
247 if (!PyDict_CheckExact(st->extension_registry)) {
248 PyErr_Format(PyExc_RuntimeError,
249 "copyreg._extension_registry should be a dict, "
250 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
251 goto error;
252 }
253 st->inverted_registry = \
254 PyObject_GetAttrString(copyreg, "_inverted_registry");
255 if (!st->inverted_registry)
256 goto error;
257 if (!PyDict_CheckExact(st->inverted_registry)) {
258 PyErr_Format(PyExc_RuntimeError,
259 "copyreg._inverted_registry should be a dict, "
260 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
261 goto error;
262 }
263 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
264 if (!st->extension_cache)
265 goto error;
266 if (!PyDict_CheckExact(st->extension_cache)) {
267 PyErr_Format(PyExc_RuntimeError,
268 "copyreg._extension_cache should be a dict, "
269 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
270 goto error;
271 }
272 Py_CLEAR(copyreg);
273
274 /* Load the 2.x -> 3.x stdlib module mapping tables */
275 compat_pickle = PyImport_ImportModule("_compat_pickle");
276 if (!compat_pickle)
277 goto error;
278 st->name_mapping_2to3 = \
279 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
280 if (!st->name_mapping_2to3)
281 goto error;
282 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
283 PyErr_Format(PyExc_RuntimeError,
284 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
285 Py_TYPE(st->name_mapping_2to3)->tp_name);
286 goto error;
287 }
288 st->import_mapping_2to3 = \
289 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
290 if (!st->import_mapping_2to3)
291 goto error;
292 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
293 PyErr_Format(PyExc_RuntimeError,
294 "_compat_pickle.IMPORT_MAPPING should be a dict, "
295 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
296 goto error;
297 }
298 /* ... and the 3.x -> 2.x mapping tables */
299 st->name_mapping_3to2 = \
300 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
301 if (!st->name_mapping_3to2)
302 goto error;
303 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
304 PyErr_Format(PyExc_RuntimeError,
305 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
306 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
307 goto error;
308 }
309 st->import_mapping_3to2 = \
310 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
311 if (!st->import_mapping_3to2)
312 goto error;
313 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
314 PyErr_Format(PyExc_RuntimeError,
315 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
316 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
317 goto error;
318 }
319 Py_CLEAR(compat_pickle);
320
321 codecs = PyImport_ImportModule("codecs");
322 if (codecs == NULL)
323 goto error;
324 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
325 if (st->codecs_encode == NULL) {
326 goto error;
327 }
328 if (!PyCallable_Check(st->codecs_encode)) {
329 PyErr_Format(PyExc_RuntimeError,
330 "codecs.encode should be a callable, not %.200s",
331 Py_TYPE(st->codecs_encode)->tp_name);
332 goto error;
333 }
334 Py_CLEAR(codecs);
335
336 functools = PyImport_ImportModule("functools");
337 if (!functools)
338 goto error;
339 st->partial = PyObject_GetAttrString(functools, "partial");
340 if (!st->partial)
341 goto error;
342 Py_CLEAR(functools);
343
344 return 0;
345
346 error:
347 Py_CLEAR(copyreg);
348 Py_CLEAR(compat_pickle);
349 Py_CLEAR(codecs);
350 Py_CLEAR(functools);
351 _Pickle_ClearState(st);
352 return -1;
353 }
354
355 /* Helper for calling a function with a single argument quickly.
356
357 This function steals the reference of the given argument. */
358 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)359 _Pickle_FastCall(PyObject *func, PyObject *obj)
360 {
361 PyObject *result;
362
363 result = PyObject_CallOneArg(func, obj);
364 Py_DECREF(obj);
365 return result;
366 }
367
368 /*************************************************************************/
369
370 /* Retrieve and deconstruct a method for avoiding a reference cycle
371 (pickler -> bound method of pickler -> pickler) */
372 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)373 init_method_ref(PyObject *self, _Py_Identifier *name,
374 PyObject **method_func, PyObject **method_self)
375 {
376 PyObject *func, *func2;
377 int ret;
378
379 /* *method_func and *method_self should be consistent. All refcount decrements
380 should be occurred after setting *method_self and *method_func. */
381 ret = _PyObject_LookupAttrId(self, name, &func);
382 if (func == NULL) {
383 *method_self = NULL;
384 Py_CLEAR(*method_func);
385 return ret;
386 }
387
388 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
389 /* Deconstruct a bound Python method */
390 func2 = PyMethod_GET_FUNCTION(func);
391 Py_INCREF(func2);
392 *method_self = self; /* borrowed */
393 Py_XSETREF(*method_func, func2);
394 Py_DECREF(func);
395 return 0;
396 }
397 else {
398 *method_self = NULL;
399 Py_XSETREF(*method_func, func);
400 return 0;
401 }
402 }
403
404 /* Bind a method if it was deconstructed */
405 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)406 reconstruct_method(PyObject *func, PyObject *self)
407 {
408 if (self) {
409 return PyMethod_New(func, self);
410 }
411 else {
412 Py_INCREF(func);
413 return func;
414 }
415 }
416
417 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)418 call_method(PyObject *func, PyObject *self, PyObject *obj)
419 {
420 if (self) {
421 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
422 }
423 else {
424 return PyObject_CallOneArg(func, obj);
425 }
426 }
427
428 /*************************************************************************/
429
430 /* Internal data type used as the unpickling stack. */
431 typedef struct {
432 PyObject_VAR_HEAD
433 PyObject **data;
434 int mark_set; /* is MARK set? */
435 Py_ssize_t fence; /* position of top MARK or 0 */
436 Py_ssize_t allocated; /* number of slots in data allocated */
437 } Pdata;
438
439 static void
Pdata_dealloc(Pdata * self)440 Pdata_dealloc(Pdata *self)
441 {
442 Py_ssize_t i = Py_SIZE(self);
443 while (--i >= 0) {
444 Py_DECREF(self->data[i]);
445 }
446 PyMem_Free(self->data);
447 PyObject_Free(self);
448 }
449
450 static PyTypeObject Pdata_Type = {
451 PyVarObject_HEAD_INIT(NULL, 0)
452 "_pickle.Pdata", /*tp_name*/
453 sizeof(Pdata), /*tp_basicsize*/
454 sizeof(PyObject *), /*tp_itemsize*/
455 (destructor)Pdata_dealloc, /*tp_dealloc*/
456 };
457
458 static PyObject *
Pdata_New(void)459 Pdata_New(void)
460 {
461 Pdata *self;
462
463 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
464 return NULL;
465 Py_SET_SIZE(self, 0);
466 self->mark_set = 0;
467 self->fence = 0;
468 self->allocated = 8;
469 self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
470 if (self->data)
471 return (PyObject *)self;
472 Py_DECREF(self);
473 return PyErr_NoMemory();
474 }
475
476
477 /* Retain only the initial clearto items. If clearto >= the current
478 * number of items, this is a (non-erroneous) NOP.
479 */
480 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)481 Pdata_clear(Pdata *self, Py_ssize_t clearto)
482 {
483 Py_ssize_t i = Py_SIZE(self);
484
485 assert(clearto >= self->fence);
486 if (clearto >= i)
487 return 0;
488
489 while (--i >= clearto) {
490 Py_CLEAR(self->data[i]);
491 }
492 Py_SET_SIZE(self, clearto);
493 return 0;
494 }
495
496 static int
Pdata_grow(Pdata * self)497 Pdata_grow(Pdata *self)
498 {
499 PyObject **data = self->data;
500 size_t allocated = (size_t)self->allocated;
501 size_t new_allocated;
502
503 new_allocated = (allocated >> 3) + 6;
504 /* check for integer overflow */
505 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
506 goto nomemory;
507 new_allocated += allocated;
508 PyMem_RESIZE(data, PyObject *, new_allocated);
509 if (data == NULL)
510 goto nomemory;
511
512 self->data = data;
513 self->allocated = (Py_ssize_t)new_allocated;
514 return 0;
515
516 nomemory:
517 PyErr_NoMemory();
518 return -1;
519 }
520
521 static int
Pdata_stack_underflow(Pdata * self)522 Pdata_stack_underflow(Pdata *self)
523 {
524 PickleState *st = _Pickle_GetGlobalState();
525 PyErr_SetString(st->UnpicklingError,
526 self->mark_set ?
527 "unexpected MARK found" :
528 "unpickling stack underflow");
529 return -1;
530 }
531
532 /* D is a Pdata*. Pop the topmost element and store it into V, which
533 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
534 * is raised and V is set to NULL.
535 */
536 static PyObject *
Pdata_pop(Pdata * self)537 Pdata_pop(Pdata *self)
538 {
539 if (Py_SIZE(self) <= self->fence) {
540 Pdata_stack_underflow(self);
541 return NULL;
542 }
543 Py_SET_SIZE(self, Py_SIZE(self) - 1);
544 return self->data[Py_SIZE(self)];
545 }
546 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
547
548 static int
Pdata_push(Pdata * self,PyObject * obj)549 Pdata_push(Pdata *self, PyObject *obj)
550 {
551 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
552 return -1;
553 }
554 self->data[Py_SIZE(self)] = obj;
555 Py_SET_SIZE(self, Py_SIZE(self) + 1);
556 return 0;
557 }
558
559 /* Push an object on stack, transferring its ownership to the stack. */
560 #define PDATA_PUSH(D, O, ER) do { \
561 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
562
563 /* Push an object on stack, adding a new reference to the object. */
564 #define PDATA_APPEND(D, O, ER) do { \
565 Py_INCREF((O)); \
566 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
567
568 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)569 Pdata_poptuple(Pdata *self, Py_ssize_t start)
570 {
571 PyObject *tuple;
572 Py_ssize_t len, i, j;
573
574 if (start < self->fence) {
575 Pdata_stack_underflow(self);
576 return NULL;
577 }
578 len = Py_SIZE(self) - start;
579 tuple = PyTuple_New(len);
580 if (tuple == NULL)
581 return NULL;
582 for (i = start, j = 0; j < len; i++, j++)
583 PyTuple_SET_ITEM(tuple, j, self->data[i]);
584
585 Py_SET_SIZE(self, start);
586 return tuple;
587 }
588
589 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)590 Pdata_poplist(Pdata *self, Py_ssize_t start)
591 {
592 PyObject *list;
593 Py_ssize_t len, i, j;
594
595 len = Py_SIZE(self) - start;
596 list = PyList_New(len);
597 if (list == NULL)
598 return NULL;
599 for (i = start, j = 0; j < len; i++, j++)
600 PyList_SET_ITEM(list, j, self->data[i]);
601
602 Py_SET_SIZE(self, start);
603 return list;
604 }
605
606 typedef struct {
607 PyObject *me_key;
608 Py_ssize_t me_value;
609 } PyMemoEntry;
610
611 typedef struct {
612 size_t mt_mask;
613 size_t mt_used;
614 size_t mt_allocated;
615 PyMemoEntry *mt_table;
616 } PyMemoTable;
617
618 typedef struct PicklerObject {
619 PyObject_HEAD
620 PyMemoTable *memo; /* Memo table, keep track of the seen
621 objects to support self-referential objects
622 pickling. */
623 PyObject *pers_func; /* persistent_id() method, can be NULL */
624 PyObject *pers_func_self; /* borrowed reference to self if pers_func
625 is an unbound method, NULL otherwise */
626 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
627 PyObject *reducer_override; /* hook for invoking user-defined callbacks
628 instead of save_global when pickling
629 functions and classes*/
630
631 PyObject *write; /* write() method of the output stream. */
632 PyObject *output_buffer; /* Write into a local bytearray buffer before
633 flushing to the stream. */
634 Py_ssize_t output_len; /* Length of output_buffer. */
635 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
636 int proto; /* Pickle protocol number, >= 0 */
637 int bin; /* Boolean, true if proto > 0 */
638 int framing; /* True when framing is enabled, proto >= 4 */
639 Py_ssize_t frame_start; /* Position in output_buffer where the
640 current frame begins. -1 if there
641 is no frame currently open. */
642
643 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
644 int fast; /* Enable fast mode if set to a true value.
645 The fast mode disable the usage of memo,
646 therefore speeding the pickling process by
647 not generating superfluous PUT opcodes. It
648 should not be used if with self-referential
649 objects. */
650 int fast_nesting;
651 int fix_imports; /* Indicate whether Pickler should fix
652 the name of globals for Python 2.x. */
653 PyObject *fast_memo;
654 PyObject *buffer_callback; /* Callback for out-of-band buffers, or NULL */
655 } PicklerObject;
656
657 typedef struct UnpicklerObject {
658 PyObject_HEAD
659 Pdata *stack; /* Pickle data stack, store unpickled objects. */
660
661 /* The unpickler memo is just an array of PyObject *s. Using a dict
662 is unnecessary, since the keys are contiguous ints. */
663 PyObject **memo;
664 size_t memo_size; /* Capacity of the memo array */
665 size_t memo_len; /* Number of objects in the memo */
666
667 PyObject *pers_func; /* persistent_load() method, can be NULL. */
668 PyObject *pers_func_self; /* borrowed reference to self if pers_func
669 is an unbound method, NULL otherwise */
670
671 Py_buffer buffer;
672 char *input_buffer;
673 char *input_line;
674 Py_ssize_t input_len;
675 Py_ssize_t next_read_idx;
676 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
677
678 PyObject *read; /* read() method of the input stream. */
679 PyObject *readinto; /* readinto() method of the input stream. */
680 PyObject *readline; /* readline() method of the input stream. */
681 PyObject *peek; /* peek() method of the input stream, or NULL */
682 PyObject *buffers; /* iterable of out-of-band buffers, or NULL */
683
684 char *encoding; /* Name of the encoding to be used for
685 decoding strings pickled using Python
686 2.x. The default value is "ASCII" */
687 char *errors; /* Name of errors handling scheme to used when
688 decoding strings. The default value is
689 "strict". */
690 Py_ssize_t *marks; /* Mark stack, used for unpickling container
691 objects. */
692 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
693 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
694 int proto; /* Protocol of the pickle loaded. */
695 int fix_imports; /* Indicate whether Unpickler should fix
696 the name of globals pickled by Python 2.x. */
697 } UnpicklerObject;
698
699 typedef struct {
700 PyObject_HEAD
701 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
702 } PicklerMemoProxyObject;
703
704 typedef struct {
705 PyObject_HEAD
706 UnpicklerObject *unpickler;
707 } UnpicklerMemoProxyObject;
708
709 /* Forward declarations */
710 static int save(PicklerObject *, PyObject *, int);
711 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
712 static PyTypeObject Pickler_Type;
713 static PyTypeObject Unpickler_Type;
714
715 #include "clinic/_pickle.c.h"
716
717 /*************************************************************************
718 A custom hashtable mapping void* to Python ints. This is used by the pickler
719 for memoization. Using a custom hashtable rather than PyDict allows us to skip
720 a bunch of unnecessary object creation. This makes a huge performance
721 difference. */
722
723 #define MT_MINSIZE 8
724 #define PERTURB_SHIFT 5
725
726
727 static PyMemoTable *
PyMemoTable_New(void)728 PyMemoTable_New(void)
729 {
730 PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
731 if (memo == NULL) {
732 PyErr_NoMemory();
733 return NULL;
734 }
735
736 memo->mt_used = 0;
737 memo->mt_allocated = MT_MINSIZE;
738 memo->mt_mask = MT_MINSIZE - 1;
739 memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
740 if (memo->mt_table == NULL) {
741 PyMem_Free(memo);
742 PyErr_NoMemory();
743 return NULL;
744 }
745 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
746
747 return memo;
748 }
749
750 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)751 PyMemoTable_Copy(PyMemoTable *self)
752 {
753 PyMemoTable *new = PyMemoTable_New();
754 if (new == NULL)
755 return NULL;
756
757 new->mt_used = self->mt_used;
758 new->mt_allocated = self->mt_allocated;
759 new->mt_mask = self->mt_mask;
760 /* The table we get from _New() is probably smaller than we wanted.
761 Free it and allocate one that's the right size. */
762 PyMem_Free(new->mt_table);
763 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
764 if (new->mt_table == NULL) {
765 PyMem_Free(new);
766 PyErr_NoMemory();
767 return NULL;
768 }
769 for (size_t i = 0; i < self->mt_allocated; i++) {
770 Py_XINCREF(self->mt_table[i].me_key);
771 }
772 memcpy(new->mt_table, self->mt_table,
773 sizeof(PyMemoEntry) * self->mt_allocated);
774
775 return new;
776 }
777
778 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)779 PyMemoTable_Size(PyMemoTable *self)
780 {
781 return self->mt_used;
782 }
783
784 static int
PyMemoTable_Clear(PyMemoTable * self)785 PyMemoTable_Clear(PyMemoTable *self)
786 {
787 Py_ssize_t i = self->mt_allocated;
788
789 while (--i >= 0) {
790 Py_XDECREF(self->mt_table[i].me_key);
791 }
792 self->mt_used = 0;
793 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
794 return 0;
795 }
796
797 static void
PyMemoTable_Del(PyMemoTable * self)798 PyMemoTable_Del(PyMemoTable *self)
799 {
800 if (self == NULL)
801 return;
802 PyMemoTable_Clear(self);
803
804 PyMem_Free(self->mt_table);
805 PyMem_Free(self);
806 }
807
808 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
809 can be considerably simpler than dictobject.c's lookdict(). */
810 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)811 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
812 {
813 size_t i;
814 size_t perturb;
815 size_t mask = self->mt_mask;
816 PyMemoEntry *table = self->mt_table;
817 PyMemoEntry *entry;
818 Py_hash_t hash = (Py_hash_t)key >> 3;
819
820 i = hash & mask;
821 entry = &table[i];
822 if (entry->me_key == NULL || entry->me_key == key)
823 return entry;
824
825 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
826 i = (i << 2) + i + perturb + 1;
827 entry = &table[i & mask];
828 if (entry->me_key == NULL || entry->me_key == key)
829 return entry;
830 }
831 Py_UNREACHABLE();
832 }
833
834 /* Returns -1 on failure, 0 on success. */
835 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)836 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
837 {
838 PyMemoEntry *oldtable = NULL;
839 PyMemoEntry *oldentry, *newentry;
840 size_t new_size = MT_MINSIZE;
841 size_t to_process;
842
843 assert(min_size > 0);
844
845 if (min_size > PY_SSIZE_T_MAX) {
846 PyErr_NoMemory();
847 return -1;
848 }
849
850 /* Find the smallest valid table size >= min_size. */
851 while (new_size < min_size) {
852 new_size <<= 1;
853 }
854 /* new_size needs to be a power of two. */
855 assert((new_size & (new_size - 1)) == 0);
856
857 /* Allocate new table. */
858 oldtable = self->mt_table;
859 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
860 if (self->mt_table == NULL) {
861 self->mt_table = oldtable;
862 PyErr_NoMemory();
863 return -1;
864 }
865 self->mt_allocated = new_size;
866 self->mt_mask = new_size - 1;
867 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
868
869 /* Copy entries from the old table. */
870 to_process = self->mt_used;
871 for (oldentry = oldtable; to_process > 0; oldentry++) {
872 if (oldentry->me_key != NULL) {
873 to_process--;
874 /* newentry is a pointer to a chunk of the new
875 mt_table, so we're setting the key:value pair
876 in-place. */
877 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
878 newentry->me_key = oldentry->me_key;
879 newentry->me_value = oldentry->me_value;
880 }
881 }
882
883 /* Deallocate the old table. */
884 PyMem_Free(oldtable);
885 return 0;
886 }
887
888 /* Returns NULL on failure, a pointer to the value otherwise. */
889 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)890 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
891 {
892 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
893 if (entry->me_key == NULL)
894 return NULL;
895 return &entry->me_value;
896 }
897
898 /* Returns -1 on failure, 0 on success. */
899 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)900 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
901 {
902 PyMemoEntry *entry;
903
904 assert(key != NULL);
905
906 entry = _PyMemoTable_Lookup(self, key);
907 if (entry->me_key != NULL) {
908 entry->me_value = value;
909 return 0;
910 }
911 Py_INCREF(key);
912 entry->me_key = key;
913 entry->me_value = value;
914 self->mt_used++;
915
916 /* If we added a key, we can safely resize. Otherwise just return!
917 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
918 *
919 * Quadrupling the size improves average table sparseness
920 * (reducing collisions) at the cost of some memory. It also halves
921 * the number of expensive resize operations in a growing memo table.
922 *
923 * Very large memo tables (over 50K items) use doubling instead.
924 * This may help applications with severe memory constraints.
925 */
926 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
927 return 0;
928 }
929 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
930 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
931 return _PyMemoTable_ResizeTable(self, desired_size);
932 }
933
934 #undef MT_MINSIZE
935 #undef PERTURB_SHIFT
936
937 /*************************************************************************/
938
939
940 static int
_Pickler_ClearBuffer(PicklerObject * self)941 _Pickler_ClearBuffer(PicklerObject *self)
942 {
943 Py_XSETREF(self->output_buffer,
944 PyBytes_FromStringAndSize(NULL, self->max_output_len));
945 if (self->output_buffer == NULL)
946 return -1;
947 self->output_len = 0;
948 self->frame_start = -1;
949 return 0;
950 }
951
/* Store `value` into the 8 bytes starting at `out`, least significant
   byte first.  When size_t is narrower than 8 bytes, the remaining high
   bytes are set to zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    size_t pos = 0;

    /* The bytes that size_t actually provides ... */
    while (pos < sizeof(size_t)) {
        out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        pos++;
    }
    /* ... followed by zero padding up to 8 bytes. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
966
967 static int
_Pickler_CommitFrame(PicklerObject * self)968 _Pickler_CommitFrame(PicklerObject *self)
969 {
970 size_t frame_len;
971 char *qdata;
972
973 if (!self->framing || self->frame_start == -1)
974 return 0;
975 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
976 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
977 if (frame_len >= FRAME_SIZE_MIN) {
978 qdata[0] = FRAME;
979 _write_size64(qdata + 1, frame_len);
980 }
981 else {
982 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
983 self->output_len -= FRAME_HEADER_SIZE;
984 }
985 self->frame_start = -1;
986 return 0;
987 }
988
989 static PyObject *
_Pickler_GetString(PicklerObject * self)990 _Pickler_GetString(PicklerObject *self)
991 {
992 PyObject *output_buffer = self->output_buffer;
993
994 assert(self->output_buffer != NULL);
995
996 if (_Pickler_CommitFrame(self))
997 return NULL;
998
999 self->output_buffer = NULL;
1000 /* Resize down to exact size */
1001 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1002 return NULL;
1003 return output_buffer;
1004 }
1005
1006 static int
_Pickler_FlushToFile(PicklerObject * self)1007 _Pickler_FlushToFile(PicklerObject *self)
1008 {
1009 PyObject *output, *result;
1010
1011 assert(self->write != NULL);
1012
1013 /* This will commit the frame first */
1014 output = _Pickler_GetString(self);
1015 if (output == NULL)
1016 return -1;
1017
1018 result = _Pickle_FastCall(self->write, output);
1019 Py_XDECREF(result);
1020 return (result == NULL) ? -1 : 0;
1021 }
1022
1023 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1024 _Pickler_OpcodeBoundary(PicklerObject *self)
1025 {
1026 Py_ssize_t frame_len;
1027
1028 if (!self->framing || self->frame_start == -1) {
1029 return 0;
1030 }
1031 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1032 if (frame_len >= FRAME_SIZE_TARGET) {
1033 if(_Pickler_CommitFrame(self)) {
1034 return -1;
1035 }
1036 /* Flush the content of the committed frame to the underlying
1037 * file and reuse the pickler buffer for the next frame so as
1038 * to limit memory usage when dumping large complex objects to
1039 * a file.
1040 *
1041 * self->write is NULL when called via dumps.
1042 */
1043 if (self->write != NULL) {
1044 if (_Pickler_FlushToFile(self) < 0) {
1045 return -1;
1046 }
1047 if (_Pickler_ClearBuffer(self) < 0) {
1048 return -1;
1049 }
1050 }
1051 }
1052 return 0;
1053 }
1054
1055 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1056 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1057 {
1058 Py_ssize_t i, n, required;
1059 char *buffer;
1060 int need_new_frame;
1061
1062 assert(s != NULL);
1063 need_new_frame = (self->framing && self->frame_start == -1);
1064
1065 if (need_new_frame)
1066 n = data_len + FRAME_HEADER_SIZE;
1067 else
1068 n = data_len;
1069
1070 required = self->output_len + n;
1071 if (required > self->max_output_len) {
1072 /* Make place in buffer for the pickle chunk */
1073 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1074 PyErr_NoMemory();
1075 return -1;
1076 }
1077 self->max_output_len = (self->output_len + n) / 2 * 3;
1078 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1079 return -1;
1080 }
1081 buffer = PyBytes_AS_STRING(self->output_buffer);
1082 if (need_new_frame) {
1083 /* Setup new frame */
1084 Py_ssize_t frame_start = self->output_len;
1085 self->frame_start = frame_start;
1086 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1087 /* Write an invalid value, for debugging */
1088 buffer[frame_start + i] = 0xFE;
1089 }
1090 self->output_len += FRAME_HEADER_SIZE;
1091 }
1092 if (data_len < 8) {
1093 /* This is faster than memcpy when the string is short. */
1094 for (i = 0; i < data_len; i++) {
1095 buffer[self->output_len + i] = s[i];
1096 }
1097 }
1098 else {
1099 memcpy(buffer + self->output_len, s, data_len);
1100 }
1101 self->output_len += data_len;
1102 return data_len;
1103 }
1104
1105 static PicklerObject *
_Pickler_New(void)1106 _Pickler_New(void)
1107 {
1108 PicklerObject *self;
1109
1110 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1111 if (self == NULL)
1112 return NULL;
1113
1114 self->pers_func = NULL;
1115 self->dispatch_table = NULL;
1116 self->buffer_callback = NULL;
1117 self->write = NULL;
1118 self->proto = 0;
1119 self->bin = 0;
1120 self->framing = 0;
1121 self->frame_start = -1;
1122 self->fast = 0;
1123 self->fast_nesting = 0;
1124 self->fix_imports = 0;
1125 self->fast_memo = NULL;
1126 self->max_output_len = WRITE_BUF_SIZE;
1127 self->output_len = 0;
1128 self->reducer_override = NULL;
1129
1130 self->memo = PyMemoTable_New();
1131 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1132 self->max_output_len);
1133
1134 if (self->memo == NULL || self->output_buffer == NULL) {
1135 Py_DECREF(self);
1136 return NULL;
1137 }
1138
1139 PyObject_GC_Track(self);
1140 return self;
1141 }
1142
1143 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1144 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1145 {
1146 long proto;
1147
1148 if (protocol == Py_None) {
1149 proto = DEFAULT_PROTOCOL;
1150 }
1151 else {
1152 proto = PyLong_AsLong(protocol);
1153 if (proto < 0) {
1154 if (proto == -1 && PyErr_Occurred())
1155 return -1;
1156 proto = HIGHEST_PROTOCOL;
1157 }
1158 else if (proto > HIGHEST_PROTOCOL) {
1159 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1160 HIGHEST_PROTOCOL);
1161 return -1;
1162 }
1163 }
1164 self->proto = (int)proto;
1165 self->bin = proto > 0;
1166 self->fix_imports = fix_imports && proto < 3;
1167 return 0;
1168 }
1169
1170 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1171 be called once on a freshly created Pickler. */
1172 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1173 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1174 {
1175 _Py_IDENTIFIER(write);
1176 assert(file != NULL);
1177 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1178 return -1;
1179 }
1180 if (self->write == NULL) {
1181 PyErr_SetString(PyExc_TypeError,
1182 "file must have a 'write' attribute");
1183 return -1;
1184 }
1185
1186 return 0;
1187 }
1188
1189 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1190 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1191 {
1192 if (buffer_callback == Py_None) {
1193 buffer_callback = NULL;
1194 }
1195 if (buffer_callback != NULL && self->proto < 5) {
1196 PyErr_SetString(PyExc_ValueError,
1197 "buffer_callback needs protocol >= 5");
1198 return -1;
1199 }
1200
1201 Py_XINCREF(buffer_callback);
1202 self->buffer_callback = buffer_callback;
1203 return 0;
1204 }
1205
1206 /* Returns the size of the input on success, -1 on failure. This takes its
1207 own reference to `input`. */
1208 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1209 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1210 {
1211 if (self->buffer.buf != NULL)
1212 PyBuffer_Release(&self->buffer);
1213 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1214 return -1;
1215 self->input_buffer = self->buffer.buf;
1216 self->input_len = self->buffer.len;
1217 self->next_read_idx = 0;
1218 self->prefetched_idx = self->input_len;
1219 return self->input_len;
1220 }
1221
1222 static int
bad_readline(void)1223 bad_readline(void)
1224 {
1225 PickleState *st = _Pickle_GetGlobalState();
1226 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1227 return -1;
1228 }
1229
1230 /* Skip any consumed data that was only prefetched using peek() */
1231 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1232 _Unpickler_SkipConsumed(UnpicklerObject *self)
1233 {
1234 Py_ssize_t consumed;
1235 PyObject *r;
1236
1237 consumed = self->next_read_idx - self->prefetched_idx;
1238 if (consumed <= 0)
1239 return 0;
1240
1241 assert(self->peek); /* otherwise we did something wrong */
1242 /* This makes a useless copy... */
1243 r = PyObject_CallFunction(self->read, "n", consumed);
1244 if (r == NULL)
1245 return -1;
1246 Py_DECREF(r);
1247
1248 self->prefetched_idx = self->next_read_idx;
1249 return 0;
1250 }
1251
1252 static const Py_ssize_t READ_WHOLE_LINE = -1;
1253
1254 /* If reading from a file, we need to only pull the bytes we need, since there
1255 may be multiple pickle objects arranged contiguously in the same input
1256 buffer.
1257
1258 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1259 bytes from the input stream/buffer.
1260
1261 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1262 failure; on success, returns the number of bytes read from the file.
1263
1264 On success, self->input_len will be 0; this is intentional so that when
1265 unpickling from a file, the "we've run out of data" code paths will trigger,
1266 causing the Unpickler to go back to the file for more data. Use the returned
1267 size to tell you how much data you can process. */
1268 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1269 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1270 {
1271 PyObject *data;
1272 Py_ssize_t read_size;
1273
1274 assert(self->read != NULL);
1275
1276 if (_Unpickler_SkipConsumed(self) < 0)
1277 return -1;
1278
1279 if (n == READ_WHOLE_LINE) {
1280 data = PyObject_CallNoArgs(self->readline);
1281 }
1282 else {
1283 PyObject *len;
1284 /* Prefetch some data without advancing the file pointer, if possible */
1285 if (self->peek && n < PREFETCH) {
1286 len = PyLong_FromSsize_t(PREFETCH);
1287 if (len == NULL)
1288 return -1;
1289 data = _Pickle_FastCall(self->peek, len);
1290 if (data == NULL) {
1291 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1292 return -1;
1293 /* peek() is probably not supported by the given file object */
1294 PyErr_Clear();
1295 Py_CLEAR(self->peek);
1296 }
1297 else {
1298 read_size = _Unpickler_SetStringInput(self, data);
1299 Py_DECREF(data);
1300 self->prefetched_idx = 0;
1301 if (n <= read_size)
1302 return n;
1303 }
1304 }
1305 len = PyLong_FromSsize_t(n);
1306 if (len == NULL)
1307 return -1;
1308 data = _Pickle_FastCall(self->read, len);
1309 }
1310 if (data == NULL)
1311 return -1;
1312
1313 read_size = _Unpickler_SetStringInput(self, data);
1314 Py_DECREF(data);
1315 return read_size;
1316 }
1317
1318 /* Don't call it directly: use _Unpickler_Read() */
1319 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1320 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1321 {
1322 Py_ssize_t num_read;
1323
1324 *s = NULL;
1325 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1326 PickleState *st = _Pickle_GetGlobalState();
1327 PyErr_SetString(st->UnpicklingError,
1328 "read would overflow (invalid bytecode)");
1329 return -1;
1330 }
1331
1332 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1333 assert(self->next_read_idx + n > self->input_len);
1334
1335 if (!self->read)
1336 return bad_readline();
1337
1338 /* Extend the buffer to satisfy desired size */
1339 num_read = _Unpickler_ReadFromFile(self, n);
1340 if (num_read < 0)
1341 return -1;
1342 if (num_read < n)
1343 return bad_readline();
1344 *s = self->input_buffer;
1345 self->next_read_idx = n;
1346 return n;
1347 }
1348
1349 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1350 *
1351 * This should only be used for non-small data reads where potentially
1352 * avoiding a copy is beneficial. This method does not try to prefetch
1353 * more data into the input buffer.
1354 *
1355 * _Unpickler_Read() is recommended in most cases.
1356 */
1357 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1358 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1359 {
1360 assert(n != READ_WHOLE_LINE);
1361
1362 /* Read from available buffer data, if any */
1363 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1364 if (in_buffer > 0) {
1365 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1366 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1367 self->next_read_idx += to_read;
1368 buf += to_read;
1369 n -= to_read;
1370 if (n == 0) {
1371 /* Entire read was satisfied from buffer */
1372 return n;
1373 }
1374 }
1375
1376 /* Read from file */
1377 if (!self->read) {
1378 /* We're unpickling memory, this means the input is truncated */
1379 return bad_readline();
1380 }
1381 if (_Unpickler_SkipConsumed(self) < 0) {
1382 return -1;
1383 }
1384
1385 if (!self->readinto) {
1386 /* readinto() not supported on file-like object, fall back to read()
1387 * and copy into destination buffer (bpo-39681) */
1388 PyObject* len = PyLong_FromSsize_t(n);
1389 if (len == NULL) {
1390 return -1;
1391 }
1392 PyObject* data = _Pickle_FastCall(self->read, len);
1393 if (data == NULL) {
1394 return -1;
1395 }
1396 if (!PyBytes_Check(data)) {
1397 PyErr_Format(PyExc_ValueError,
1398 "read() returned non-bytes object (%R)",
1399 Py_TYPE(data));
1400 Py_DECREF(data);
1401 return -1;
1402 }
1403 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1404 if (read_size < n) {
1405 Py_DECREF(data);
1406 return bad_readline();
1407 }
1408 memcpy(buf, PyBytes_AS_STRING(data), n);
1409 Py_DECREF(data);
1410 return n;
1411 }
1412
1413 /* Call readinto() into user buffer */
1414 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1415 if (buf_obj == NULL) {
1416 return -1;
1417 }
1418 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1419 if (read_size_obj == NULL) {
1420 return -1;
1421 }
1422 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1423 Py_DECREF(read_size_obj);
1424
1425 if (read_size < 0) {
1426 if (!PyErr_Occurred()) {
1427 PyErr_SetString(PyExc_ValueError,
1428 "readinto() returned negative size");
1429 }
1430 return -1;
1431 }
1432 if (read_size < n) {
1433 return bad_readline();
1434 }
1435 return n;
1436 }
1437
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when the input buffer already holds `n` more bytes, `*s` is
   pointed into the buffer and self->next_read_idx is advanced by `n`.
   Otherwise the request is delegated to _Unpickler_ReadImpl(), which
   refills the input buffer from the file-like object and sets
   self->next_read_idx to `n`.

   `*s` points into the unpickler's input buffer; no copy is made here.

   This is a macro: `self` and `n` are evaluated more than once, so the
   arguments must not have side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1457
1458 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1459 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1460 char **result)
1461 {
1462 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1463 if (input_line == NULL) {
1464 PyErr_NoMemory();
1465 return -1;
1466 }
1467
1468 memcpy(input_line, line, len);
1469 input_line[len] = '\0';
1470 self->input_line = input_line;
1471 *result = self->input_line;
1472 return len;
1473 }
1474
1475 /* Read a line from the input stream/buffer. If we run off the end of the input
1476 before hitting \n, raise an error.
1477
1478 Returns the number of chars read, or -1 on failure. */
1479 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1480 _Unpickler_Readline(UnpicklerObject *self, char **result)
1481 {
1482 Py_ssize_t i, num_read;
1483
1484 for (i = self->next_read_idx; i < self->input_len; i++) {
1485 if (self->input_buffer[i] == '\n') {
1486 char *line_start = self->input_buffer + self->next_read_idx;
1487 num_read = i - self->next_read_idx + 1;
1488 self->next_read_idx = i + 1;
1489 return _Unpickler_CopyLine(self, line_start, num_read, result);
1490 }
1491 }
1492 if (!self->read)
1493 return bad_readline();
1494
1495 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1496 if (num_read < 0)
1497 return -1;
1498 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1499 return bad_readline();
1500 self->next_read_idx = num_read;
1501 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1502 }
1503
1504 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1505 will be modified in place. */
1506 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1507 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1508 {
1509 size_t i;
1510
1511 assert(new_size > self->memo_size);
1512
1513 PyObject **memo_new = self->memo;
1514 PyMem_RESIZE(memo_new, PyObject *, new_size);
1515 if (memo_new == NULL) {
1516 PyErr_NoMemory();
1517 return -1;
1518 }
1519 self->memo = memo_new;
1520 for (i = self->memo_size; i < new_size; i++)
1521 self->memo[i] = NULL;
1522 self->memo_size = new_size;
1523 return 0;
1524 }
1525
1526 /* Returns NULL if idx is out of bounds. */
1527 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1528 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1529 {
1530 if (idx >= self->memo_size)
1531 return NULL;
1532
1533 return self->memo[idx];
1534 }
1535
1536 /* Returns -1 (with an exception set) on failure, 0 on success.
1537 This takes its own reference to `value`. */
1538 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1539 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1540 {
1541 PyObject *old_item;
1542
1543 if (idx >= self->memo_size) {
1544 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1545 return -1;
1546 assert(idx < self->memo_size);
1547 }
1548 Py_INCREF(value);
1549 old_item = self->memo[idx];
1550 self->memo[idx] = value;
1551 if (old_item != NULL) {
1552 Py_DECREF(old_item);
1553 }
1554 else {
1555 self->memo_len++;
1556 }
1557 return 0;
1558 }
1559
1560 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1561 _Unpickler_NewMemo(Py_ssize_t new_size)
1562 {
1563 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1564 if (memo == NULL) {
1565 PyErr_NoMemory();
1566 return NULL;
1567 }
1568 memset(memo, 0, new_size * sizeof(PyObject *));
1569 return memo;
1570 }
1571
1572 /* Free the unpickler's memo, taking care to decref any items left in it. */
1573 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1574 _Unpickler_MemoCleanup(UnpicklerObject *self)
1575 {
1576 Py_ssize_t i;
1577 PyObject **memo = self->memo;
1578
1579 if (self->memo == NULL)
1580 return;
1581 self->memo = NULL;
1582 i = self->memo_size;
1583 while (--i >= 0) {
1584 Py_XDECREF(memo[i]);
1585 }
1586 PyMem_Free(memo);
1587 }
1588
1589 static UnpicklerObject *
_Unpickler_New(void)1590 _Unpickler_New(void)
1591 {
1592 UnpicklerObject *self;
1593
1594 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1595 if (self == NULL)
1596 return NULL;
1597
1598 self->pers_func = NULL;
1599 self->input_buffer = NULL;
1600 self->input_line = NULL;
1601 self->input_len = 0;
1602 self->next_read_idx = 0;
1603 self->prefetched_idx = 0;
1604 self->read = NULL;
1605 self->readinto = NULL;
1606 self->readline = NULL;
1607 self->peek = NULL;
1608 self->buffers = NULL;
1609 self->encoding = NULL;
1610 self->errors = NULL;
1611 self->marks = NULL;
1612 self->num_marks = 0;
1613 self->marks_size = 0;
1614 self->proto = 0;
1615 self->fix_imports = 0;
1616 memset(&self->buffer, 0, sizeof(Py_buffer));
1617 self->memo_size = 32;
1618 self->memo_len = 0;
1619 self->memo = _Unpickler_NewMemo(self->memo_size);
1620 self->stack = (Pdata *)Pdata_New();
1621
1622 if (self->memo == NULL || self->stack == NULL) {
1623 Py_DECREF(self);
1624 return NULL;
1625 }
1626
1627 PyObject_GC_Track(self);
1628 return self;
1629 }
1630
1631 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1632 be called once on a freshly created Unpickler. */
1633 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1634 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1635 {
1636 _Py_IDENTIFIER(peek);
1637 _Py_IDENTIFIER(read);
1638 _Py_IDENTIFIER(readinto);
1639 _Py_IDENTIFIER(readline);
1640
1641 /* Optional file methods */
1642 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1643 return -1;
1644 }
1645 if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1646 return -1;
1647 }
1648 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1649 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1650 if (!self->readline || !self->read) {
1651 if (!PyErr_Occurred()) {
1652 PyErr_SetString(PyExc_TypeError,
1653 "file must have 'read' and 'readline' attributes");
1654 }
1655 Py_CLEAR(self->read);
1656 Py_CLEAR(self->readinto);
1657 Py_CLEAR(self->readline);
1658 Py_CLEAR(self->peek);
1659 return -1;
1660 }
1661 return 0;
1662 }
1663
1664 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1665 be called once on a freshly created Unpickler. */
1666 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1667 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1668 const char *encoding,
1669 const char *errors)
1670 {
1671 if (encoding == NULL)
1672 encoding = "ASCII";
1673 if (errors == NULL)
1674 errors = "strict";
1675
1676 self->encoding = _PyMem_Strdup(encoding);
1677 self->errors = _PyMem_Strdup(errors);
1678 if (self->encoding == NULL || self->errors == NULL) {
1679 PyErr_NoMemory();
1680 return -1;
1681 }
1682 return 0;
1683 }
1684
1685 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1686 be called once on a freshly created Unpickler. */
1687 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1688 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1689 {
1690 if (buffers == NULL || buffers == Py_None) {
1691 self->buffers = NULL;
1692 }
1693 else {
1694 self->buffers = PyObject_GetIter(buffers);
1695 if (self->buffers == NULL) {
1696 return -1;
1697 }
1698 }
1699 return 0;
1700 }
1701
1702 /* Generate a GET opcode for an object stored in the memo. */
1703 static int
memo_get(PicklerObject * self,PyObject * key)1704 memo_get(PicklerObject *self, PyObject *key)
1705 {
1706 Py_ssize_t *value;
1707 char pdata[30];
1708 Py_ssize_t len;
1709
1710 value = PyMemoTable_Get(self->memo, key);
1711 if (value == NULL) {
1712 PyErr_SetObject(PyExc_KeyError, key);
1713 return -1;
1714 }
1715
1716 if (!self->bin) {
1717 pdata[0] = GET;
1718 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1719 "%zd\n", *value);
1720 len = strlen(pdata);
1721 }
1722 else {
1723 if (*value < 256) {
1724 pdata[0] = BINGET;
1725 pdata[1] = (unsigned char)(*value & 0xff);
1726 len = 2;
1727 }
1728 else if ((size_t)*value <= 0xffffffffUL) {
1729 pdata[0] = LONG_BINGET;
1730 pdata[1] = (unsigned char)(*value & 0xff);
1731 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1732 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1733 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1734 len = 5;
1735 }
1736 else { /* unlikely */
1737 PickleState *st = _Pickle_GetGlobalState();
1738 PyErr_SetString(st->PicklingError,
1739 "memo id too large for LONG_BINGET");
1740 return -1;
1741 }
1742 }
1743
1744 if (_Pickler_Write(self, pdata, len) < 0)
1745 return -1;
1746
1747 return 0;
1748 }
1749
1750 /* Store an object in the memo, assign it a new unique ID based on the number
1751 of objects currently stored in the memo and generate a PUT opcode. */
1752 static int
memo_put(PicklerObject * self,PyObject * obj)1753 memo_put(PicklerObject *self, PyObject *obj)
1754 {
1755 char pdata[30];
1756 Py_ssize_t len;
1757 Py_ssize_t idx;
1758
1759 const char memoize_op = MEMOIZE;
1760
1761 if (self->fast)
1762 return 0;
1763
1764 idx = PyMemoTable_Size(self->memo);
1765 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1766 return -1;
1767
1768 if (self->proto >= 4) {
1769 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1770 return -1;
1771 return 0;
1772 }
1773 else if (!self->bin) {
1774 pdata[0] = PUT;
1775 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1776 "%zd\n", idx);
1777 len = strlen(pdata);
1778 }
1779 else {
1780 if (idx < 256) {
1781 pdata[0] = BINPUT;
1782 pdata[1] = (unsigned char)idx;
1783 len = 2;
1784 }
1785 else if ((size_t)idx <= 0xffffffffUL) {
1786 pdata[0] = LONG_BINPUT;
1787 pdata[1] = (unsigned char)(idx & 0xff);
1788 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1789 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1790 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1791 len = 5;
1792 }
1793 else { /* unlikely */
1794 PickleState *st = _Pickle_GetGlobalState();
1795 PyErr_SetString(st->PicklingError,
1796 "memo id too large for LONG_BINPUT");
1797 return -1;
1798 }
1799 }
1800 if (_Pickler_Write(self, pdata, len) < 0)
1801 return -1;
1802
1803 return 0;
1804 }
1805
1806 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1807 get_dotted_path(PyObject *obj, PyObject *name)
1808 {
1809 _Py_static_string(PyId_dot, ".");
1810 PyObject *dotted_path;
1811 Py_ssize_t i, n;
1812
1813 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1814 if (dotted_path == NULL)
1815 return NULL;
1816 n = PyList_GET_SIZE(dotted_path);
1817 assert(n >= 1);
1818 for (i = 0; i < n; i++) {
1819 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1820 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1821 if (obj == NULL)
1822 PyErr_Format(PyExc_AttributeError,
1823 "Can't pickle local object %R", name);
1824 else
1825 PyErr_Format(PyExc_AttributeError,
1826 "Can't pickle local attribute %R on %R", name, obj);
1827 Py_DECREF(dotted_path);
1828 return NULL;
1829 }
1830 }
1831 return dotted_path;
1832 }
1833
1834 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1835 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1836 {
1837 Py_ssize_t i, n;
1838 PyObject *parent = NULL;
1839
1840 assert(PyList_CheckExact(names));
1841 Py_INCREF(obj);
1842 n = PyList_GET_SIZE(names);
1843 for (i = 0; i < n; i++) {
1844 PyObject *name = PyList_GET_ITEM(names, i);
1845 Py_XDECREF(parent);
1846 parent = obj;
1847 (void)_PyObject_LookupAttr(parent, name, &obj);
1848 if (obj == NULL) {
1849 Py_DECREF(parent);
1850 return NULL;
1851 }
1852 }
1853 if (pparent != NULL)
1854 *pparent = parent;
1855 else
1856 Py_XDECREF(parent);
1857 return obj;
1858 }
1859
1860
1861 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1862 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1863 {
1864 PyObject *dotted_path, *attr;
1865
1866 if (allow_qualname) {
1867 dotted_path = get_dotted_path(obj, name);
1868 if (dotted_path == NULL)
1869 return NULL;
1870 attr = get_deep_attribute(obj, dotted_path, NULL);
1871 Py_DECREF(dotted_path);
1872 }
1873 else {
1874 (void)_PyObject_LookupAttr(obj, name, &attr);
1875 }
1876 if (attr == NULL && !PyErr_Occurred()) {
1877 PyErr_Format(PyExc_AttributeError,
1878 "Can't get attribute %R on %R", name, obj);
1879 }
1880 return attr;
1881 }
1882
1883 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1884 _checkmodule(PyObject *module_name, PyObject *module,
1885 PyObject *global, PyObject *dotted_path)
1886 {
1887 if (module == Py_None) {
1888 return -1;
1889 }
1890 if (PyUnicode_Check(module_name) &&
1891 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1892 return -1;
1893 }
1894
1895 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1896 if (candidate == NULL) {
1897 return -1;
1898 }
1899 if (candidate != global) {
1900 Py_DECREF(candidate);
1901 return -1;
1902 }
1903 Py_DECREF(candidate);
1904 return 0;
1905 }
1906
1907 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1908 whichmodule(PyObject *global, PyObject *dotted_path)
1909 {
1910 PyObject *module_name;
1911 PyObject *module = NULL;
1912 Py_ssize_t i;
1913 PyObject *modules;
1914 _Py_IDENTIFIER(__module__);
1915 _Py_IDENTIFIER(modules);
1916 _Py_IDENTIFIER(__main__);
1917
1918 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1919 return NULL;
1920 }
1921 if (module_name) {
1922 /* In some rare cases (e.g., bound methods of extension types),
1923 __module__ can be None. If it is so, then search sys.modules for
1924 the module of global. */
1925 if (module_name != Py_None)
1926 return module_name;
1927 Py_CLEAR(module_name);
1928 }
1929 assert(module_name == NULL);
1930
1931 /* Fallback on walking sys.modules */
1932 modules = _PySys_GetObjectId(&PyId_modules);
1933 if (modules == NULL) {
1934 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1935 return NULL;
1936 }
1937 if (PyDict_CheckExact(modules)) {
1938 i = 0;
1939 while (PyDict_Next(modules, &i, &module_name, &module)) {
1940 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1941 Py_INCREF(module_name);
1942 return module_name;
1943 }
1944 if (PyErr_Occurred()) {
1945 return NULL;
1946 }
1947 }
1948 }
1949 else {
1950 PyObject *iterator = PyObject_GetIter(modules);
1951 if (iterator == NULL) {
1952 return NULL;
1953 }
1954 while ((module_name = PyIter_Next(iterator))) {
1955 module = PyObject_GetItem(modules, module_name);
1956 if (module == NULL) {
1957 Py_DECREF(module_name);
1958 Py_DECREF(iterator);
1959 return NULL;
1960 }
1961 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1962 Py_DECREF(module);
1963 Py_DECREF(iterator);
1964 return module_name;
1965 }
1966 Py_DECREF(module);
1967 Py_DECREF(module_name);
1968 if (PyErr_Occurred()) {
1969 Py_DECREF(iterator);
1970 return NULL;
1971 }
1972 }
1973 Py_DECREF(iterator);
1974 }
1975
1976 /* If no module is found, use __main__. */
1977 module_name = _PyUnicode_FromId(&PyId___main__);
1978 Py_XINCREF(module_name);
1979 return module_name;
1980 }
1981
1982 /* fast_save_enter() and fast_save_leave() are guards against recursive
1983 objects when Pickler is used with the "fast mode" (i.e., with object
1984 memoization disabled). If the nesting of a list or dict object exceed
1985 FAST_NESTING_LIMIT, these guards will start keeping an internal
1986 reference to the seen list or dict objects and check whether these objects
1987 are recursive. These are not strictly necessary, since save() has a
1988 hard-coded recursion limit, but they give a nicer error message than the
1989 typical RuntimeError. */
1990 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1991 fast_save_enter(PicklerObject *self, PyObject *obj)
1992 {
1993 /* if fast_nesting < 0, we're doing an error exit. */
1994 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1995 PyObject *key = NULL;
1996 if (self->fast_memo == NULL) {
1997 self->fast_memo = PyDict_New();
1998 if (self->fast_memo == NULL) {
1999 self->fast_nesting = -1;
2000 return 0;
2001 }
2002 }
2003 key = PyLong_FromVoidPtr(obj);
2004 if (key == NULL) {
2005 self->fast_nesting = -1;
2006 return 0;
2007 }
2008 int r = PyDict_Contains(self->fast_memo, key);
2009 if (r > 0) {
2010 PyErr_Format(PyExc_ValueError,
2011 "fast mode: can't pickle cyclic objects "
2012 "including object type %.200s at %p",
2013 Py_TYPE(obj)->tp_name, obj);
2014 }
2015 else if (r == 0) {
2016 r = PyDict_SetItem(self->fast_memo, key, Py_None);
2017 }
2018 Py_DECREF(key);
2019 if (r != 0) {
2020 self->fast_nesting = -1;
2021 return 0;
2022 }
2023 }
2024 return 1;
2025 }
2026
2027 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2028 fast_save_leave(PicklerObject *self, PyObject *obj)
2029 {
2030 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2031 PyObject *key = PyLong_FromVoidPtr(obj);
2032 if (key == NULL)
2033 return 0;
2034 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2035 Py_DECREF(key);
2036 return 0;
2037 }
2038 Py_DECREF(key);
2039 }
2040 return 1;
2041 }
2042
2043 static int
save_none(PicklerObject * self,PyObject * obj)2044 save_none(PicklerObject *self, PyObject *obj)
2045 {
2046 const char none_op = NONE;
2047 if (_Pickler_Write(self, &none_op, 1) < 0)
2048 return -1;
2049
2050 return 0;
2051 }
2052
2053 static int
save_bool(PicklerObject * self,PyObject * obj)2054 save_bool(PicklerObject *self, PyObject *obj)
2055 {
2056 if (self->proto >= 2) {
2057 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2058 if (_Pickler_Write(self, &bool_op, 1) < 0)
2059 return -1;
2060 }
2061 else {
2062 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2063 * so that unpicklers written before bools were introduced unpickle them
2064 * as ints, but unpicklers after can recognize that bools were intended.
2065 * Note that protocol 2 added direct ways to pickle bools.
2066 */
2067 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2068 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2069 return -1;
2070 }
2071 return 0;
2072 }
2073
2074 static int
save_long(PicklerObject * self,PyObject * obj)2075 save_long(PicklerObject *self, PyObject *obj)
2076 {
2077 PyObject *repr = NULL;
2078 Py_ssize_t size;
2079 long val;
2080 int overflow;
2081 int status = 0;
2082
2083 val= PyLong_AsLongAndOverflow(obj, &overflow);
2084 if (!overflow && (sizeof(long) <= 4 ||
2085 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2086 {
2087 /* result fits in a signed 4-byte integer.
2088
2089 Note: we can't use -0x80000000L in the above condition because some
2090 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2091 before applying the unary minus when sizeof(long) <= 4. The
2092 resulting value stays unsigned which is commonly not what we want,
2093 so MSVC happily warns us about it. However, that result would have
2094 been fine because we guard for sizeof(long) <= 4 which turns the
2095 condition true in that particular case. */
2096 char pdata[32];
2097 Py_ssize_t len = 0;
2098
2099 if (self->bin) {
2100 pdata[1] = (unsigned char)(val & 0xff);
2101 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2102 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2103 pdata[4] = (unsigned char)((val >> 24) & 0xff);
2104
2105 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2106 pdata[0] = BININT;
2107 len = 5;
2108 }
2109 else if (pdata[2] != 0) {
2110 pdata[0] = BININT2;
2111 len = 3;
2112 }
2113 else {
2114 pdata[0] = BININT1;
2115 len = 2;
2116 }
2117 }
2118 else {
2119 sprintf(pdata, "%c%ld\n", INT, val);
2120 len = strlen(pdata);
2121 }
2122 if (_Pickler_Write(self, pdata, len) < 0)
2123 return -1;
2124
2125 return 0;
2126 }
2127 assert(!PyErr_Occurred());
2128
2129 if (self->proto >= 2) {
2130 /* Linear-time pickling. */
2131 size_t nbits;
2132 size_t nbytes;
2133 unsigned char *pdata;
2134 char header[5];
2135 int i;
2136 int sign = _PyLong_Sign(obj);
2137
2138 if (sign == 0) {
2139 header[0] = LONG1;
2140 header[1] = 0; /* It's 0 -- an empty bytestring. */
2141 if (_Pickler_Write(self, header, 2) < 0)
2142 goto error;
2143 return 0;
2144 }
2145 nbits = _PyLong_NumBits(obj);
2146 if (nbits == (size_t)-1 && PyErr_Occurred())
2147 goto error;
2148 /* How many bytes do we need? There are nbits >> 3 full
2149 * bytes of data, and nbits & 7 leftover bits. If there
2150 * are any leftover bits, then we clearly need another
2151 * byte. What's not so obvious is that we *probably*
2152 * need another byte even if there aren't any leftovers:
2153 * the most-significant bit of the most-significant byte
2154 * acts like a sign bit, and it's usually got a sense
2155 * opposite of the one we need. The exception is ints
2156 * of the form -(2**(8*j-1)) for j > 0. Such an int is
2157 * its own 256's-complement, so has the right sign bit
2158 * even without the extra byte. That's a pain to check
2159 * for in advance, though, so we always grab an extra
2160 * byte at the start, and cut it back later if possible.
2161 */
2162 nbytes = (nbits >> 3) + 1;
2163 if (nbytes > 0x7fffffffL) {
2164 PyErr_SetString(PyExc_OverflowError,
2165 "int too large to pickle");
2166 goto error;
2167 }
2168 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2169 if (repr == NULL)
2170 goto error;
2171 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2172 i = _PyLong_AsByteArray((PyLongObject *)obj,
2173 pdata, nbytes,
2174 1 /* little endian */ , 1 /* signed */ );
2175 if (i < 0)
2176 goto error;
2177 /* If the int is negative, this may be a byte more than
2178 * needed. This is so iff the MSB is all redundant sign
2179 * bits.
2180 */
2181 if (sign < 0 &&
2182 nbytes > 1 &&
2183 pdata[nbytes - 1] == 0xff &&
2184 (pdata[nbytes - 2] & 0x80) != 0) {
2185 nbytes--;
2186 }
2187
2188 if (nbytes < 256) {
2189 header[0] = LONG1;
2190 header[1] = (unsigned char)nbytes;
2191 size = 2;
2192 }
2193 else {
2194 header[0] = LONG4;
2195 size = (Py_ssize_t) nbytes;
2196 for (i = 1; i < 5; i++) {
2197 header[i] = (unsigned char)(size & 0xff);
2198 size >>= 8;
2199 }
2200 size = 5;
2201 }
2202 if (_Pickler_Write(self, header, size) < 0 ||
2203 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2204 goto error;
2205 }
2206 else {
2207 const char long_op = LONG;
2208 const char *string;
2209
2210 /* proto < 2: write the repr and newline. This is quadratic-time (in
2211 the number of digits), in both directions. We add a trailing 'L'
2212 to the repr, for compatibility with Python 2.x. */
2213
2214 repr = PyObject_Repr(obj);
2215 if (repr == NULL)
2216 goto error;
2217
2218 string = PyUnicode_AsUTF8AndSize(repr, &size);
2219 if (string == NULL)
2220 goto error;
2221
2222 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2223 _Pickler_Write(self, string, size) < 0 ||
2224 _Pickler_Write(self, "L\n", 2) < 0)
2225 goto error;
2226 }
2227
2228 if (0) {
2229 error:
2230 status = -1;
2231 }
2232 Py_XDECREF(repr);
2233
2234 return status;
2235 }
2236
2237 static int
save_float(PicklerObject * self,PyObject * obj)2238 save_float(PicklerObject *self, PyObject *obj)
2239 {
2240 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2241
2242 if (self->bin) {
2243 char pdata[9];
2244 pdata[0] = BINFLOAT;
2245 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2246 return -1;
2247 if (_Pickler_Write(self, pdata, 9) < 0)
2248 return -1;
2249 }
2250 else {
2251 int result = -1;
2252 char *buf = NULL;
2253 char op = FLOAT;
2254
2255 if (_Pickler_Write(self, &op, 1) < 0)
2256 goto done;
2257
2258 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2259 if (!buf) {
2260 PyErr_NoMemory();
2261 goto done;
2262 }
2263
2264 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2265 goto done;
2266
2267 if (_Pickler_Write(self, "\n", 1) < 0)
2268 goto done;
2269
2270 result = 0;
2271 done:
2272 PyMem_Free(buf);
2273 return result;
2274 }
2275
2276 return 0;
2277 }
2278
2279 /* Perform direct write of the header and payload of the binary object.
2280
2281 The large contiguous data is written directly into the underlying file
2282 object, bypassing the output_buffer of the Pickler. We intentionally
2283 do not insert a protocol 4 frame opcode to make it possible to optimize
2284 file.read calls in the loader.
2285 */
2286 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2287 _Pickler_write_bytes(PicklerObject *self,
2288 const char *header, Py_ssize_t header_size,
2289 const char *data, Py_ssize_t data_size,
2290 PyObject *payload)
2291 {
2292 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2293 int framing = self->framing;
2294
2295 if (bypass_buffer) {
2296 assert(self->output_buffer != NULL);
2297 /* Commit the previous frame. */
2298 if (_Pickler_CommitFrame(self)) {
2299 return -1;
2300 }
2301 /* Disable framing temporarily */
2302 self->framing = 0;
2303 }
2304
2305 if (_Pickler_Write(self, header, header_size) < 0) {
2306 return -1;
2307 }
2308
2309 if (bypass_buffer && self->write != NULL) {
2310 /* Bypass the in-memory buffer to directly stream large data
2311 into the underlying file object. */
2312 PyObject *result, *mem = NULL;
2313 /* Dump the output buffer to the file. */
2314 if (_Pickler_FlushToFile(self) < 0) {
2315 return -1;
2316 }
2317
2318 /* Stream write the payload into the file without going through the
2319 output buffer. */
2320 if (payload == NULL) {
2321 /* TODO: It would be better to use a memoryview with a linked
2322 original string if this is possible. */
2323 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2324 if (payload == NULL) {
2325 return -1;
2326 }
2327 }
2328 result = PyObject_CallOneArg(self->write, payload);
2329 Py_XDECREF(mem);
2330 if (result == NULL) {
2331 return -1;
2332 }
2333 Py_DECREF(result);
2334
2335 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2336 if (_Pickler_ClearBuffer(self) < 0) {
2337 return -1;
2338 }
2339 }
2340 else {
2341 if (_Pickler_Write(self, data, data_size) < 0) {
2342 return -1;
2343 }
2344 }
2345
2346 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2347 self->framing = framing;
2348
2349 return 0;
2350 }
2351
2352 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2353 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2354 Py_ssize_t size)
2355 {
2356 assert(self->proto >= 3);
2357
2358 char header[9];
2359 Py_ssize_t len;
2360
2361 if (size < 0)
2362 return -1;
2363
2364 if (size <= 0xff) {
2365 header[0] = SHORT_BINBYTES;
2366 header[1] = (unsigned char)size;
2367 len = 2;
2368 }
2369 else if ((size_t)size <= 0xffffffffUL) {
2370 header[0] = BINBYTES;
2371 header[1] = (unsigned char)(size & 0xff);
2372 header[2] = (unsigned char)((size >> 8) & 0xff);
2373 header[3] = (unsigned char)((size >> 16) & 0xff);
2374 header[4] = (unsigned char)((size >> 24) & 0xff);
2375 len = 5;
2376 }
2377 else if (self->proto >= 4) {
2378 header[0] = BINBYTES8;
2379 _write_size64(header + 1, size);
2380 len = 9;
2381 }
2382 else {
2383 PyErr_SetString(PyExc_OverflowError,
2384 "serializing a bytes object larger than 4 GiB "
2385 "requires pickle protocol 4 or higher");
2386 return -1;
2387 }
2388
2389 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2390 return -1;
2391 }
2392
2393 if (memo_put(self, obj) < 0) {
2394 return -1;
2395 }
2396
2397 return 0;
2398 }
2399
2400 static int
save_bytes(PicklerObject * self,PyObject * obj)2401 save_bytes(PicklerObject *self, PyObject *obj)
2402 {
2403 if (self->proto < 3) {
2404 /* Older pickle protocols do not have an opcode for pickling bytes
2405 objects. Therefore, we need to fake the copy protocol (i.e.,
2406 the __reduce__ method) to permit bytes object unpickling.
2407
2408 Here we use a hack to be compatible with Python 2. Since in Python
2409 2 'bytes' is just an alias for 'str' (which has different
2410 parameters than the actual bytes object), we use codecs.encode
2411 to create the appropriate 'str' object when unpickled using
2412 Python 2 *and* the appropriate 'bytes' object when unpickled
2413 using Python 3. Again this is a hack and we don't need to do this
2414 with newer protocols. */
2415 PyObject *reduce_value;
2416 int status;
2417
2418 if (PyBytes_GET_SIZE(obj) == 0) {
2419 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2420 }
2421 else {
2422 PickleState *st = _Pickle_GetGlobalState();
2423 PyObject *unicode_str =
2424 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2425 PyBytes_GET_SIZE(obj),
2426 "strict");
2427 _Py_IDENTIFIER(latin1);
2428
2429 if (unicode_str == NULL)
2430 return -1;
2431 reduce_value = Py_BuildValue("(O(OO))",
2432 st->codecs_encode, unicode_str,
2433 _PyUnicode_FromId(&PyId_latin1));
2434 Py_DECREF(unicode_str);
2435 }
2436
2437 if (reduce_value == NULL)
2438 return -1;
2439
2440 /* save_reduce() will memoize the object automatically. */
2441 status = save_reduce(self, reduce_value, obj);
2442 Py_DECREF(reduce_value);
2443 return status;
2444 }
2445 else {
2446 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2447 PyBytes_GET_SIZE(obj));
2448 }
2449 }
2450
2451 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2452 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2453 Py_ssize_t size)
2454 {
2455 assert(self->proto >= 5);
2456
2457 char header[9];
2458 Py_ssize_t len;
2459
2460 if (size < 0)
2461 return -1;
2462
2463 header[0] = BYTEARRAY8;
2464 _write_size64(header + 1, size);
2465 len = 9;
2466
2467 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2468 return -1;
2469 }
2470
2471 if (memo_put(self, obj) < 0) {
2472 return -1;
2473 }
2474
2475 return 0;
2476 }
2477
2478 static int
save_bytearray(PicklerObject * self,PyObject * obj)2479 save_bytearray(PicklerObject *self, PyObject *obj)
2480 {
2481 if (self->proto < 5) {
2482 /* Older pickle protocols do not have an opcode for pickling
2483 * bytearrays. */
2484 PyObject *reduce_value = NULL;
2485 int status;
2486
2487 if (PyByteArray_GET_SIZE(obj) == 0) {
2488 reduce_value = Py_BuildValue("(O())",
2489 (PyObject *) &PyByteArray_Type);
2490 }
2491 else {
2492 PyObject *bytes_obj = PyBytes_FromObject(obj);
2493 if (bytes_obj != NULL) {
2494 reduce_value = Py_BuildValue("(O(O))",
2495 (PyObject *) &PyByteArray_Type,
2496 bytes_obj);
2497 Py_DECREF(bytes_obj);
2498 }
2499 }
2500 if (reduce_value == NULL)
2501 return -1;
2502
2503 /* save_reduce() will memoize the object automatically. */
2504 status = save_reduce(self, reduce_value, obj);
2505 Py_DECREF(reduce_value);
2506 return status;
2507 }
2508 else {
2509 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2510 PyByteArray_GET_SIZE(obj));
2511 }
2512 }
2513
2514 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2515 save_picklebuffer(PicklerObject *self, PyObject *obj)
2516 {
2517 if (self->proto < 5) {
2518 PickleState *st = _Pickle_GetGlobalState();
2519 PyErr_SetString(st->PicklingError,
2520 "PickleBuffer can only pickled with protocol >= 5");
2521 return -1;
2522 }
2523 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2524 if (view == NULL) {
2525 return -1;
2526 }
2527 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2528 PickleState *st = _Pickle_GetGlobalState();
2529 PyErr_SetString(st->PicklingError,
2530 "PickleBuffer can not be pickled when "
2531 "pointing to a non-contiguous buffer");
2532 return -1;
2533 }
2534 int in_band = 1;
2535 if (self->buffer_callback != NULL) {
2536 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2537 if (ret == NULL) {
2538 return -1;
2539 }
2540 in_band = PyObject_IsTrue(ret);
2541 Py_DECREF(ret);
2542 if (in_band == -1) {
2543 return -1;
2544 }
2545 }
2546 if (in_band) {
2547 /* Write data in-band */
2548 if (view->readonly) {
2549 return _save_bytes_data(self, obj, (const char*) view->buf,
2550 view->len);
2551 }
2552 else {
2553 return _save_bytearray_data(self, obj, (const char*) view->buf,
2554 view->len);
2555 }
2556 }
2557 else {
2558 /* Write data out-of-band */
2559 const char next_buffer_op = NEXT_BUFFER;
2560 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2561 return -1;
2562 }
2563 if (view->readonly) {
2564 const char readonly_buffer_op = READONLY_BUFFER;
2565 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2566 return -1;
2567 }
2568 }
2569 }
2570 return 0;
2571 }
2572
2573 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2574 backslash and newline characters to \uXXXX escapes. */
2575 static PyObject *
raw_unicode_escape(PyObject * obj)2576 raw_unicode_escape(PyObject *obj)
2577 {
2578 char *p;
2579 Py_ssize_t i, size;
2580 const void *data;
2581 unsigned int kind;
2582 _PyBytesWriter writer;
2583
2584 if (PyUnicode_READY(obj))
2585 return NULL;
2586
2587 _PyBytesWriter_Init(&writer);
2588
2589 size = PyUnicode_GET_LENGTH(obj);
2590 data = PyUnicode_DATA(obj);
2591 kind = PyUnicode_KIND(obj);
2592
2593 p = _PyBytesWriter_Alloc(&writer, size);
2594 if (p == NULL)
2595 goto error;
2596 writer.overallocate = 1;
2597
2598 for (i=0; i < size; i++) {
2599 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2600 /* Map 32-bit characters to '\Uxxxxxxxx' */
2601 if (ch >= 0x10000) {
2602 /* -1: subtract 1 preallocated byte */
2603 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2604 if (p == NULL)
2605 goto error;
2606
2607 *p++ = '\\';
2608 *p++ = 'U';
2609 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2610 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2611 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2612 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2613 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2615 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2616 *p++ = Py_hexdigits[ch & 15];
2617 }
2618 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2619 else if (ch >= 256 ||
2620 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2621 ch == 0x1a)
2622 {
2623 /* -1: subtract 1 preallocated byte */
2624 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2625 if (p == NULL)
2626 goto error;
2627
2628 *p++ = '\\';
2629 *p++ = 'u';
2630 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2631 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2632 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2633 *p++ = Py_hexdigits[ch & 15];
2634 }
2635 /* Copy everything else as-is */
2636 else
2637 *p++ = (char) ch;
2638 }
2639
2640 return _PyBytesWriter_Finish(&writer, p);
2641
2642 error:
2643 _PyBytesWriter_Dealloc(&writer);
2644 return NULL;
2645 }
2646
2647 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2648 write_unicode_binary(PicklerObject *self, PyObject *obj)
2649 {
2650 char header[9];
2651 Py_ssize_t len;
2652 PyObject *encoded = NULL;
2653 Py_ssize_t size;
2654 const char *data;
2655
2656 if (PyUnicode_READY(obj))
2657 return -1;
2658
2659 data = PyUnicode_AsUTF8AndSize(obj, &size);
2660 if (data == NULL) {
2661 /* Issue #8383: for strings with lone surrogates, fallback on the
2662 "surrogatepass" error handler. */
2663 PyErr_Clear();
2664 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2665 if (encoded == NULL)
2666 return -1;
2667
2668 data = PyBytes_AS_STRING(encoded);
2669 size = PyBytes_GET_SIZE(encoded);
2670 }
2671
2672 assert(size >= 0);
2673 if (size <= 0xff && self->proto >= 4) {
2674 header[0] = SHORT_BINUNICODE;
2675 header[1] = (unsigned char)(size & 0xff);
2676 len = 2;
2677 }
2678 else if ((size_t)size <= 0xffffffffUL) {
2679 header[0] = BINUNICODE;
2680 header[1] = (unsigned char)(size & 0xff);
2681 header[2] = (unsigned char)((size >> 8) & 0xff);
2682 header[3] = (unsigned char)((size >> 16) & 0xff);
2683 header[4] = (unsigned char)((size >> 24) & 0xff);
2684 len = 5;
2685 }
2686 else if (self->proto >= 4) {
2687 header[0] = BINUNICODE8;
2688 _write_size64(header + 1, size);
2689 len = 9;
2690 }
2691 else {
2692 PyErr_SetString(PyExc_OverflowError,
2693 "serializing a string larger than 4 GiB "
2694 "requires pickle protocol 4 or higher");
2695 Py_XDECREF(encoded);
2696 return -1;
2697 }
2698
2699 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2700 Py_XDECREF(encoded);
2701 return -1;
2702 }
2703 Py_XDECREF(encoded);
2704 return 0;
2705 }
2706
2707 static int
save_unicode(PicklerObject * self,PyObject * obj)2708 save_unicode(PicklerObject *self, PyObject *obj)
2709 {
2710 if (self->bin) {
2711 if (write_unicode_binary(self, obj) < 0)
2712 return -1;
2713 }
2714 else {
2715 PyObject *encoded;
2716 Py_ssize_t size;
2717 const char unicode_op = UNICODE;
2718
2719 encoded = raw_unicode_escape(obj);
2720 if (encoded == NULL)
2721 return -1;
2722
2723 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2724 Py_DECREF(encoded);
2725 return -1;
2726 }
2727
2728 size = PyBytes_GET_SIZE(encoded);
2729 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2730 Py_DECREF(encoded);
2731 return -1;
2732 }
2733 Py_DECREF(encoded);
2734
2735 if (_Pickler_Write(self, "\n", 1) < 0)
2736 return -1;
2737 }
2738 if (memo_put(self, obj) < 0)
2739 return -1;
2740
2741 return 0;
2742 }
2743
2744 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2745 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2746 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2747 {
2748 Py_ssize_t i;
2749
2750 assert(PyTuple_Size(t) == len);
2751
2752 for (i = 0; i < len; i++) {
2753 PyObject *element = PyTuple_GET_ITEM(t, i);
2754
2755 if (element == NULL)
2756 return -1;
2757 if (save(self, element, 0) < 0)
2758 return -1;
2759 }
2760
2761 return 0;
2762 }
2763
2764 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2765 * used across protocols to minimize the space needed to pickle them.
2766 * Tuples are also the only builtin immutable type that can be recursive
2767 * (a tuple can be reached from itself), and that requires some subtle
2768 * magic so that it works in all cases. IOW, this is a long routine.
2769 */
2770 static int
save_tuple(PicklerObject * self,PyObject * obj)2771 save_tuple(PicklerObject *self, PyObject *obj)
2772 {
2773 Py_ssize_t len, i;
2774
2775 const char mark_op = MARK;
2776 const char tuple_op = TUPLE;
2777 const char pop_op = POP;
2778 const char pop_mark_op = POP_MARK;
2779 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2780
2781 if ((len = PyTuple_Size(obj)) < 0)
2782 return -1;
2783
2784 if (len == 0) {
2785 char pdata[2];
2786
2787 if (self->proto) {
2788 pdata[0] = EMPTY_TUPLE;
2789 len = 1;
2790 }
2791 else {
2792 pdata[0] = MARK;
2793 pdata[1] = TUPLE;
2794 len = 2;
2795 }
2796 if (_Pickler_Write(self, pdata, len) < 0)
2797 return -1;
2798 return 0;
2799 }
2800
2801 /* The tuple isn't in the memo now. If it shows up there after
2802 * saving the tuple elements, the tuple must be recursive, in
2803 * which case we'll pop everything we put on the stack, and fetch
2804 * its value from the memo.
2805 */
2806 if (len <= 3 && self->proto >= 2) {
2807 /* Use TUPLE{1,2,3} opcodes. */
2808 if (store_tuple_elements(self, obj, len) < 0)
2809 return -1;
2810
2811 if (PyMemoTable_Get(self->memo, obj)) {
2812 /* pop the len elements */
2813 for (i = 0; i < len; i++)
2814 if (_Pickler_Write(self, &pop_op, 1) < 0)
2815 return -1;
2816 /* fetch from memo */
2817 if (memo_get(self, obj) < 0)
2818 return -1;
2819
2820 return 0;
2821 }
2822 else { /* Not recursive. */
2823 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2824 return -1;
2825 }
2826 goto memoize;
2827 }
2828
2829 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2830 * Generate MARK e1 e2 ... TUPLE
2831 */
2832 if (_Pickler_Write(self, &mark_op, 1) < 0)
2833 return -1;
2834
2835 if (store_tuple_elements(self, obj, len) < 0)
2836 return -1;
2837
2838 if (PyMemoTable_Get(self->memo, obj)) {
2839 /* pop the stack stuff we pushed */
2840 if (self->bin) {
2841 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2842 return -1;
2843 }
2844 else {
2845 /* Note that we pop one more than len, to remove
2846 * the MARK too.
2847 */
2848 for (i = 0; i <= len; i++)
2849 if (_Pickler_Write(self, &pop_op, 1) < 0)
2850 return -1;
2851 }
2852 /* fetch from memo */
2853 if (memo_get(self, obj) < 0)
2854 return -1;
2855
2856 return 0;
2857 }
2858 else { /* Not recursive. */
2859 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2860 return -1;
2861 }
2862
2863 memoize:
2864 if (memo_put(self, obj) < 0)
2865 return -1;
2866
2867 return 0;
2868 }
2869
2870 /* iter is an iterator giving items, and we batch up chunks of
2871 * MARK item item ... item APPENDS
2872 * opcode sequences. Calling code should have arranged to first create an
2873 * empty list, or list-like object, for the APPENDS to operate on.
2874 * Returns 0 on success, <0 on error.
2875 */
2876 static int
batch_list(PicklerObject * self,PyObject * iter)2877 batch_list(PicklerObject *self, PyObject *iter)
2878 {
2879 PyObject *obj = NULL;
2880 PyObject *firstitem = NULL;
2881 int i, n;
2882
2883 const char mark_op = MARK;
2884 const char append_op = APPEND;
2885 const char appends_op = APPENDS;
2886
2887 assert(iter != NULL);
2888
2889 /* XXX: I think this function could be made faster by avoiding the
2890 iterator interface and fetching objects directly from list using
2891 PyList_GET_ITEM.
2892 */
2893
2894 if (self->proto == 0) {
2895 /* APPENDS isn't available; do one at a time. */
2896 for (;;) {
2897 obj = PyIter_Next(iter);
2898 if (obj == NULL) {
2899 if (PyErr_Occurred())
2900 return -1;
2901 break;
2902 }
2903 i = save(self, obj, 0);
2904 Py_DECREF(obj);
2905 if (i < 0)
2906 return -1;
2907 if (_Pickler_Write(self, &append_op, 1) < 0)
2908 return -1;
2909 }
2910 return 0;
2911 }
2912
2913 /* proto > 0: write in batches of BATCHSIZE. */
2914 do {
2915 /* Get first item */
2916 firstitem = PyIter_Next(iter);
2917 if (firstitem == NULL) {
2918 if (PyErr_Occurred())
2919 goto error;
2920
2921 /* nothing more to add */
2922 break;
2923 }
2924
2925 /* Try to get a second item */
2926 obj = PyIter_Next(iter);
2927 if (obj == NULL) {
2928 if (PyErr_Occurred())
2929 goto error;
2930
2931 /* Only one item to write */
2932 if (save(self, firstitem, 0) < 0)
2933 goto error;
2934 if (_Pickler_Write(self, &append_op, 1) < 0)
2935 goto error;
2936 Py_CLEAR(firstitem);
2937 break;
2938 }
2939
2940 /* More than one item to write */
2941
2942 /* Pump out MARK, items, APPENDS. */
2943 if (_Pickler_Write(self, &mark_op, 1) < 0)
2944 goto error;
2945
2946 if (save(self, firstitem, 0) < 0)
2947 goto error;
2948 Py_CLEAR(firstitem);
2949 n = 1;
2950
2951 /* Fetch and save up to BATCHSIZE items */
2952 while (obj) {
2953 if (save(self, obj, 0) < 0)
2954 goto error;
2955 Py_CLEAR(obj);
2956 n += 1;
2957
2958 if (n == BATCHSIZE)
2959 break;
2960
2961 obj = PyIter_Next(iter);
2962 if (obj == NULL) {
2963 if (PyErr_Occurred())
2964 goto error;
2965 break;
2966 }
2967 }
2968
2969 if (_Pickler_Write(self, &appends_op, 1) < 0)
2970 goto error;
2971
2972 } while (n == BATCHSIZE);
2973 return 0;
2974
2975 error:
2976 Py_XDECREF(firstitem);
2977 Py_XDECREF(obj);
2978 return -1;
2979 }
2980
2981 /* This is a variant of batch_list() above, specialized for lists (with no
2982 * support for list subclasses). Like batch_list(), we batch up chunks of
2983 * MARK item item ... item APPENDS
2984 * opcode sequences. Calling code should have arranged to first create an
2985 * empty list, or list-like object, for the APPENDS to operate on.
2986 * Returns 0 on success, -1 on error.
2987 *
2988 * This version is considerably faster than batch_list(), if less general.
2989 *
2990 * Note that this only works for protocols > 0.
2991 */
2992 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2993 batch_list_exact(PicklerObject *self, PyObject *obj)
2994 {
2995 PyObject *item = NULL;
2996 Py_ssize_t this_batch, total;
2997
2998 const char append_op = APPEND;
2999 const char appends_op = APPENDS;
3000 const char mark_op = MARK;
3001
3002 assert(obj != NULL);
3003 assert(self->proto > 0);
3004 assert(PyList_CheckExact(obj));
3005
3006 if (PyList_GET_SIZE(obj) == 1) {
3007 item = PyList_GET_ITEM(obj, 0);
3008 if (save(self, item, 0) < 0)
3009 return -1;
3010 if (_Pickler_Write(self, &append_op, 1) < 0)
3011 return -1;
3012 return 0;
3013 }
3014
3015 /* Write in batches of BATCHSIZE. */
3016 total = 0;
3017 do {
3018 this_batch = 0;
3019 if (_Pickler_Write(self, &mark_op, 1) < 0)
3020 return -1;
3021 while (total < PyList_GET_SIZE(obj)) {
3022 item = PyList_GET_ITEM(obj, total);
3023 if (save(self, item, 0) < 0)
3024 return -1;
3025 total++;
3026 if (++this_batch == BATCHSIZE)
3027 break;
3028 }
3029 if (_Pickler_Write(self, &appends_op, 1) < 0)
3030 return -1;
3031
3032 } while (total < PyList_GET_SIZE(obj));
3033
3034 return 0;
3035 }
3036
3037 static int
save_list(PicklerObject * self,PyObject * obj)3038 save_list(PicklerObject *self, PyObject *obj)
3039 {
3040 char header[3];
3041 Py_ssize_t len;
3042 int status = 0;
3043
3044 if (self->fast && !fast_save_enter(self, obj))
3045 goto error;
3046
3047 /* Create an empty list. */
3048 if (self->bin) {
3049 header[0] = EMPTY_LIST;
3050 len = 1;
3051 }
3052 else {
3053 header[0] = MARK;
3054 header[1] = LIST;
3055 len = 2;
3056 }
3057
3058 if (_Pickler_Write(self, header, len) < 0)
3059 goto error;
3060
3061 /* Get list length, and bow out early if empty. */
3062 if ((len = PyList_Size(obj)) < 0)
3063 goto error;
3064
3065 if (memo_put(self, obj) < 0)
3066 goto error;
3067
3068 if (len != 0) {
3069 /* Materialize the list elements. */
3070 if (PyList_CheckExact(obj) && self->proto > 0) {
3071 if (Py_EnterRecursiveCall(" while pickling an object"))
3072 goto error;
3073 status = batch_list_exact(self, obj);
3074 Py_LeaveRecursiveCall();
3075 } else {
3076 PyObject *iter = PyObject_GetIter(obj);
3077 if (iter == NULL)
3078 goto error;
3079
3080 if (Py_EnterRecursiveCall(" while pickling an object")) {
3081 Py_DECREF(iter);
3082 goto error;
3083 }
3084 status = batch_list(self, iter);
3085 Py_LeaveRecursiveCall();
3086 Py_DECREF(iter);
3087 }
3088 }
3089 if (0) {
3090 error:
3091 status = -1;
3092 }
3093
3094 if (self->fast && !fast_save_leave(self, obj))
3095 status = -1;
3096
3097 return status;
3098 }
3099
3100 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3101 * MARK key value ... key value SETITEMS
3102 * opcode sequences. Calling code should have arranged to first create an
3103 * empty dict, or dict-like object, for the SETITEMS to operate on.
3104 * Returns 0 on success, <0 on error.
3105 *
3106 * This is very much like batch_list(). The difference between saving
3107 * elements directly, and picking apart two-tuples, is so long-winded at
3108 * the C level, though, that attempts to combine these routines were too
3109 * ugly to bear.
3110 */
3111 static int
batch_dict(PicklerObject * self,PyObject * iter)3112 batch_dict(PicklerObject *self, PyObject *iter)
3113 {
3114 PyObject *obj = NULL;
3115 PyObject *firstitem = NULL;
3116 int i, n;
3117
3118 const char mark_op = MARK;
3119 const char setitem_op = SETITEM;
3120 const char setitems_op = SETITEMS;
3121
3122 assert(iter != NULL);
3123
3124 if (self->proto == 0) {
3125 /* SETITEMS isn't available; do one at a time. */
3126 for (;;) {
3127 obj = PyIter_Next(iter);
3128 if (obj == NULL) {
3129 if (PyErr_Occurred())
3130 return -1;
3131 break;
3132 }
3133 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3134 PyErr_SetString(PyExc_TypeError, "dict items "
3135 "iterator must return 2-tuples");
3136 return -1;
3137 }
3138 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3139 if (i >= 0)
3140 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3141 Py_DECREF(obj);
3142 if (i < 0)
3143 return -1;
3144 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3145 return -1;
3146 }
3147 return 0;
3148 }
3149
3150 /* proto > 0: write in batches of BATCHSIZE. */
3151 do {
3152 /* Get first item */
3153 firstitem = PyIter_Next(iter);
3154 if (firstitem == NULL) {
3155 if (PyErr_Occurred())
3156 goto error;
3157
3158 /* nothing more to add */
3159 break;
3160 }
3161 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3162 PyErr_SetString(PyExc_TypeError, "dict items "
3163 "iterator must return 2-tuples");
3164 goto error;
3165 }
3166
3167 /* Try to get a second item */
3168 obj = PyIter_Next(iter);
3169 if (obj == NULL) {
3170 if (PyErr_Occurred())
3171 goto error;
3172
3173 /* Only one item to write */
3174 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3175 goto error;
3176 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3177 goto error;
3178 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3179 goto error;
3180 Py_CLEAR(firstitem);
3181 break;
3182 }
3183
3184 /* More than one item to write */
3185
3186 /* Pump out MARK, items, SETITEMS. */
3187 if (_Pickler_Write(self, &mark_op, 1) < 0)
3188 goto error;
3189
3190 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3191 goto error;
3192 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3193 goto error;
3194 Py_CLEAR(firstitem);
3195 n = 1;
3196
3197 /* Fetch and save up to BATCHSIZE items */
3198 while (obj) {
3199 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3200 PyErr_SetString(PyExc_TypeError, "dict items "
3201 "iterator must return 2-tuples");
3202 goto error;
3203 }
3204 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3205 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3206 goto error;
3207 Py_CLEAR(obj);
3208 n += 1;
3209
3210 if (n == BATCHSIZE)
3211 break;
3212
3213 obj = PyIter_Next(iter);
3214 if (obj == NULL) {
3215 if (PyErr_Occurred())
3216 goto error;
3217 break;
3218 }
3219 }
3220
3221 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3222 goto error;
3223
3224 } while (n == BATCHSIZE);
3225 return 0;
3226
3227 error:
3228 Py_XDECREF(firstitem);
3229 Py_XDECREF(obj);
3230 return -1;
3231 }
3232
3233 /* This is a variant of batch_dict() above that specializes for dicts, with no
3234 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3235 * MARK key value ... key value SETITEMS
3236 * opcode sequences. Calling code should have arranged to first create an
3237 * empty dict, or dict-like object, for the SETITEMS to operate on.
3238 * Returns 0 on success, -1 on error.
3239 *
3240 * Note that this currently doesn't work for protocol 0.
3241 */
3242 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3243 batch_dict_exact(PicklerObject *self, PyObject *obj)
3244 {
3245 PyObject *key = NULL, *value = NULL;
3246 int i;
3247 Py_ssize_t dict_size, ppos = 0;
3248
3249 const char mark_op = MARK;
3250 const char setitem_op = SETITEM;
3251 const char setitems_op = SETITEMS;
3252
3253 assert(obj != NULL && PyDict_CheckExact(obj));
3254 assert(self->proto > 0);
3255
3256 dict_size = PyDict_GET_SIZE(obj);
3257
3258 /* Special-case len(d) == 1 to save space. */
3259 if (dict_size == 1) {
3260 PyDict_Next(obj, &ppos, &key, &value);
3261 if (save(self, key, 0) < 0)
3262 return -1;
3263 if (save(self, value, 0) < 0)
3264 return -1;
3265 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3266 return -1;
3267 return 0;
3268 }
3269
3270 /* Write in batches of BATCHSIZE. */
3271 do {
3272 i = 0;
3273 if (_Pickler_Write(self, &mark_op, 1) < 0)
3274 return -1;
3275 while (PyDict_Next(obj, &ppos, &key, &value)) {
3276 if (save(self, key, 0) < 0)
3277 return -1;
3278 if (save(self, value, 0) < 0)
3279 return -1;
3280 if (++i == BATCHSIZE)
3281 break;
3282 }
3283 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3284 return -1;
3285 if (PyDict_GET_SIZE(obj) != dict_size) {
3286 PyErr_Format(
3287 PyExc_RuntimeError,
3288 "dictionary changed size during iteration");
3289 return -1;
3290 }
3291
3292 } while (i == BATCHSIZE);
3293 return 0;
3294 }
3295
3296 static int
save_dict(PicklerObject * self,PyObject * obj)3297 save_dict(PicklerObject *self, PyObject *obj)
3298 {
3299 PyObject *items, *iter;
3300 char header[3];
3301 Py_ssize_t len;
3302 int status = 0;
3303 assert(PyDict_Check(obj));
3304
3305 if (self->fast && !fast_save_enter(self, obj))
3306 goto error;
3307
3308 /* Create an empty dict. */
3309 if (self->bin) {
3310 header[0] = EMPTY_DICT;
3311 len = 1;
3312 }
3313 else {
3314 header[0] = MARK;
3315 header[1] = DICT;
3316 len = 2;
3317 }
3318
3319 if (_Pickler_Write(self, header, len) < 0)
3320 goto error;
3321
3322 if (memo_put(self, obj) < 0)
3323 goto error;
3324
3325 if (PyDict_GET_SIZE(obj)) {
3326 /* Save the dict items. */
3327 if (PyDict_CheckExact(obj) && self->proto > 0) {
3328 /* We can take certain shortcuts if we know this is a dict and
3329 not a dict subclass. */
3330 if (Py_EnterRecursiveCall(" while pickling an object"))
3331 goto error;
3332 status = batch_dict_exact(self, obj);
3333 Py_LeaveRecursiveCall();
3334 } else {
3335 _Py_IDENTIFIER(items);
3336
3337 items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
3338 if (items == NULL)
3339 goto error;
3340 iter = PyObject_GetIter(items);
3341 Py_DECREF(items);
3342 if (iter == NULL)
3343 goto error;
3344 if (Py_EnterRecursiveCall(" while pickling an object")) {
3345 Py_DECREF(iter);
3346 goto error;
3347 }
3348 status = batch_dict(self, iter);
3349 Py_LeaveRecursiveCall();
3350 Py_DECREF(iter);
3351 }
3352 }
3353
3354 if (0) {
3355 error:
3356 status = -1;
3357 }
3358
3359 if (self->fast && !fast_save_leave(self, obj))
3360 status = -1;
3361
3362 return status;
3363 }
3364
3365 static int
save_set(PicklerObject * self,PyObject * obj)3366 save_set(PicklerObject *self, PyObject *obj)
3367 {
3368 PyObject *item;
3369 int i;
3370 Py_ssize_t set_size, ppos = 0;
3371 Py_hash_t hash;
3372
3373 const char empty_set_op = EMPTY_SET;
3374 const char mark_op = MARK;
3375 const char additems_op = ADDITEMS;
3376
3377 if (self->proto < 4) {
3378 PyObject *items;
3379 PyObject *reduce_value;
3380 int status;
3381
3382 items = PySequence_List(obj);
3383 if (items == NULL) {
3384 return -1;
3385 }
3386 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3387 Py_DECREF(items);
3388 if (reduce_value == NULL) {
3389 return -1;
3390 }
3391 /* save_reduce() will memoize the object automatically. */
3392 status = save_reduce(self, reduce_value, obj);
3393 Py_DECREF(reduce_value);
3394 return status;
3395 }
3396
3397 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3398 return -1;
3399
3400 if (memo_put(self, obj) < 0)
3401 return -1;
3402
3403 set_size = PySet_GET_SIZE(obj);
3404 if (set_size == 0)
3405 return 0; /* nothing to do */
3406
3407 /* Write in batches of BATCHSIZE. */
3408 do {
3409 i = 0;
3410 if (_Pickler_Write(self, &mark_op, 1) < 0)
3411 return -1;
3412 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3413 if (save(self, item, 0) < 0)
3414 return -1;
3415 if (++i == BATCHSIZE)
3416 break;
3417 }
3418 if (_Pickler_Write(self, &additems_op, 1) < 0)
3419 return -1;
3420 if (PySet_GET_SIZE(obj) != set_size) {
3421 PyErr_Format(
3422 PyExc_RuntimeError,
3423 "set changed size during iteration");
3424 return -1;
3425 }
3426 } while (i == BATCHSIZE);
3427
3428 return 0;
3429 }
3430
3431 static int
save_frozenset(PicklerObject * self,PyObject * obj)3432 save_frozenset(PicklerObject *self, PyObject *obj)
3433 {
3434 PyObject *iter;
3435
3436 const char mark_op = MARK;
3437 const char frozenset_op = FROZENSET;
3438
3439 if (self->fast && !fast_save_enter(self, obj))
3440 return -1;
3441
3442 if (self->proto < 4) {
3443 PyObject *items;
3444 PyObject *reduce_value;
3445 int status;
3446
3447 items = PySequence_List(obj);
3448 if (items == NULL) {
3449 return -1;
3450 }
3451 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3452 items);
3453 Py_DECREF(items);
3454 if (reduce_value == NULL) {
3455 return -1;
3456 }
3457 /* save_reduce() will memoize the object automatically. */
3458 status = save_reduce(self, reduce_value, obj);
3459 Py_DECREF(reduce_value);
3460 return status;
3461 }
3462
3463 if (_Pickler_Write(self, &mark_op, 1) < 0)
3464 return -1;
3465
3466 iter = PyObject_GetIter(obj);
3467 if (iter == NULL) {
3468 return -1;
3469 }
3470 for (;;) {
3471 PyObject *item;
3472
3473 item = PyIter_Next(iter);
3474 if (item == NULL) {
3475 if (PyErr_Occurred()) {
3476 Py_DECREF(iter);
3477 return -1;
3478 }
3479 break;
3480 }
3481 if (save(self, item, 0) < 0) {
3482 Py_DECREF(item);
3483 Py_DECREF(iter);
3484 return -1;
3485 }
3486 Py_DECREF(item);
3487 }
3488 Py_DECREF(iter);
3489
3490 /* If the object is already in the memo, this means it is
3491 recursive. In this case, throw away everything we put on the
3492 stack, and fetch the object back from the memo. */
3493 if (PyMemoTable_Get(self->memo, obj)) {
3494 const char pop_mark_op = POP_MARK;
3495
3496 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3497 return -1;
3498 if (memo_get(self, obj) < 0)
3499 return -1;
3500 return 0;
3501 }
3502
3503 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3504 return -1;
3505 if (memo_put(self, obj) < 0)
3506 return -1;
3507
3508 return 0;
3509 }
3510
3511 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3512 fix_imports(PyObject **module_name, PyObject **global_name)
3513 {
3514 PyObject *key;
3515 PyObject *item;
3516 PickleState *st = _Pickle_GetGlobalState();
3517
3518 key = PyTuple_Pack(2, *module_name, *global_name);
3519 if (key == NULL)
3520 return -1;
3521 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3522 Py_DECREF(key);
3523 if (item) {
3524 PyObject *fixed_module_name;
3525 PyObject *fixed_global_name;
3526
3527 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3528 PyErr_Format(PyExc_RuntimeError,
3529 "_compat_pickle.REVERSE_NAME_MAPPING values "
3530 "should be 2-tuples, not %.200s",
3531 Py_TYPE(item)->tp_name);
3532 return -1;
3533 }
3534 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3535 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3536 if (!PyUnicode_Check(fixed_module_name) ||
3537 !PyUnicode_Check(fixed_global_name)) {
3538 PyErr_Format(PyExc_RuntimeError,
3539 "_compat_pickle.REVERSE_NAME_MAPPING values "
3540 "should be pairs of str, not (%.200s, %.200s)",
3541 Py_TYPE(fixed_module_name)->tp_name,
3542 Py_TYPE(fixed_global_name)->tp_name);
3543 return -1;
3544 }
3545
3546 Py_CLEAR(*module_name);
3547 Py_CLEAR(*global_name);
3548 Py_INCREF(fixed_module_name);
3549 Py_INCREF(fixed_global_name);
3550 *module_name = fixed_module_name;
3551 *global_name = fixed_global_name;
3552 return 0;
3553 }
3554 else if (PyErr_Occurred()) {
3555 return -1;
3556 }
3557
3558 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3559 if (item) {
3560 if (!PyUnicode_Check(item)) {
3561 PyErr_Format(PyExc_RuntimeError,
3562 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3563 "should be strings, not %.200s",
3564 Py_TYPE(item)->tp_name);
3565 return -1;
3566 }
3567 Py_INCREF(item);
3568 Py_XSETREF(*module_name, item);
3569 }
3570 else if (PyErr_Occurred()) {
3571 return -1;
3572 }
3573
3574 return 0;
3575 }
3576
3577 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3578 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3579 {
3580 PyObject *global_name = NULL;
3581 PyObject *module_name = NULL;
3582 PyObject *module = NULL;
3583 PyObject *parent = NULL;
3584 PyObject *dotted_path = NULL;
3585 PyObject *lastname = NULL;
3586 PyObject *cls;
3587 PickleState *st = _Pickle_GetGlobalState();
3588 int status = 0;
3589 _Py_IDENTIFIER(__name__);
3590 _Py_IDENTIFIER(__qualname__);
3591
3592 const char global_op = GLOBAL;
3593
3594 if (name) {
3595 Py_INCREF(name);
3596 global_name = name;
3597 }
3598 else {
3599 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3600 goto error;
3601 if (global_name == NULL) {
3602 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3603 if (global_name == NULL)
3604 goto error;
3605 }
3606 }
3607
3608 dotted_path = get_dotted_path(module, global_name);
3609 if (dotted_path == NULL)
3610 goto error;
3611 module_name = whichmodule(obj, dotted_path);
3612 if (module_name == NULL)
3613 goto error;
3614
3615 /* XXX: Change to use the import C API directly with level=0 to disallow
3616 relative imports.
3617
3618 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3619 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3620 custom import functions (IMHO, this would be a nice security
3621 feature). The import C API would need to be extended to support the
3622 extra parameters of __import__ to fix that. */
3623 module = PyImport_Import(module_name);
3624 if (module == NULL) {
3625 PyErr_Format(st->PicklingError,
3626 "Can't pickle %R: import of module %R failed",
3627 obj, module_name);
3628 goto error;
3629 }
3630 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3631 Py_INCREF(lastname);
3632 cls = get_deep_attribute(module, dotted_path, &parent);
3633 Py_CLEAR(dotted_path);
3634 if (cls == NULL) {
3635 PyErr_Format(st->PicklingError,
3636 "Can't pickle %R: attribute lookup %S on %S failed",
3637 obj, global_name, module_name);
3638 goto error;
3639 }
3640 if (cls != obj) {
3641 Py_DECREF(cls);
3642 PyErr_Format(st->PicklingError,
3643 "Can't pickle %R: it's not the same object as %S.%S",
3644 obj, module_name, global_name);
3645 goto error;
3646 }
3647 Py_DECREF(cls);
3648
3649 if (self->proto >= 2) {
3650 /* See whether this is in the extension registry, and if
3651 * so generate an EXT opcode.
3652 */
3653 PyObject *extension_key;
3654 PyObject *code_obj; /* extension code as Python object */
3655 long code; /* extension code as C value */
3656 char pdata[5];
3657 Py_ssize_t n;
3658
3659 extension_key = PyTuple_Pack(2, module_name, global_name);
3660 if (extension_key == NULL) {
3661 goto error;
3662 }
3663 code_obj = PyDict_GetItemWithError(st->extension_registry,
3664 extension_key);
3665 Py_DECREF(extension_key);
3666 /* The object is not registered in the extension registry.
3667 This is the most likely code path. */
3668 if (code_obj == NULL) {
3669 if (PyErr_Occurred()) {
3670 goto error;
3671 }
3672 goto gen_global;
3673 }
3674
3675 /* XXX: pickle.py doesn't check neither the type, nor the range
3676 of the value returned by the extension_registry. It should for
3677 consistency. */
3678
3679 /* Verify code_obj has the right type and value. */
3680 if (!PyLong_Check(code_obj)) {
3681 PyErr_Format(st->PicklingError,
3682 "Can't pickle %R: extension code %R isn't an integer",
3683 obj, code_obj);
3684 goto error;
3685 }
3686 code = PyLong_AS_LONG(code_obj);
3687 if (code <= 0 || code > 0x7fffffffL) {
3688 if (!PyErr_Occurred())
3689 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3690 "code %ld is out of range", obj, code);
3691 goto error;
3692 }
3693
3694 /* Generate an EXT opcode. */
3695 if (code <= 0xff) {
3696 pdata[0] = EXT1;
3697 pdata[1] = (unsigned char)code;
3698 n = 2;
3699 }
3700 else if (code <= 0xffff) {
3701 pdata[0] = EXT2;
3702 pdata[1] = (unsigned char)(code & 0xff);
3703 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3704 n = 3;
3705 }
3706 else {
3707 pdata[0] = EXT4;
3708 pdata[1] = (unsigned char)(code & 0xff);
3709 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3710 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3711 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3712 n = 5;
3713 }
3714
3715 if (_Pickler_Write(self, pdata, n) < 0)
3716 goto error;
3717 }
3718 else {
3719 gen_global:
3720 if (parent == module) {
3721 Py_INCREF(lastname);
3722 Py_DECREF(global_name);
3723 global_name = lastname;
3724 }
3725 if (self->proto >= 4) {
3726 const char stack_global_op = STACK_GLOBAL;
3727
3728 if (save(self, module_name, 0) < 0)
3729 goto error;
3730 if (save(self, global_name, 0) < 0)
3731 goto error;
3732
3733 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3734 goto error;
3735 }
3736 else if (parent != module) {
3737 PickleState *st = _Pickle_GetGlobalState();
3738 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3739 st->getattr, parent, lastname);
3740 if (reduce_value == NULL)
3741 goto error;
3742 status = save_reduce(self, reduce_value, NULL);
3743 Py_DECREF(reduce_value);
3744 if (status < 0)
3745 goto error;
3746 }
3747 else {
3748 /* Generate a normal global opcode if we are using a pickle
3749 protocol < 4, or if the object is not registered in the
3750 extension registry. */
3751 PyObject *encoded;
3752 PyObject *(*unicode_encoder)(PyObject *);
3753
3754 if (_Pickler_Write(self, &global_op, 1) < 0)
3755 goto error;
3756
3757 /* For protocol < 3 and if the user didn't request against doing
3758 so, we convert module names to the old 2.x module names. */
3759 if (self->proto < 3 && self->fix_imports) {
3760 if (fix_imports(&module_name, &global_name) < 0) {
3761 goto error;
3762 }
3763 }
3764
3765 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3766 both the module name and the global name using UTF-8. We do so
3767 only when we are using the pickle protocol newer than version
3768 3. This is to ensure compatibility with older Unpickler running
3769 on Python 2.x. */
3770 if (self->proto == 3) {
3771 unicode_encoder = PyUnicode_AsUTF8String;
3772 }
3773 else {
3774 unicode_encoder = PyUnicode_AsASCIIString;
3775 }
3776 encoded = unicode_encoder(module_name);
3777 if (encoded == NULL) {
3778 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3779 PyErr_Format(st->PicklingError,
3780 "can't pickle module identifier '%S' using "
3781 "pickle protocol %i",
3782 module_name, self->proto);
3783 goto error;
3784 }
3785 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3786 PyBytes_GET_SIZE(encoded)) < 0) {
3787 Py_DECREF(encoded);
3788 goto error;
3789 }
3790 Py_DECREF(encoded);
3791 if(_Pickler_Write(self, "\n", 1) < 0)
3792 goto error;
3793
3794 /* Save the name of the module. */
3795 encoded = unicode_encoder(global_name);
3796 if (encoded == NULL) {
3797 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3798 PyErr_Format(st->PicklingError,
3799 "can't pickle global identifier '%S' using "
3800 "pickle protocol %i",
3801 global_name, self->proto);
3802 goto error;
3803 }
3804 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3805 PyBytes_GET_SIZE(encoded)) < 0) {
3806 Py_DECREF(encoded);
3807 goto error;
3808 }
3809 Py_DECREF(encoded);
3810 if (_Pickler_Write(self, "\n", 1) < 0)
3811 goto error;
3812 }
3813 /* Memoize the object. */
3814 if (memo_put(self, obj) < 0)
3815 goto error;
3816 }
3817
3818 if (0) {
3819 error:
3820 status = -1;
3821 }
3822 Py_XDECREF(module_name);
3823 Py_XDECREF(global_name);
3824 Py_XDECREF(module);
3825 Py_XDECREF(parent);
3826 Py_XDECREF(dotted_path);
3827 Py_XDECREF(lastname);
3828
3829 return status;
3830 }
3831
3832 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3833 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3834 {
3835 PyObject *reduce_value;
3836 int status;
3837
3838 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3839 if (reduce_value == NULL) {
3840 return -1;
3841 }
3842 status = save_reduce(self, reduce_value, obj);
3843 Py_DECREF(reduce_value);
3844 return status;
3845 }
3846
3847 static int
save_type(PicklerObject * self,PyObject * obj)3848 save_type(PicklerObject *self, PyObject *obj)
3849 {
3850 if (obj == (PyObject *)&_PyNone_Type) {
3851 return save_singleton_type(self, obj, Py_None);
3852 }
3853 else if (obj == (PyObject *)&PyEllipsis_Type) {
3854 return save_singleton_type(self, obj, Py_Ellipsis);
3855 }
3856 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3857 return save_singleton_type(self, obj, Py_NotImplemented);
3858 }
3859 return save_global(self, obj, NULL);
3860 }
3861
3862 static int
save_pers(PicklerObject * self,PyObject * obj)3863 save_pers(PicklerObject *self, PyObject *obj)
3864 {
3865 PyObject *pid = NULL;
3866 int status = 0;
3867
3868 const char persid_op = PERSID;
3869 const char binpersid_op = BINPERSID;
3870
3871 pid = call_method(self->pers_func, self->pers_func_self, obj);
3872 if (pid == NULL)
3873 return -1;
3874
3875 if (pid != Py_None) {
3876 if (self->bin) {
3877 if (save(self, pid, 1) < 0 ||
3878 _Pickler_Write(self, &binpersid_op, 1) < 0)
3879 goto error;
3880 }
3881 else {
3882 PyObject *pid_str;
3883
3884 pid_str = PyObject_Str(pid);
3885 if (pid_str == NULL)
3886 goto error;
3887
3888 /* XXX: Should it check whether the pid contains embedded
3889 newlines? */
3890 if (!PyUnicode_IS_ASCII(pid_str)) {
3891 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3892 "persistent IDs in protocol 0 must be "
3893 "ASCII strings");
3894 Py_DECREF(pid_str);
3895 goto error;
3896 }
3897
3898 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3899 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3900 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3901 _Pickler_Write(self, "\n", 1) < 0) {
3902 Py_DECREF(pid_str);
3903 goto error;
3904 }
3905 Py_DECREF(pid_str);
3906 }
3907 status = 1;
3908 }
3909
3910 if (0) {
3911 error:
3912 status = -1;
3913 }
3914 Py_XDECREF(pid);
3915
3916 return status;
3917 }
3918
3919 static PyObject *
get_class(PyObject * obj)3920 get_class(PyObject *obj)
3921 {
3922 PyObject *cls;
3923 _Py_IDENTIFIER(__class__);
3924
3925 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3926 cls = (PyObject *) Py_TYPE(obj);
3927 Py_INCREF(cls);
3928 }
3929 return cls;
3930 }
3931
3932 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3933 * appropriate __reduce__ method for obj.
3934 */
3935 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3936 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3937 {
3938 PyObject *callable;
3939 PyObject *argtup;
3940 PyObject *state = NULL;
3941 PyObject *listitems = Py_None;
3942 PyObject *dictitems = Py_None;
3943 PyObject *state_setter = Py_None;
3944 PickleState *st = _Pickle_GetGlobalState();
3945 Py_ssize_t size;
3946 int use_newobj = 0, use_newobj_ex = 0;
3947
3948 const char reduce_op = REDUCE;
3949 const char build_op = BUILD;
3950 const char newobj_op = NEWOBJ;
3951 const char newobj_ex_op = NEWOBJ_EX;
3952
3953 size = PyTuple_Size(args);
3954 if (size < 2 || size > 6) {
3955 PyErr_SetString(st->PicklingError, "tuple returned by "
3956 "__reduce__ must contain 2 through 6 elements");
3957 return -1;
3958 }
3959
3960 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3961 &callable, &argtup, &state, &listitems, &dictitems,
3962 &state_setter))
3963 return -1;
3964
3965 if (!PyCallable_Check(callable)) {
3966 PyErr_SetString(st->PicklingError, "first item of the tuple "
3967 "returned by __reduce__ must be callable");
3968 return -1;
3969 }
3970 if (!PyTuple_Check(argtup)) {
3971 PyErr_SetString(st->PicklingError, "second item of the tuple "
3972 "returned by __reduce__ must be a tuple");
3973 return -1;
3974 }
3975
3976 if (state == Py_None)
3977 state = NULL;
3978
3979 if (listitems == Py_None)
3980 listitems = NULL;
3981 else if (!PyIter_Check(listitems)) {
3982 PyErr_Format(st->PicklingError, "fourth element of the tuple "
3983 "returned by __reduce__ must be an iterator, not %s",
3984 Py_TYPE(listitems)->tp_name);
3985 return -1;
3986 }
3987
3988 if (dictitems == Py_None)
3989 dictitems = NULL;
3990 else if (!PyIter_Check(dictitems)) {
3991 PyErr_Format(st->PicklingError, "fifth element of the tuple "
3992 "returned by __reduce__ must be an iterator, not %s",
3993 Py_TYPE(dictitems)->tp_name);
3994 return -1;
3995 }
3996
3997 if (state_setter == Py_None)
3998 state_setter = NULL;
3999 else if (!PyCallable_Check(state_setter)) {
4000 PyErr_Format(st->PicklingError, "sixth element of the tuple "
4001 "returned by __reduce__ must be a function, not %s",
4002 Py_TYPE(state_setter)->tp_name);
4003 return -1;
4004 }
4005
4006 if (self->proto >= 2) {
4007 PyObject *name;
4008 _Py_IDENTIFIER(__name__);
4009
4010 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4011 return -1;
4012 }
4013 if (name != NULL && PyUnicode_Check(name)) {
4014 _Py_IDENTIFIER(__newobj_ex__);
4015 use_newobj_ex = _PyUnicode_EqualToASCIIId(
4016 name, &PyId___newobj_ex__);
4017 if (!use_newobj_ex) {
4018 _Py_IDENTIFIER(__newobj__);
4019 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
4020 }
4021 }
4022 Py_XDECREF(name);
4023 }
4024
4025 if (use_newobj_ex) {
4026 PyObject *cls;
4027 PyObject *args;
4028 PyObject *kwargs;
4029
4030 if (PyTuple_GET_SIZE(argtup) != 3) {
4031 PyErr_Format(st->PicklingError,
4032 "length of the NEWOBJ_EX argument tuple must be "
4033 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4034 return -1;
4035 }
4036
4037 cls = PyTuple_GET_ITEM(argtup, 0);
4038 if (!PyType_Check(cls)) {
4039 PyErr_Format(st->PicklingError,
4040 "first item from NEWOBJ_EX argument tuple must "
4041 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4042 return -1;
4043 }
4044 args = PyTuple_GET_ITEM(argtup, 1);
4045 if (!PyTuple_Check(args)) {
4046 PyErr_Format(st->PicklingError,
4047 "second item from NEWOBJ_EX argument tuple must "
4048 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4049 return -1;
4050 }
4051 kwargs = PyTuple_GET_ITEM(argtup, 2);
4052 if (!PyDict_Check(kwargs)) {
4053 PyErr_Format(st->PicklingError,
4054 "third item from NEWOBJ_EX argument tuple must "
4055 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4056 return -1;
4057 }
4058
4059 if (self->proto >= 4) {
4060 if (save(self, cls, 0) < 0 ||
4061 save(self, args, 0) < 0 ||
4062 save(self, kwargs, 0) < 0 ||
4063 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4064 return -1;
4065 }
4066 }
4067 else {
4068 PyObject *newargs;
4069 PyObject *cls_new;
4070 Py_ssize_t i;
4071 _Py_IDENTIFIER(__new__);
4072
4073 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4074 if (newargs == NULL)
4075 return -1;
4076
4077 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4078 if (cls_new == NULL) {
4079 Py_DECREF(newargs);
4080 return -1;
4081 }
4082 PyTuple_SET_ITEM(newargs, 0, cls_new);
4083 Py_INCREF(cls);
4084 PyTuple_SET_ITEM(newargs, 1, cls);
4085 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4086 PyObject *item = PyTuple_GET_ITEM(args, i);
4087 Py_INCREF(item);
4088 PyTuple_SET_ITEM(newargs, i + 2, item);
4089 }
4090
4091 callable = PyObject_Call(st->partial, newargs, kwargs);
4092 Py_DECREF(newargs);
4093 if (callable == NULL)
4094 return -1;
4095
4096 newargs = PyTuple_New(0);
4097 if (newargs == NULL) {
4098 Py_DECREF(callable);
4099 return -1;
4100 }
4101
4102 if (save(self, callable, 0) < 0 ||
4103 save(self, newargs, 0) < 0 ||
4104 _Pickler_Write(self, &reduce_op, 1) < 0) {
4105 Py_DECREF(newargs);
4106 Py_DECREF(callable);
4107 return -1;
4108 }
4109 Py_DECREF(newargs);
4110 Py_DECREF(callable);
4111 }
4112 }
4113 else if (use_newobj) {
4114 PyObject *cls;
4115 PyObject *newargtup;
4116 PyObject *obj_class;
4117 int p;
4118
4119 /* Sanity checks. */
4120 if (PyTuple_GET_SIZE(argtup) < 1) {
4121 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4122 return -1;
4123 }
4124
4125 cls = PyTuple_GET_ITEM(argtup, 0);
4126 if (!PyType_Check(cls)) {
4127 PyErr_SetString(st->PicklingError, "args[0] from "
4128 "__newobj__ args is not a type");
4129 return -1;
4130 }
4131
4132 if (obj != NULL) {
4133 obj_class = get_class(obj);
4134 if (obj_class == NULL) {
4135 return -1;
4136 }
4137 p = obj_class != cls;
4138 Py_DECREF(obj_class);
4139 if (p) {
4140 PyErr_SetString(st->PicklingError, "args[0] from "
4141 "__newobj__ args has the wrong class");
4142 return -1;
4143 }
4144 }
4145 /* XXX: These calls save() are prone to infinite recursion. Imagine
4146 what happen if the value returned by the __reduce__() method of
4147 some extension type contains another object of the same type. Ouch!
4148
4149 Here is a quick example, that I ran into, to illustrate what I
4150 mean:
4151
4152 >>> import pickle, copyreg
4153 >>> copyreg.dispatch_table.pop(complex)
4154 >>> pickle.dumps(1+2j)
4155 Traceback (most recent call last):
4156 ...
4157 RecursionError: maximum recursion depth exceeded
4158
4159 Removing the complex class from copyreg.dispatch_table made the
4160 __reduce_ex__() method emit another complex object:
4161
4162 >>> (1+1j).__reduce_ex__(2)
4163 (<function __newobj__ at 0xb7b71c3c>,
4164 (<class 'complex'>, (1+1j)), None, None, None)
4165
4166 Thus when save() was called on newargstup (the 2nd item) recursion
4167 ensued. Of course, the bug was in the complex class which had a
4168 broken __getnewargs__() that emitted another complex object. But,
4169 the point, here, is it is quite easy to end up with a broken reduce
4170 function. */
4171
4172 /* Save the class and its __new__ arguments. */
4173 if (save(self, cls, 0) < 0)
4174 return -1;
4175
4176 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4177 if (newargtup == NULL)
4178 return -1;
4179
4180 p = save(self, newargtup, 0);
4181 Py_DECREF(newargtup);
4182 if (p < 0)
4183 return -1;
4184
4185 /* Add NEWOBJ opcode. */
4186 if (_Pickler_Write(self, &newobj_op, 1) < 0)
4187 return -1;
4188 }
4189 else { /* Not using NEWOBJ. */
4190 if (save(self, callable, 0) < 0 ||
4191 save(self, argtup, 0) < 0 ||
4192 _Pickler_Write(self, &reduce_op, 1) < 0)
4193 return -1;
4194 }
4195
4196 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4197 the caller do not want to memoize the object. Not particularly useful,
4198 but that is to mimic the behavior save_reduce() in pickle.py when
4199 obj is None. */
4200 if (obj != NULL) {
4201 /* If the object is already in the memo, this means it is
4202 recursive. In this case, throw away everything we put on the
4203 stack, and fetch the object back from the memo. */
4204 if (PyMemoTable_Get(self->memo, obj)) {
4205 const char pop_op = POP;
4206
4207 if (_Pickler_Write(self, &pop_op, 1) < 0)
4208 return -1;
4209 if (memo_get(self, obj) < 0)
4210 return -1;
4211
4212 return 0;
4213 }
4214 else if (memo_put(self, obj) < 0)
4215 return -1;
4216 }
4217
4218 if (listitems && batch_list(self, listitems) < 0)
4219 return -1;
4220
4221 if (dictitems && batch_dict(self, dictitems) < 0)
4222 return -1;
4223
4224 if (state) {
4225 if (state_setter == NULL) {
4226 if (save(self, state, 0) < 0 ||
4227 _Pickler_Write(self, &build_op, 1) < 0)
4228 return -1;
4229 }
4230 else {
4231
4232 /* If a state_setter is specified, call it instead of load_build to
4233 * update obj's with its previous state.
4234 * The first 4 save/write instructions push state_setter and its
4235 * tuple of expected arguments (obj, state) onto the stack. The
4236 * REDUCE opcode triggers the state_setter(obj, state) function
4237 * call. Finally, because state-updating routines only do in-place
4238 * modification, the whole operation has to be stack-transparent.
4239 * Thus, we finally pop the call's output from the stack.*/
4240
4241 const char tupletwo_op = TUPLE2;
4242 const char pop_op = POP;
4243 if (save(self, state_setter, 0) < 0 ||
4244 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4245 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4246 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4247 _Pickler_Write(self, &pop_op, 1) < 0)
4248 return -1;
4249 }
4250 }
4251 return 0;
4252 }
4253
4254 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4255 save(PicklerObject *self, PyObject *obj, int pers_save)
4256 {
4257 PyTypeObject *type;
4258 PyObject *reduce_func = NULL;
4259 PyObject *reduce_value = NULL;
4260 int status = 0;
4261
4262 if (_Pickler_OpcodeBoundary(self) < 0)
4263 return -1;
4264
4265 /* The extra pers_save argument is necessary to avoid calling save_pers()
4266 on its returned object. */
4267 if (!pers_save && self->pers_func) {
4268 /* save_pers() returns:
4269 -1 to signal an error;
4270 0 if it did nothing successfully;
4271 1 if a persistent id was saved.
4272 */
4273 if ((status = save_pers(self, obj)) != 0)
4274 return status;
4275 }
4276
4277 type = Py_TYPE(obj);
4278
4279 /* The old cPickle had an optimization that used switch-case statement
4280 dispatching on the first letter of the type name. This has was removed
4281 since benchmarks shown that this optimization was actually slowing
4282 things down. */
4283
4284 /* Atom types; these aren't memoized, so don't check the memo. */
4285
4286 if (obj == Py_None) {
4287 return save_none(self, obj);
4288 }
4289 else if (obj == Py_False || obj == Py_True) {
4290 return save_bool(self, obj);
4291 }
4292 else if (type == &PyLong_Type) {
4293 return save_long(self, obj);
4294 }
4295 else if (type == &PyFloat_Type) {
4296 return save_float(self, obj);
4297 }
4298
4299 /* Check the memo to see if it has the object. If so, generate
4300 a GET (or BINGET) opcode, instead of pickling the object
4301 once again. */
4302 if (PyMemoTable_Get(self->memo, obj)) {
4303 return memo_get(self, obj);
4304 }
4305
4306 if (type == &PyBytes_Type) {
4307 return save_bytes(self, obj);
4308 }
4309 else if (type == &PyUnicode_Type) {
4310 return save_unicode(self, obj);
4311 }
4312
4313 /* We're only calling Py_EnterRecursiveCall here so that atomic
4314 types above are pickled faster. */
4315 if (Py_EnterRecursiveCall(" while pickling an object")) {
4316 return -1;
4317 }
4318
4319 if (type == &PyDict_Type) {
4320 status = save_dict(self, obj);
4321 goto done;
4322 }
4323 else if (type == &PySet_Type) {
4324 status = save_set(self, obj);
4325 goto done;
4326 }
4327 else if (type == &PyFrozenSet_Type) {
4328 status = save_frozenset(self, obj);
4329 goto done;
4330 }
4331 else if (type == &PyList_Type) {
4332 status = save_list(self, obj);
4333 goto done;
4334 }
4335 else if (type == &PyTuple_Type) {
4336 status = save_tuple(self, obj);
4337 goto done;
4338 }
4339 else if (type == &PyByteArray_Type) {
4340 status = save_bytearray(self, obj);
4341 goto done;
4342 }
4343 else if (type == &PyPickleBuffer_Type) {
4344 status = save_picklebuffer(self, obj);
4345 goto done;
4346 }
4347
4348 /* Now, check reducer_override. If it returns NotImplemented,
4349 * fallback to save_type or save_global, and then perhaps to the
4350 * regular reduction mechanism.
4351 */
4352 if (self->reducer_override != NULL) {
4353 reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4354 if (reduce_value == NULL) {
4355 goto error;
4356 }
4357 if (reduce_value != Py_NotImplemented) {
4358 goto reduce;
4359 }
4360 Py_DECREF(reduce_value);
4361 reduce_value = NULL;
4362 }
4363
4364 if (type == &PyType_Type) {
4365 status = save_type(self, obj);
4366 goto done;
4367 }
4368 else if (type == &PyFunction_Type) {
4369 status = save_global(self, obj, NULL);
4370 goto done;
4371 }
4372
4373 /* XXX: This part needs some unit tests. */
4374
4375 /* Get a reduction callable, and call it. This may come from
4376 * self.dispatch_table, copyreg.dispatch_table, the object's
4377 * __reduce_ex__ method, or the object's __reduce__ method.
4378 */
4379 if (self->dispatch_table == NULL) {
4380 PickleState *st = _Pickle_GetGlobalState();
4381 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4382 (PyObject *)type);
4383 if (reduce_func == NULL) {
4384 if (PyErr_Occurred()) {
4385 goto error;
4386 }
4387 } else {
4388 /* PyDict_GetItemWithError() returns a borrowed reference.
4389 Increase the reference count to be consistent with
4390 PyObject_GetItem and _PyObject_GetAttrId used below. */
4391 Py_INCREF(reduce_func);
4392 }
4393 } else {
4394 reduce_func = PyObject_GetItem(self->dispatch_table,
4395 (PyObject *)type);
4396 if (reduce_func == NULL) {
4397 if (PyErr_ExceptionMatches(PyExc_KeyError))
4398 PyErr_Clear();
4399 else
4400 goto error;
4401 }
4402 }
4403 if (reduce_func != NULL) {
4404 Py_INCREF(obj);
4405 reduce_value = _Pickle_FastCall(reduce_func, obj);
4406 }
4407 else if (PyType_IsSubtype(type, &PyType_Type)) {
4408 status = save_global(self, obj, NULL);
4409 goto done;
4410 }
4411 else {
4412 _Py_IDENTIFIER(__reduce__);
4413 _Py_IDENTIFIER(__reduce_ex__);
4414
/* XXX: If the __reduce__ method is defined, __reduce_ex__ is
   automatically defined as __reduce__. While this is convenient, it
   makes it impossible to know which method was actually called. Of
   course, this is not a big deal. But still, it would be nice to let
   the user know which method was called when something goes
   wrong. Incidentally, this means that if __reduce_ex__ is not defined,
   we don't actually have to check for a __reduce__ method. */
4422
4423 /* Check for a __reduce_ex__ method. */
4424 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4425 goto error;
4426 }
4427 if (reduce_func != NULL) {
4428 PyObject *proto;
4429 proto = PyLong_FromLong(self->proto);
4430 if (proto != NULL) {
4431 reduce_value = _Pickle_FastCall(reduce_func, proto);
4432 }
4433 }
4434 else {
4435 /* Check for a __reduce__ method. */
4436 if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4437 goto error;
4438 }
4439 if (reduce_func != NULL) {
4440 reduce_value = PyObject_CallNoArgs(reduce_func);
4441 }
4442 else {
4443 PickleState *st = _Pickle_GetGlobalState();
4444 PyErr_Format(st->PicklingError,
4445 "can't pickle '%.200s' object: %R",
4446 type->tp_name, obj);
4447 goto error;
4448 }
4449 }
4450 }
4451
4452 if (reduce_value == NULL)
4453 goto error;
4454
4455 reduce:
4456 if (PyUnicode_Check(reduce_value)) {
4457 status = save_global(self, obj, reduce_value);
4458 goto done;
4459 }
4460
4461 if (!PyTuple_Check(reduce_value)) {
4462 PickleState *st = _Pickle_GetGlobalState();
4463 PyErr_SetString(st->PicklingError,
4464 "__reduce__ must return a string or tuple");
4465 goto error;
4466 }
4467
4468 status = save_reduce(self, reduce_value, obj);
4469
4470 if (0) {
4471 error:
4472 status = -1;
4473 }
4474 done:
4475
4476 Py_LeaveRecursiveCall();
4477 Py_XDECREF(reduce_func);
4478 Py_XDECREF(reduce_value);
4479
4480 return status;
4481 }
4482
4483 static int
dump(PicklerObject * self,PyObject * obj)4484 dump(PicklerObject *self, PyObject *obj)
4485 {
4486 const char stop_op = STOP;
4487 int status = -1;
4488 PyObject *tmp;
4489 _Py_IDENTIFIER(reducer_override);
4490
4491 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4492 &tmp) < 0) {
4493 goto error;
4494 }
4495 /* Cache the reducer_override method, if it exists. */
4496 if (tmp != NULL) {
4497 Py_XSETREF(self->reducer_override, tmp);
4498 }
4499 else {
4500 Py_CLEAR(self->reducer_override);
4501 }
4502
4503 if (self->proto >= 2) {
4504 char header[2];
4505
4506 header[0] = PROTO;
4507 assert(self->proto >= 0 && self->proto < 256);
4508 header[1] = (unsigned char)self->proto;
4509 if (_Pickler_Write(self, header, 2) < 0)
4510 goto error;
4511 if (self->proto >= 4)
4512 self->framing = 1;
4513 }
4514
4515 if (save(self, obj, 0) < 0 ||
4516 _Pickler_Write(self, &stop_op, 1) < 0 ||
4517 _Pickler_CommitFrame(self) < 0)
4518 goto error;
4519
4520 // Success
4521 status = 0;
4522
4523 error:
4524 self->framing = 0;
4525
4526 /* Break the reference cycle we generated at the beginning this function
4527 * call when setting the reducer_override attribute of the Pickler instance
4528 * to a bound method of the same instance. This is important as the Pickler
4529 * instance holds a reference to each object it has pickled (through its
4530 * memo): thus, these objects won't be garbage-collected as long as the
4531 * Pickler itself is not collected. */
4532 Py_CLEAR(self->reducer_override);
4533 return status;
4534 }
4535
4536 /*[clinic input]
4537
4538 _pickle.Pickler.clear_memo
4539
4540 Clears the pickler's "memo".
4541
4542 The memo is the data structure that remembers which objects the
4543 pickler has already seen, so that shared or recursive objects are
4544 pickled by reference and not by value. This method is useful when
4545 re-using picklers.
4546 [clinic start generated code]*/
4547
4548 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4549 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4550 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4551 {
4552 if (self->memo)
4553 PyMemoTable_Clear(self->memo);
4554
4555 Py_RETURN_NONE;
4556 }
4557
4558 /*[clinic input]
4559
4560 _pickle.Pickler.dump
4561
4562 obj: object
4563 /
4564
4565 Write a pickled representation of the given object to the open file.
4566 [clinic start generated code]*/
4567
4568 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4569 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4570 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4571 {
4572 /* Check whether the Pickler was initialized correctly (issue3664).
4573 Developers often forget to call __init__() in their subclasses, which
4574 would trigger a segfault without this check. */
4575 if (self->write == NULL) {
4576 PickleState *st = _Pickle_GetGlobalState();
4577 PyErr_Format(st->PicklingError,
4578 "Pickler.__init__() was not called by %s.__init__()",
4579 Py_TYPE(self)->tp_name);
4580 return NULL;
4581 }
4582
4583 if (_Pickler_ClearBuffer(self) < 0)
4584 return NULL;
4585
4586 if (dump(self, obj) < 0)
4587 return NULL;
4588
4589 if (_Pickler_FlushToFile(self) < 0)
4590 return NULL;
4591
4592 Py_RETURN_NONE;
4593 }
4594
4595 /*[clinic input]
4596
4597 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4598
4599 Returns size in memory, in bytes.
4600 [clinic start generated code]*/
4601
4602 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4603 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4604 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4605 {
4606 Py_ssize_t res, s;
4607
4608 res = _PyObject_SIZE(Py_TYPE(self));
4609 if (self->memo != NULL) {
4610 res += sizeof(PyMemoTable);
4611 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4612 }
4613 if (self->output_buffer != NULL) {
4614 s = _PySys_GetSizeOf(self->output_buffer);
4615 if (s == -1)
4616 return -1;
4617 res += s;
4618 }
4619 return res;
4620 }
4621
4622 static struct PyMethodDef Pickler_methods[] = {
4623 _PICKLE_PICKLER_DUMP_METHODDEF
4624 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4625 _PICKLE_PICKLER___SIZEOF___METHODDEF
4626 {NULL, NULL} /* sentinel */
4627 };
4628
4629 static void
Pickler_dealloc(PicklerObject * self)4630 Pickler_dealloc(PicklerObject *self)
4631 {
4632 PyObject_GC_UnTrack(self);
4633
4634 Py_XDECREF(self->output_buffer);
4635 Py_XDECREF(self->write);
4636 Py_XDECREF(self->pers_func);
4637 Py_XDECREF(self->dispatch_table);
4638 Py_XDECREF(self->fast_memo);
4639 Py_XDECREF(self->reducer_override);
4640 Py_XDECREF(self->buffer_callback);
4641
4642 PyMemoTable_Del(self->memo);
4643
4644 Py_TYPE(self)->tp_free((PyObject *)self);
4645 }
4646
4647 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4648 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4649 {
4650 Py_VISIT(self->write);
4651 Py_VISIT(self->pers_func);
4652 Py_VISIT(self->dispatch_table);
4653 Py_VISIT(self->fast_memo);
4654 Py_VISIT(self->reducer_override);
4655 Py_VISIT(self->buffer_callback);
4656 return 0;
4657 }
4658
4659 static int
Pickler_clear(PicklerObject * self)4660 Pickler_clear(PicklerObject *self)
4661 {
4662 Py_CLEAR(self->output_buffer);
4663 Py_CLEAR(self->write);
4664 Py_CLEAR(self->pers_func);
4665 Py_CLEAR(self->dispatch_table);
4666 Py_CLEAR(self->fast_memo);
4667 Py_CLEAR(self->reducer_override);
4668 Py_CLEAR(self->buffer_callback);
4669
4670 if (self->memo != NULL) {
4671 PyMemoTable *memo = self->memo;
4672 self->memo = NULL;
4673 PyMemoTable_Del(memo);
4674 }
4675 return 0;
4676 }
4677
4678
4679 /*[clinic input]
4680
4681 _pickle.Pickler.__init__
4682
4683 file: object
4684 protocol: object = None
4685 fix_imports: bool = True
4686 buffer_callback: object = None
4687
4688 This takes a binary file for writing a pickle data stream.
4689
4690 The optional *protocol* argument tells the pickler to use the given
4691 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4692 protocol is 4. It was introduced in Python 3.4, and is incompatible
4693 with previous versions.
4694
4695 Specifying a negative protocol version selects the highest protocol
4696 version supported. The higher the protocol used, the more recent the
4697 version of Python needed to read the pickle produced.
4698
4699 The *file* argument must have a write() method that accepts a single
4700 bytes argument. It can thus be a file object opened for binary
4701 writing, an io.BytesIO instance, or any other custom object that meets
4702 this interface.
4703
4704 If *fix_imports* is True and protocol is less than 3, pickle will try
4705 to map the new Python 3 names to the old module names used in Python
4706 2, so that the pickle data stream is readable with Python 2.
4707
4708 If *buffer_callback* is None (the default), buffer views are
4709 serialized into *file* as part of the pickle stream.
4710
4711 If *buffer_callback* is not None, then it can be called any number
4712 of times with a buffer view. If the callback returns a false value
4713 (such as None), the given buffer is out-of-band; otherwise the
4714 buffer is serialized in-band, i.e. inside the pickle stream.
4715
4716 It is an error if *buffer_callback* is not None and *protocol*
4717 is None or smaller than 5.
4718
4719 [clinic start generated code]*/
4720
4721 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4722 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4723 PyObject *protocol, int fix_imports,
4724 PyObject *buffer_callback)
4725 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4726 {
4727 _Py_IDENTIFIER(persistent_id);
4728 _Py_IDENTIFIER(dispatch_table);
4729
4730 /* In case of multiple __init__() calls, clear previous content. */
4731 if (self->write != NULL)
4732 (void)Pickler_clear(self);
4733
4734 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4735 return -1;
4736
4737 if (_Pickler_SetOutputStream(self, file) < 0)
4738 return -1;
4739
4740 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4741 return -1;
4742
4743 /* memo and output_buffer may have already been created in _Pickler_New */
4744 if (self->memo == NULL) {
4745 self->memo = PyMemoTable_New();
4746 if (self->memo == NULL)
4747 return -1;
4748 }
4749 self->output_len = 0;
4750 if (self->output_buffer == NULL) {
4751 self->max_output_len = WRITE_BUF_SIZE;
4752 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4753 self->max_output_len);
4754 if (self->output_buffer == NULL)
4755 return -1;
4756 }
4757
4758 self->fast = 0;
4759 self->fast_nesting = 0;
4760 self->fast_memo = NULL;
4761
4762 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4763 &self->pers_func, &self->pers_func_self) < 0)
4764 {
4765 return -1;
4766 }
4767
4768 if (_PyObject_LookupAttrId((PyObject *)self,
4769 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4770 return -1;
4771 }
4772
4773 return 0;
4774 }
4775
4776
4777 /* Define a proxy object for the Pickler's internal memo object. This is to
4778 * avoid breaking code like:
4779 * pickler.memo.clear()
4780 * and
4781 * pickler.memo = saved_memo
4782 * Is this a good idea? Not really, but we don't want to break code that uses
4783 * it. Note that we don't implement the entire mapping API here. This is
4784 * intentional, as these should be treated as black-box implementation details.
4785 */
4786
4787 /*[clinic input]
4788 _pickle.PicklerMemoProxy.clear
4789
4790 Remove all items from memo.
4791 [clinic start generated code]*/
4792
4793 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4794 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4795 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4796 {
4797 if (self->pickler->memo)
4798 PyMemoTable_Clear(self->pickler->memo);
4799 Py_RETURN_NONE;
4800 }
4801
4802 /*[clinic input]
4803 _pickle.PicklerMemoProxy.copy
4804
4805 Copy the memo to a new object.
4806 [clinic start generated code]*/
4807
4808 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4809 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4810 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4811 {
4812 PyMemoTable *memo;
4813 PyObject *new_memo = PyDict_New();
4814 if (new_memo == NULL)
4815 return NULL;
4816
4817 memo = self->pickler->memo;
4818 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4819 PyMemoEntry entry = memo->mt_table[i];
4820 if (entry.me_key != NULL) {
4821 int status;
4822 PyObject *key, *value;
4823
4824 key = PyLong_FromVoidPtr(entry.me_key);
4825 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4826
4827 if (key == NULL || value == NULL) {
4828 Py_XDECREF(key);
4829 Py_XDECREF(value);
4830 goto error;
4831 }
4832 status = PyDict_SetItem(new_memo, key, value);
4833 Py_DECREF(key);
4834 Py_DECREF(value);
4835 if (status < 0)
4836 goto error;
4837 }
4838 }
4839 return new_memo;
4840
4841 error:
4842 Py_XDECREF(new_memo);
4843 return NULL;
4844 }
4845
4846 /*[clinic input]
4847 _pickle.PicklerMemoProxy.__reduce__
4848
4849 Implement pickle support.
4850 [clinic start generated code]*/
4851
4852 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4853 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4854 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4855 {
4856 PyObject *reduce_value, *dict_args;
4857 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4858 if (contents == NULL)
4859 return NULL;
4860
4861 reduce_value = PyTuple_New(2);
4862 if (reduce_value == NULL) {
4863 Py_DECREF(contents);
4864 return NULL;
4865 }
4866 dict_args = PyTuple_New(1);
4867 if (dict_args == NULL) {
4868 Py_DECREF(contents);
4869 Py_DECREF(reduce_value);
4870 return NULL;
4871 }
4872 PyTuple_SET_ITEM(dict_args, 0, contents);
4873 Py_INCREF((PyObject *)&PyDict_Type);
4874 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4875 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4876 return reduce_value;
4877 }
4878
4879 static PyMethodDef picklerproxy_methods[] = {
4880 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4881 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4882 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4883 {NULL, NULL} /* sentinel */
4884 };
4885
4886 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4887 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4888 {
4889 PyObject_GC_UnTrack(self);
4890 Py_XDECREF(self->pickler);
4891 PyObject_GC_Del((PyObject *)self);
4892 }
4893
4894 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4895 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4896 visitproc visit, void *arg)
4897 {
4898 Py_VISIT(self->pickler);
4899 return 0;
4900 }
4901
4902 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4903 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4904 {
4905 Py_CLEAR(self->pickler);
4906 return 0;
4907 }
4908
4909 static PyTypeObject PicklerMemoProxyType = {
4910 PyVarObject_HEAD_INIT(NULL, 0)
4911 "_pickle.PicklerMemoProxy", /*tp_name*/
4912 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4913 0,
4914 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4915 0, /* tp_vectorcall_offset */
4916 0, /* tp_getattr */
4917 0, /* tp_setattr */
4918 0, /* tp_as_async */
4919 0, /* tp_repr */
4920 0, /* tp_as_number */
4921 0, /* tp_as_sequence */
4922 0, /* tp_as_mapping */
4923 PyObject_HashNotImplemented, /* tp_hash */
4924 0, /* tp_call */
4925 0, /* tp_str */
4926 PyObject_GenericGetAttr, /* tp_getattro */
4927 PyObject_GenericSetAttr, /* tp_setattro */
4928 0, /* tp_as_buffer */
4929 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4930 0, /* tp_doc */
4931 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4932 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4933 0, /* tp_richcompare */
4934 0, /* tp_weaklistoffset */
4935 0, /* tp_iter */
4936 0, /* tp_iternext */
4937 picklerproxy_methods, /* tp_methods */
4938 };
4939
4940 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4941 PicklerMemoProxy_New(PicklerObject *pickler)
4942 {
4943 PicklerMemoProxyObject *self;
4944
4945 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4946 if (self == NULL)
4947 return NULL;
4948 Py_INCREF(pickler);
4949 self->pickler = pickler;
4950 PyObject_GC_Track(self);
4951 return (PyObject *)self;
4952 }
4953
4954 /*****************************************************************************/
4955
4956 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4957 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4958 {
4959 return PicklerMemoProxy_New(self);
4960 }
4961
4962 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4963 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4964 {
4965 PyMemoTable *new_memo = NULL;
4966
4967 if (obj == NULL) {
4968 PyErr_SetString(PyExc_TypeError,
4969 "attribute deletion is not supported");
4970 return -1;
4971 }
4972
4973 if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4974 PicklerObject *pickler =
4975 ((PicklerMemoProxyObject *)obj)->pickler;
4976
4977 new_memo = PyMemoTable_Copy(pickler->memo);
4978 if (new_memo == NULL)
4979 return -1;
4980 }
4981 else if (PyDict_Check(obj)) {
4982 Py_ssize_t i = 0;
4983 PyObject *key, *value;
4984
4985 new_memo = PyMemoTable_New();
4986 if (new_memo == NULL)
4987 return -1;
4988
4989 while (PyDict_Next(obj, &i, &key, &value)) {
4990 Py_ssize_t memo_id;
4991 PyObject *memo_obj;
4992
4993 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4994 PyErr_SetString(PyExc_TypeError,
4995 "'memo' values must be 2-item tuples");
4996 goto error;
4997 }
4998 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4999 if (memo_id == -1 && PyErr_Occurred())
5000 goto error;
5001 memo_obj = PyTuple_GET_ITEM(value, 1);
5002 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5003 goto error;
5004 }
5005 }
5006 else {
5007 PyErr_Format(PyExc_TypeError,
5008 "'memo' attribute must be a PicklerMemoProxy object "
5009 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5010 return -1;
5011 }
5012
5013 PyMemoTable_Del(self->memo);
5014 self->memo = new_memo;
5015
5016 return 0;
5017
5018 error:
5019 if (new_memo)
5020 PyMemoTable_Del(new_memo);
5021 return -1;
5022 }
5023
5024 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5025 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5026 {
5027 if (self->pers_func == NULL) {
5028 PyErr_SetString(PyExc_AttributeError, "persistent_id");
5029 return NULL;
5030 }
5031 return reconstruct_method(self->pers_func, self->pers_func_self);
5032 }
5033
5034 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5035 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5036 {
5037 if (value == NULL) {
5038 PyErr_SetString(PyExc_TypeError,
5039 "attribute deletion is not supported");
5040 return -1;
5041 }
5042 if (!PyCallable_Check(value)) {
5043 PyErr_SetString(PyExc_TypeError,
5044 "persistent_id must be a callable taking one argument");
5045 return -1;
5046 }
5047
5048 self->pers_func_self = NULL;
5049 Py_INCREF(value);
5050 Py_XSETREF(self->pers_func, value);
5051
5052 return 0;
5053 }
5054
5055 static PyMemberDef Pickler_members[] = {
5056 {"bin", T_INT, offsetof(PicklerObject, bin)},
5057 {"fast", T_INT, offsetof(PicklerObject, fast)},
5058 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5059 {NULL}
5060 };
5061
5062 static PyGetSetDef Pickler_getsets[] = {
5063 {"memo", (getter)Pickler_get_memo,
5064 (setter)Pickler_set_memo},
5065 {"persistent_id", (getter)Pickler_get_persid,
5066 (setter)Pickler_set_persid},
5067 {NULL}
5068 };
5069
5070 static PyTypeObject Pickler_Type = {
5071 PyVarObject_HEAD_INIT(NULL, 0)
5072 "_pickle.Pickler" , /*tp_name*/
5073 sizeof(PicklerObject), /*tp_basicsize*/
5074 0, /*tp_itemsize*/
5075 (destructor)Pickler_dealloc, /*tp_dealloc*/
5076 0, /*tp_vectorcall_offset*/
5077 0, /*tp_getattr*/
5078 0, /*tp_setattr*/
5079 0, /*tp_as_async*/
5080 0, /*tp_repr*/
5081 0, /*tp_as_number*/
5082 0, /*tp_as_sequence*/
5083 0, /*tp_as_mapping*/
5084 0, /*tp_hash*/
5085 0, /*tp_call*/
5086 0, /*tp_str*/
5087 0, /*tp_getattro*/
5088 0, /*tp_setattro*/
5089 0, /*tp_as_buffer*/
5090 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5091 _pickle_Pickler___init____doc__, /*tp_doc*/
5092 (traverseproc)Pickler_traverse, /*tp_traverse*/
5093 (inquiry)Pickler_clear, /*tp_clear*/
5094 0, /*tp_richcompare*/
5095 0, /*tp_weaklistoffset*/
5096 0, /*tp_iter*/
5097 0, /*tp_iternext*/
5098 Pickler_methods, /*tp_methods*/
5099 Pickler_members, /*tp_members*/
5100 Pickler_getsets, /*tp_getset*/
5101 0, /*tp_base*/
5102 0, /*tp_dict*/
5103 0, /*tp_descr_get*/
5104 0, /*tp_descr_set*/
5105 0, /*tp_dictoffset*/
5106 _pickle_Pickler___init__, /*tp_init*/
5107 PyType_GenericAlloc, /*tp_alloc*/
5108 PyType_GenericNew, /*tp_new*/
5109 PyObject_GC_Del, /*tp_free*/
5110 0, /*tp_is_gc*/
5111 };
5112
5113 /* Temporary helper for calling self.find_class().
5114
5115 XXX: It would be nice to able to avoid Python function call overhead, by
5116 using directly the C version of find_class(), when find_class() is not
5117 overridden by a subclass. Although, this could become rather hackish. A
5118 simpler optimization would be to call the C function when self is not a
5119 subclass instance. */
5120 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5121 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5122 {
5123 _Py_IDENTIFIER(find_class);
5124
5125 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5126 module_name, global_name, NULL);
5127 }
5128
5129 static Py_ssize_t
marker(UnpicklerObject * self)5130 marker(UnpicklerObject *self)
5131 {
5132 Py_ssize_t mark;
5133
5134 if (self->num_marks < 1) {
5135 PickleState *st = _Pickle_GetGlobalState();
5136 PyErr_SetString(st->UnpicklingError, "could not find MARK");
5137 return -1;
5138 }
5139
5140 mark = self->marks[--self->num_marks];
5141 self->stack->mark_set = self->num_marks != 0;
5142 self->stack->fence = self->num_marks ?
5143 self->marks[self->num_marks - 1] : 0;
5144 return mark;
5145 }
5146
5147 static int
load_none(UnpicklerObject * self)5148 load_none(UnpicklerObject *self)
5149 {
5150 PDATA_APPEND(self->stack, Py_None, -1);
5151 return 0;
5152 }
5153
5154 static int
load_int(UnpicklerObject * self)5155 load_int(UnpicklerObject *self)
5156 {
5157 PyObject *value;
5158 char *endptr, *s;
5159 Py_ssize_t len;
5160 long x;
5161
5162 if ((len = _Unpickler_Readline(self, &s)) < 0)
5163 return -1;
5164 if (len < 2)
5165 return bad_readline();
5166
5167 errno = 0;
5168 /* XXX: Should the base argument of strtol() be explicitly set to 10?
5169 XXX(avassalotti): Should this uses PyOS_strtol()? */
5170 x = strtol(s, &endptr, 0);
5171
5172 if (errno || (*endptr != '\n' && *endptr != '\0')) {
5173 /* Hm, maybe we've got something long. Let's try reading
5174 * it as a Python int object. */
5175 errno = 0;
5176 /* XXX: Same thing about the base here. */
5177 value = PyLong_FromString(s, NULL, 0);
5178 if (value == NULL) {
5179 PyErr_SetString(PyExc_ValueError,
5180 "could not convert string to int");
5181 return -1;
5182 }
5183 }
5184 else {
5185 if (len == 3 && (x == 0 || x == 1)) {
5186 if ((value = PyBool_FromLong(x)) == NULL)
5187 return -1;
5188 }
5189 else {
5190 if ((value = PyLong_FromLong(x)) == NULL)
5191 return -1;
5192 }
5193 }
5194
5195 PDATA_PUSH(self->stack, value, -1);
5196 return 0;
5197 }
5198
5199 static int
load_bool(UnpicklerObject * self,PyObject * boolean)5200 load_bool(UnpicklerObject *self, PyObject *boolean)
5201 {
5202 assert(boolean == Py_True || boolean == Py_False);
5203 PDATA_APPEND(self->stack, boolean, -1);
5204 return 0;
5205 }
5206
5207 /* s contains x bytes of an unsigned little-endian integer. Return its value
5208 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5209 */
5210 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5211 calc_binsize(char *bytes, int nbytes)
5212 {
5213 unsigned char *s = (unsigned char *)bytes;
5214 int i;
5215 size_t x = 0;
5216
5217 if (nbytes > (int)sizeof(size_t)) {
5218 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5219 * have 64-bit size that can't be represented on 32-bit platform.
5220 */
5221 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5222 if (s[i])
5223 return -1;
5224 }
5225 nbytes = (int)sizeof(size_t);
5226 }
5227 for (i = 0; i < nbytes; i++) {
5228 x |= (size_t) s[i] << (8 * i);
5229 }
5230
5231 if (x > PY_SSIZE_T_MAX)
5232 return -1;
5233 else
5234 return (Py_ssize_t) x;
5235 }
5236
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2 this is an unsigned
 * little-endian int, but when nbytes is 4 it is a signed one.  This is a
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long, so no shift ever moves a
 * bit into the sign bit of a signed type (that would be undefined
 * behaviour where long is 32 bits wide).  The sign of a 4-byte value is
 * then applied with plain arithmetic, which gives the same result for any
 * width of long.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed: a set bit 31 means the value is ux - 2**32.  It is computed
     * as -(0xFFFFFFFF - ux) - 1 so every intermediate fits in a long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
5263
5264 static int
load_binintx(UnpicklerObject * self,char * s,int size)5265 load_binintx(UnpicklerObject *self, char *s, int size)
5266 {
5267 PyObject *value;
5268 long x;
5269
5270 x = calc_binint(s, size);
5271
5272 if ((value = PyLong_FromLong(x)) == NULL)
5273 return -1;
5274
5275 PDATA_PUSH(self->stack, value, -1);
5276 return 0;
5277 }
5278
5279 static int
load_binint(UnpicklerObject * self)5280 load_binint(UnpicklerObject *self)
5281 {
5282 char *s;
5283
5284 if (_Unpickler_Read(self, &s, 4) < 0)
5285 return -1;
5286
5287 return load_binintx(self, s, 4);
5288 }
5289
5290 static int
load_binint1(UnpicklerObject * self)5291 load_binint1(UnpicklerObject *self)
5292 {
5293 char *s;
5294
5295 if (_Unpickler_Read(self, &s, 1) < 0)
5296 return -1;
5297
5298 return load_binintx(self, s, 1);
5299 }
5300
5301 static int
load_binint2(UnpicklerObject * self)5302 load_binint2(UnpicklerObject *self)
5303 {
5304 char *s;
5305
5306 if (_Unpickler_Read(self, &s, 2) < 0)
5307 return -1;
5308
5309 return load_binintx(self, s, 2);
5310 }
5311
5312 static int
load_long(UnpicklerObject * self)5313 load_long(UnpicklerObject *self)
5314 {
5315 PyObject *value;
5316 char *s = NULL;
5317 Py_ssize_t len;
5318
5319 if ((len = _Unpickler_Readline(self, &s)) < 0)
5320 return -1;
5321 if (len < 2)
5322 return bad_readline();
5323
5324 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5325 the 'L' before calling PyLong_FromString. In order to maintain
5326 compatibility with Python 3.0.0, we don't actually *require*
5327 the 'L' to be present. */
5328 if (s[len-2] == 'L')
5329 s[len-2] = '\0';
5330 /* XXX: Should the base argument explicitly set to 10? */
5331 value = PyLong_FromString(s, NULL, 0);
5332 if (value == NULL)
5333 return -1;
5334
5335 PDATA_PUSH(self->stack, value, -1);
5336 return 0;
5337 }
5338
5339 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5340 * data following.
5341 */
5342 static int
load_counted_long(UnpicklerObject * self,int size)5343 load_counted_long(UnpicklerObject *self, int size)
5344 {
5345 PyObject *value;
5346 char *nbytes;
5347 char *pdata;
5348
5349 assert(size == 1 || size == 4);
5350 if (_Unpickler_Read(self, &nbytes, size) < 0)
5351 return -1;
5352
5353 size = calc_binint(nbytes, size);
5354 if (size < 0) {
5355 PickleState *st = _Pickle_GetGlobalState();
5356 /* Corrupt or hostile pickle -- we never write one like this */
5357 PyErr_SetString(st->UnpicklingError,
5358 "LONG pickle has negative byte count");
5359 return -1;
5360 }
5361
5362 if (size == 0)
5363 value = PyLong_FromLong(0L);
5364 else {
5365 /* Read the raw little-endian bytes and convert. */
5366 if (_Unpickler_Read(self, &pdata, size) < 0)
5367 return -1;
5368 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5369 1 /* little endian */ , 1 /* signed */ );
5370 }
5371 if (value == NULL)
5372 return -1;
5373 PDATA_PUSH(self->stack, value, -1);
5374 return 0;
5375 }
5376
5377 static int
load_float(UnpicklerObject * self)5378 load_float(UnpicklerObject *self)
5379 {
5380 PyObject *value;
5381 char *endptr, *s;
5382 Py_ssize_t len;
5383 double d;
5384
5385 if ((len = _Unpickler_Readline(self, &s)) < 0)
5386 return -1;
5387 if (len < 2)
5388 return bad_readline();
5389
5390 errno = 0;
5391 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5392 if (d == -1.0 && PyErr_Occurred())
5393 return -1;
5394 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5395 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5396 return -1;
5397 }
5398 value = PyFloat_FromDouble(d);
5399 if (value == NULL)
5400 return -1;
5401
5402 PDATA_PUSH(self->stack, value, -1);
5403 return 0;
5404 }
5405
5406 static int
load_binfloat(UnpicklerObject * self)5407 load_binfloat(UnpicklerObject *self)
5408 {
5409 PyObject *value;
5410 double x;
5411 char *s;
5412
5413 if (_Unpickler_Read(self, &s, 8) < 0)
5414 return -1;
5415
5416 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5417 if (x == -1.0 && PyErr_Occurred())
5418 return -1;
5419
5420 if ((value = PyFloat_FromDouble(x)) == NULL)
5421 return -1;
5422
5423 PDATA_PUSH(self->stack, value, -1);
5424 return 0;
5425 }
5426
5427 static int
load_string(UnpicklerObject * self)5428 load_string(UnpicklerObject *self)
5429 {
5430 PyObject *bytes;
5431 PyObject *obj;
5432 Py_ssize_t len;
5433 char *s, *p;
5434
5435 if ((len = _Unpickler_Readline(self, &s)) < 0)
5436 return -1;
5437 /* Strip the newline */
5438 len--;
5439 /* Strip outermost quotes */
5440 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5441 p = s + 1;
5442 len -= 2;
5443 }
5444 else {
5445 PickleState *st = _Pickle_GetGlobalState();
5446 PyErr_SetString(st->UnpicklingError,
5447 "the STRING opcode argument must be quoted");
5448 return -1;
5449 }
5450 assert(len >= 0);
5451
5452 /* Use the PyBytes API to decode the string, since that is what is used
5453 to encode, and then coerce the result to Unicode. */
5454 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5455 if (bytes == NULL)
5456 return -1;
5457
5458 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5459 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5460 if (strcmp(self->encoding, "bytes") == 0) {
5461 obj = bytes;
5462 }
5463 else {
5464 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5465 Py_DECREF(bytes);
5466 if (obj == NULL) {
5467 return -1;
5468 }
5469 }
5470
5471 PDATA_PUSH(self->stack, obj, -1);
5472 return 0;
5473 }
5474
5475 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5476 load_counted_binstring(UnpicklerObject *self, int nbytes)
5477 {
5478 PyObject *obj;
5479 Py_ssize_t size;
5480 char *s;
5481
5482 if (_Unpickler_Read(self, &s, nbytes) < 0)
5483 return -1;
5484
5485 size = calc_binsize(s, nbytes);
5486 if (size < 0) {
5487 PickleState *st = _Pickle_GetGlobalState();
5488 PyErr_Format(st->UnpicklingError,
5489 "BINSTRING exceeds system's maximum size of %zd bytes",
5490 PY_SSIZE_T_MAX);
5491 return -1;
5492 }
5493
5494 if (_Unpickler_Read(self, &s, size) < 0)
5495 return -1;
5496
5497 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5498 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5499 if (strcmp(self->encoding, "bytes") == 0) {
5500 obj = PyBytes_FromStringAndSize(s, size);
5501 }
5502 else {
5503 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5504 }
5505 if (obj == NULL) {
5506 return -1;
5507 }
5508
5509 PDATA_PUSH(self->stack, obj, -1);
5510 return 0;
5511 }
5512
5513 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5514 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5515 {
5516 PyObject *bytes;
5517 Py_ssize_t size;
5518 char *s;
5519
5520 if (_Unpickler_Read(self, &s, nbytes) < 0)
5521 return -1;
5522
5523 size = calc_binsize(s, nbytes);
5524 if (size < 0) {
5525 PyErr_Format(PyExc_OverflowError,
5526 "BINBYTES exceeds system's maximum size of %zd bytes",
5527 PY_SSIZE_T_MAX);
5528 return -1;
5529 }
5530
5531 bytes = PyBytes_FromStringAndSize(NULL, size);
5532 if (bytes == NULL)
5533 return -1;
5534 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5535 Py_DECREF(bytes);
5536 return -1;
5537 }
5538
5539 PDATA_PUSH(self->stack, bytes, -1);
5540 return 0;
5541 }
5542
5543 static int
load_counted_bytearray(UnpicklerObject * self)5544 load_counted_bytearray(UnpicklerObject *self)
5545 {
5546 PyObject *bytearray;
5547 Py_ssize_t size;
5548 char *s;
5549
5550 if (_Unpickler_Read(self, &s, 8) < 0) {
5551 return -1;
5552 }
5553
5554 size = calc_binsize(s, 8);
5555 if (size < 0) {
5556 PyErr_Format(PyExc_OverflowError,
5557 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5558 PY_SSIZE_T_MAX);
5559 return -1;
5560 }
5561
5562 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5563 if (bytearray == NULL) {
5564 return -1;
5565 }
5566 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5567 Py_DECREF(bytearray);
5568 return -1;
5569 }
5570
5571 PDATA_PUSH(self->stack, bytearray, -1);
5572 return 0;
5573 }
5574
5575 static int
load_next_buffer(UnpicklerObject * self)5576 load_next_buffer(UnpicklerObject *self)
5577 {
5578 if (self->buffers == NULL) {
5579 PickleState *st = _Pickle_GetGlobalState();
5580 PyErr_SetString(st->UnpicklingError,
5581 "pickle stream refers to out-of-band data "
5582 "but no *buffers* argument was given");
5583 return -1;
5584 }
5585 PyObject *buf = PyIter_Next(self->buffers);
5586 if (buf == NULL) {
5587 if (!PyErr_Occurred()) {
5588 PickleState *st = _Pickle_GetGlobalState();
5589 PyErr_SetString(st->UnpicklingError,
5590 "not enough out-of-band buffers");
5591 }
5592 return -1;
5593 }
5594
5595 PDATA_PUSH(self->stack, buf, -1);
5596 return 0;
5597 }
5598
5599 static int
load_readonly_buffer(UnpicklerObject * self)5600 load_readonly_buffer(UnpicklerObject *self)
5601 {
5602 Py_ssize_t len = Py_SIZE(self->stack);
5603 if (len <= self->stack->fence) {
5604 return Pdata_stack_underflow(self->stack);
5605 }
5606
5607 PyObject *obj = self->stack->data[len - 1];
5608 PyObject *view = PyMemoryView_FromObject(obj);
5609 if (view == NULL) {
5610 return -1;
5611 }
5612 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5613 /* Original object is writable */
5614 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5615 self->stack->data[len - 1] = view;
5616 Py_DECREF(obj);
5617 }
5618 else {
5619 /* Original object is read-only, no need to replace it */
5620 Py_DECREF(view);
5621 }
5622 return 0;
5623 }
5624
5625 static int
load_unicode(UnpicklerObject * self)5626 load_unicode(UnpicklerObject *self)
5627 {
5628 PyObject *str;
5629 Py_ssize_t len;
5630 char *s = NULL;
5631
5632 if ((len = _Unpickler_Readline(self, &s)) < 0)
5633 return -1;
5634 if (len < 1)
5635 return bad_readline();
5636
5637 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5638 if (str == NULL)
5639 return -1;
5640
5641 PDATA_PUSH(self->stack, str, -1);
5642 return 0;
5643 }
5644
5645 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5646 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5647 {
5648 PyObject *str;
5649 Py_ssize_t size;
5650 char *s;
5651
5652 if (_Unpickler_Read(self, &s, nbytes) < 0)
5653 return -1;
5654
5655 size = calc_binsize(s, nbytes);
5656 if (size < 0) {
5657 PyErr_Format(PyExc_OverflowError,
5658 "BINUNICODE exceeds system's maximum size of %zd bytes",
5659 PY_SSIZE_T_MAX);
5660 return -1;
5661 }
5662
5663 if (_Unpickler_Read(self, &s, size) < 0)
5664 return -1;
5665
5666 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5667 if (str == NULL)
5668 return -1;
5669
5670 PDATA_PUSH(self->stack, str, -1);
5671 return 0;
5672 }
5673
5674 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5675 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5676 {
5677 PyObject *tuple;
5678
5679 if (Py_SIZE(self->stack) < len)
5680 return Pdata_stack_underflow(self->stack);
5681
5682 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5683 if (tuple == NULL)
5684 return -1;
5685 PDATA_PUSH(self->stack, tuple, -1);
5686 return 0;
5687 }
5688
5689 static int
load_tuple(UnpicklerObject * self)5690 load_tuple(UnpicklerObject *self)
5691 {
5692 Py_ssize_t i;
5693
5694 if ((i = marker(self)) < 0)
5695 return -1;
5696
5697 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5698 }
5699
5700 static int
load_empty_list(UnpicklerObject * self)5701 load_empty_list(UnpicklerObject *self)
5702 {
5703 PyObject *list;
5704
5705 if ((list = PyList_New(0)) == NULL)
5706 return -1;
5707 PDATA_PUSH(self->stack, list, -1);
5708 return 0;
5709 }
5710
5711 static int
load_empty_dict(UnpicklerObject * self)5712 load_empty_dict(UnpicklerObject *self)
5713 {
5714 PyObject *dict;
5715
5716 if ((dict = PyDict_New()) == NULL)
5717 return -1;
5718 PDATA_PUSH(self->stack, dict, -1);
5719 return 0;
5720 }
5721
5722 static int
load_empty_set(UnpicklerObject * self)5723 load_empty_set(UnpicklerObject *self)
5724 {
5725 PyObject *set;
5726
5727 if ((set = PySet_New(NULL)) == NULL)
5728 return -1;
5729 PDATA_PUSH(self->stack, set, -1);
5730 return 0;
5731 }
5732
5733 static int
load_list(UnpicklerObject * self)5734 load_list(UnpicklerObject *self)
5735 {
5736 PyObject *list;
5737 Py_ssize_t i;
5738
5739 if ((i = marker(self)) < 0)
5740 return -1;
5741
5742 list = Pdata_poplist(self->stack, i);
5743 if (list == NULL)
5744 return -1;
5745 PDATA_PUSH(self->stack, list, -1);
5746 return 0;
5747 }
5748
5749 static int
load_dict(UnpicklerObject * self)5750 load_dict(UnpicklerObject *self)
5751 {
5752 PyObject *dict, *key, *value;
5753 Py_ssize_t i, j, k;
5754
5755 if ((i = marker(self)) < 0)
5756 return -1;
5757 j = Py_SIZE(self->stack);
5758
5759 if ((dict = PyDict_New()) == NULL)
5760 return -1;
5761
5762 if ((j - i) % 2 != 0) {
5763 PickleState *st = _Pickle_GetGlobalState();
5764 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5765 Py_DECREF(dict);
5766 return -1;
5767 }
5768
5769 for (k = i + 1; k < j; k += 2) {
5770 key = self->stack->data[k - 1];
5771 value = self->stack->data[k];
5772 if (PyDict_SetItem(dict, key, value) < 0) {
5773 Py_DECREF(dict);
5774 return -1;
5775 }
5776 }
5777 Pdata_clear(self->stack, i);
5778 PDATA_PUSH(self->stack, dict, -1);
5779 return 0;
5780 }
5781
5782 static int
load_frozenset(UnpicklerObject * self)5783 load_frozenset(UnpicklerObject *self)
5784 {
5785 PyObject *items;
5786 PyObject *frozenset;
5787 Py_ssize_t i;
5788
5789 if ((i = marker(self)) < 0)
5790 return -1;
5791
5792 items = Pdata_poptuple(self->stack, i);
5793 if (items == NULL)
5794 return -1;
5795
5796 frozenset = PyFrozenSet_New(items);
5797 Py_DECREF(items);
5798 if (frozenset == NULL)
5799 return -1;
5800
5801 PDATA_PUSH(self->stack, frozenset, -1);
5802 return 0;
5803 }
5804
5805 static PyObject *
instantiate(PyObject * cls,PyObject * args)5806 instantiate(PyObject *cls, PyObject *args)
5807 {
5808 /* Caller must assure args are a tuple. Normally, args come from
5809 Pdata_poptuple which packs objects from the top of the stack
5810 into a newly created tuple. */
5811 assert(PyTuple_Check(args));
5812 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5813 _Py_IDENTIFIER(__getinitargs__);
5814 _Py_IDENTIFIER(__new__);
5815 PyObject *func;
5816 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5817 return NULL;
5818 }
5819 if (func == NULL) {
5820 return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
5821 }
5822 Py_DECREF(func);
5823 }
5824 return PyObject_CallObject(cls, args);
5825 }
5826
5827 static int
load_obj(UnpicklerObject * self)5828 load_obj(UnpicklerObject *self)
5829 {
5830 PyObject *cls, *args, *obj = NULL;
5831 Py_ssize_t i;
5832
5833 if ((i = marker(self)) < 0)
5834 return -1;
5835
5836 if (Py_SIZE(self->stack) - i < 1)
5837 return Pdata_stack_underflow(self->stack);
5838
5839 args = Pdata_poptuple(self->stack, i + 1);
5840 if (args == NULL)
5841 return -1;
5842
5843 PDATA_POP(self->stack, cls);
5844 if (cls) {
5845 obj = instantiate(cls, args);
5846 Py_DECREF(cls);
5847 }
5848 Py_DECREF(args);
5849 if (obj == NULL)
5850 return -1;
5851
5852 PDATA_PUSH(self->stack, obj, -1);
5853 return 0;
5854 }
5855
5856 static int
load_inst(UnpicklerObject * self)5857 load_inst(UnpicklerObject *self)
5858 {
5859 PyObject *cls = NULL;
5860 PyObject *args = NULL;
5861 PyObject *obj = NULL;
5862 PyObject *module_name;
5863 PyObject *class_name;
5864 Py_ssize_t len;
5865 Py_ssize_t i;
5866 char *s;
5867
5868 if ((i = marker(self)) < 0)
5869 return -1;
5870 if ((len = _Unpickler_Readline(self, &s)) < 0)
5871 return -1;
5872 if (len < 2)
5873 return bad_readline();
5874
5875 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5876 identifiers are permitted in Python 3.0, since the INST opcode is only
5877 supported by older protocols on Python 2.x. */
5878 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5879 if (module_name == NULL)
5880 return -1;
5881
5882 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5883 if (len < 2) {
5884 Py_DECREF(module_name);
5885 return bad_readline();
5886 }
5887 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5888 if (class_name != NULL) {
5889 cls = find_class(self, module_name, class_name);
5890 Py_DECREF(class_name);
5891 }
5892 }
5893 Py_DECREF(module_name);
5894
5895 if (cls == NULL)
5896 return -1;
5897
5898 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5899 obj = instantiate(cls, args);
5900 Py_DECREF(args);
5901 }
5902 Py_DECREF(cls);
5903
5904 if (obj == NULL)
5905 return -1;
5906
5907 PDATA_PUSH(self->stack, obj, -1);
5908 return 0;
5909 }
5910
5911 static void
newobj_unpickling_error(const char * msg,int use_kwargs,PyObject * arg)5912 newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
5913 {
5914 PickleState *st = _Pickle_GetGlobalState();
5915 PyErr_Format(st->UnpicklingError, msg,
5916 use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5917 Py_TYPE(arg)->tp_name);
5918 }
5919
5920 static int
load_newobj(UnpicklerObject * self,int use_kwargs)5921 load_newobj(UnpicklerObject *self, int use_kwargs)
5922 {
5923 PyObject *cls, *args, *kwargs = NULL;
5924 PyObject *obj;
5925
5926 /* Stack is ... cls args [kwargs], and we want to call
5927 * cls.__new__(cls, *args, **kwargs).
5928 */
5929 if (use_kwargs) {
5930 PDATA_POP(self->stack, kwargs);
5931 if (kwargs == NULL) {
5932 return -1;
5933 }
5934 }
5935 PDATA_POP(self->stack, args);
5936 if (args == NULL) {
5937 Py_XDECREF(kwargs);
5938 return -1;
5939 }
5940 PDATA_POP(self->stack, cls);
5941 if (cls == NULL) {
5942 Py_XDECREF(kwargs);
5943 Py_DECREF(args);
5944 return -1;
5945 }
5946
5947 if (!PyType_Check(cls)) {
5948 newobj_unpickling_error("%s class argument must be a type, not %.200s",
5949 use_kwargs, cls);
5950 goto error;
5951 }
5952 if (((PyTypeObject *)cls)->tp_new == NULL) {
5953 newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5954 use_kwargs, cls);
5955 goto error;
5956 }
5957 if (!PyTuple_Check(args)) {
5958 newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5959 use_kwargs, args);
5960 goto error;
5961 }
5962 if (use_kwargs && !PyDict_Check(kwargs)) {
5963 newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5964 use_kwargs, kwargs);
5965 goto error;
5966 }
5967
5968 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5969 if (obj == NULL) {
5970 goto error;
5971 }
5972 Py_XDECREF(kwargs);
5973 Py_DECREF(args);
5974 Py_DECREF(cls);
5975 PDATA_PUSH(self->stack, obj, -1);
5976 return 0;
5977
5978 error:
5979 Py_XDECREF(kwargs);
5980 Py_DECREF(args);
5981 Py_DECREF(cls);
5982 return -1;
5983 }
5984
5985 static int
load_global(UnpicklerObject * self)5986 load_global(UnpicklerObject *self)
5987 {
5988 PyObject *global = NULL;
5989 PyObject *module_name;
5990 PyObject *global_name;
5991 Py_ssize_t len;
5992 char *s;
5993
5994 if ((len = _Unpickler_Readline(self, &s)) < 0)
5995 return -1;
5996 if (len < 2)
5997 return bad_readline();
5998 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5999 if (!module_name)
6000 return -1;
6001
6002 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6003 if (len < 2) {
6004 Py_DECREF(module_name);
6005 return bad_readline();
6006 }
6007 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6008 if (global_name) {
6009 global = find_class(self, module_name, global_name);
6010 Py_DECREF(global_name);
6011 }
6012 }
6013 Py_DECREF(module_name);
6014
6015 if (global == NULL)
6016 return -1;
6017 PDATA_PUSH(self->stack, global, -1);
6018 return 0;
6019 }
6020
6021 static int
load_stack_global(UnpicklerObject * self)6022 load_stack_global(UnpicklerObject *self)
6023 {
6024 PyObject *global;
6025 PyObject *module_name;
6026 PyObject *global_name;
6027
6028 PDATA_POP(self->stack, global_name);
6029 PDATA_POP(self->stack, module_name);
6030 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6031 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6032 PickleState *st = _Pickle_GetGlobalState();
6033 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6034 Py_XDECREF(global_name);
6035 Py_XDECREF(module_name);
6036 return -1;
6037 }
6038 global = find_class(self, module_name, global_name);
6039 Py_DECREF(global_name);
6040 Py_DECREF(module_name);
6041 if (global == NULL)
6042 return -1;
6043 PDATA_PUSH(self->stack, global, -1);
6044 return 0;
6045 }
6046
6047 static int
load_persid(UnpicklerObject * self)6048 load_persid(UnpicklerObject *self)
6049 {
6050 PyObject *pid, *obj;
6051 Py_ssize_t len;
6052 char *s;
6053
6054 if (self->pers_func) {
6055 if ((len = _Unpickler_Readline(self, &s)) < 0)
6056 return -1;
6057 if (len < 1)
6058 return bad_readline();
6059
6060 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6061 if (pid == NULL) {
6062 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6063 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6064 "persistent IDs in protocol 0 must be "
6065 "ASCII strings");
6066 }
6067 return -1;
6068 }
6069
6070 obj = call_method(self->pers_func, self->pers_func_self, pid);
6071 Py_DECREF(pid);
6072 if (obj == NULL)
6073 return -1;
6074
6075 PDATA_PUSH(self->stack, obj, -1);
6076 return 0;
6077 }
6078 else {
6079 PickleState *st = _Pickle_GetGlobalState();
6080 PyErr_SetString(st->UnpicklingError,
6081 "A load persistent id instruction was encountered,\n"
6082 "but no persistent_load function was specified.");
6083 return -1;
6084 }
6085 }
6086
6087 static int
load_binpersid(UnpicklerObject * self)6088 load_binpersid(UnpicklerObject *self)
6089 {
6090 PyObject *pid, *obj;
6091
6092 if (self->pers_func) {
6093 PDATA_POP(self->stack, pid);
6094 if (pid == NULL)
6095 return -1;
6096
6097 obj = call_method(self->pers_func, self->pers_func_self, pid);
6098 Py_DECREF(pid);
6099 if (obj == NULL)
6100 return -1;
6101
6102 PDATA_PUSH(self->stack, obj, -1);
6103 return 0;
6104 }
6105 else {
6106 PickleState *st = _Pickle_GetGlobalState();
6107 PyErr_SetString(st->UnpicklingError,
6108 "A load persistent id instruction was encountered,\n"
6109 "but no persistent_load function was specified.");
6110 return -1;
6111 }
6112 }
6113
6114 static int
load_pop(UnpicklerObject * self)6115 load_pop(UnpicklerObject *self)
6116 {
6117 Py_ssize_t len = Py_SIZE(self->stack);
6118
6119 /* Note that we split the (pickle.py) stack into two stacks,
6120 * an object stack and a mark stack. We have to be clever and
6121 * pop the right one. We do this by looking at the top of the
6122 * mark stack first, and only signalling a stack underflow if
6123 * the object stack is empty and the mark stack doesn't match
6124 * our expectations.
6125 */
6126 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6127 self->num_marks--;
6128 self->stack->mark_set = self->num_marks != 0;
6129 self->stack->fence = self->num_marks ?
6130 self->marks[self->num_marks - 1] : 0;
6131 } else if (len <= self->stack->fence)
6132 return Pdata_stack_underflow(self->stack);
6133 else {
6134 len--;
6135 Py_DECREF(self->stack->data[len]);
6136 Py_SET_SIZE(self->stack, len);
6137 }
6138 return 0;
6139 }
6140
6141 static int
load_pop_mark(UnpicklerObject * self)6142 load_pop_mark(UnpicklerObject *self)
6143 {
6144 Py_ssize_t i;
6145
6146 if ((i = marker(self)) < 0)
6147 return -1;
6148
6149 Pdata_clear(self->stack, i);
6150
6151 return 0;
6152 }
6153
6154 static int
load_dup(UnpicklerObject * self)6155 load_dup(UnpicklerObject *self)
6156 {
6157 PyObject *last;
6158 Py_ssize_t len = Py_SIZE(self->stack);
6159
6160 if (len <= self->stack->fence)
6161 return Pdata_stack_underflow(self->stack);
6162 last = self->stack->data[len - 1];
6163 PDATA_APPEND(self->stack, last, -1);
6164 return 0;
6165 }
6166
6167 static int
load_get(UnpicklerObject * self)6168 load_get(UnpicklerObject *self)
6169 {
6170 PyObject *key, *value;
6171 Py_ssize_t idx;
6172 Py_ssize_t len;
6173 char *s;
6174
6175 if ((len = _Unpickler_Readline(self, &s)) < 0)
6176 return -1;
6177 if (len < 2)
6178 return bad_readline();
6179
6180 key = PyLong_FromString(s, NULL, 10);
6181 if (key == NULL)
6182 return -1;
6183 idx = PyLong_AsSsize_t(key);
6184 if (idx == -1 && PyErr_Occurred()) {
6185 Py_DECREF(key);
6186 return -1;
6187 }
6188
6189 value = _Unpickler_MemoGet(self, idx);
6190 if (value == NULL) {
6191 if (!PyErr_Occurred()) {
6192 PickleState *st = _Pickle_GetGlobalState();
6193 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6194 }
6195 Py_DECREF(key);
6196 return -1;
6197 }
6198 Py_DECREF(key);
6199
6200 PDATA_APPEND(self->stack, value, -1);
6201 return 0;
6202 }
6203
6204 static int
load_binget(UnpicklerObject * self)6205 load_binget(UnpicklerObject *self)
6206 {
6207 PyObject *value;
6208 Py_ssize_t idx;
6209 char *s;
6210
6211 if (_Unpickler_Read(self, &s, 1) < 0)
6212 return -1;
6213
6214 idx = Py_CHARMASK(s[0]);
6215
6216 value = _Unpickler_MemoGet(self, idx);
6217 if (value == NULL) {
6218 PyObject *key = PyLong_FromSsize_t(idx);
6219 if (key != NULL) {
6220 PickleState *st = _Pickle_GetGlobalState();
6221 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6222 Py_DECREF(key);
6223 }
6224 return -1;
6225 }
6226
6227 PDATA_APPEND(self->stack, value, -1);
6228 return 0;
6229 }
6230
6231 static int
load_long_binget(UnpicklerObject * self)6232 load_long_binget(UnpicklerObject *self)
6233 {
6234 PyObject *value;
6235 Py_ssize_t idx;
6236 char *s;
6237
6238 if (_Unpickler_Read(self, &s, 4) < 0)
6239 return -1;
6240
6241 idx = calc_binsize(s, 4);
6242
6243 value = _Unpickler_MemoGet(self, idx);
6244 if (value == NULL) {
6245 PyObject *key = PyLong_FromSsize_t(idx);
6246 if (key != NULL) {
6247 PickleState *st = _Pickle_GetGlobalState();
6248 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6249 Py_DECREF(key);
6250 }
6251 return -1;
6252 }
6253
6254 PDATA_APPEND(self->stack, value, -1);
6255 return 0;
6256 }
6257
6258 /* Push an object from the extension registry (EXT[124]). nbytes is
6259 * the number of bytes following the opcode, holding the index (code) value.
6260 */
6261 static int
load_extension(UnpicklerObject * self,int nbytes)6262 load_extension(UnpicklerObject *self, int nbytes)
6263 {
6264 char *codebytes; /* the nbytes bytes after the opcode */
6265 long code; /* calc_binint returns long */
6266 PyObject *py_code; /* code as a Python int */
6267 PyObject *obj; /* the object to push */
6268 PyObject *pair; /* (module_name, class_name) */
6269 PyObject *module_name, *class_name;
6270 PickleState *st = _Pickle_GetGlobalState();
6271
6272 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6273 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6274 return -1;
6275 code = calc_binint(codebytes, nbytes);
6276 if (code <= 0) { /* note that 0 is forbidden */
6277 /* Corrupt or hostile pickle. */
6278 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6279 return -1;
6280 }
6281
6282 /* Look for the code in the cache. */
6283 py_code = PyLong_FromLong(code);
6284 if (py_code == NULL)
6285 return -1;
6286 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6287 if (obj != NULL) {
6288 /* Bingo. */
6289 Py_DECREF(py_code);
6290 PDATA_APPEND(self->stack, obj, -1);
6291 return 0;
6292 }
6293 if (PyErr_Occurred()) {
6294 Py_DECREF(py_code);
6295 return -1;
6296 }
6297
6298 /* Look up the (module_name, class_name) pair. */
6299 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6300 if (pair == NULL) {
6301 Py_DECREF(py_code);
6302 if (!PyErr_Occurred()) {
6303 PyErr_Format(PyExc_ValueError, "unregistered extension "
6304 "code %ld", code);
6305 }
6306 return -1;
6307 }
6308 /* Since the extension registry is manipulable via Python code,
6309 * confirm that pair is really a 2-tuple of strings.
6310 */
6311 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6312 goto error;
6313 }
6314
6315 module_name = PyTuple_GET_ITEM(pair, 0);
6316 if (!PyUnicode_Check(module_name)) {
6317 goto error;
6318 }
6319
6320 class_name = PyTuple_GET_ITEM(pair, 1);
6321 if (!PyUnicode_Check(class_name)) {
6322 goto error;
6323 }
6324
6325 /* Load the object. */
6326 obj = find_class(self, module_name, class_name);
6327 if (obj == NULL) {
6328 Py_DECREF(py_code);
6329 return -1;
6330 }
6331 /* Cache code -> obj. */
6332 code = PyDict_SetItem(st->extension_cache, py_code, obj);
6333 Py_DECREF(py_code);
6334 if (code < 0) {
6335 Py_DECREF(obj);
6336 return -1;
6337 }
6338 PDATA_PUSH(self->stack, obj, -1);
6339 return 0;
6340
6341 error:
6342 Py_DECREF(py_code);
6343 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6344 "isn't a 2-tuple of strings", code);
6345 return -1;
6346 }
6347
6348 static int
load_put(UnpicklerObject * self)6349 load_put(UnpicklerObject *self)
6350 {
6351 PyObject *key, *value;
6352 Py_ssize_t idx;
6353 Py_ssize_t len;
6354 char *s = NULL;
6355
6356 if ((len = _Unpickler_Readline(self, &s)) < 0)
6357 return -1;
6358 if (len < 2)
6359 return bad_readline();
6360 if (Py_SIZE(self->stack) <= self->stack->fence)
6361 return Pdata_stack_underflow(self->stack);
6362 value = self->stack->data[Py_SIZE(self->stack) - 1];
6363
6364 key = PyLong_FromString(s, NULL, 10);
6365 if (key == NULL)
6366 return -1;
6367 idx = PyLong_AsSsize_t(key);
6368 Py_DECREF(key);
6369 if (idx < 0) {
6370 if (!PyErr_Occurred())
6371 PyErr_SetString(PyExc_ValueError,
6372 "negative PUT argument");
6373 return -1;
6374 }
6375
6376 return _Unpickler_MemoPut(self, idx, value);
6377 }
6378
6379 static int
load_binput(UnpicklerObject * self)6380 load_binput(UnpicklerObject *self)
6381 {
6382 PyObject *value;
6383 Py_ssize_t idx;
6384 char *s;
6385
6386 if (_Unpickler_Read(self, &s, 1) < 0)
6387 return -1;
6388
6389 if (Py_SIZE(self->stack) <= self->stack->fence)
6390 return Pdata_stack_underflow(self->stack);
6391 value = self->stack->data[Py_SIZE(self->stack) - 1];
6392
6393 idx = Py_CHARMASK(s[0]);
6394
6395 return _Unpickler_MemoPut(self, idx, value);
6396 }
6397
6398 static int
load_long_binput(UnpicklerObject * self)6399 load_long_binput(UnpicklerObject *self)
6400 {
6401 PyObject *value;
6402 Py_ssize_t idx;
6403 char *s;
6404
6405 if (_Unpickler_Read(self, &s, 4) < 0)
6406 return -1;
6407
6408 if (Py_SIZE(self->stack) <= self->stack->fence)
6409 return Pdata_stack_underflow(self->stack);
6410 value = self->stack->data[Py_SIZE(self->stack) - 1];
6411
6412 idx = calc_binsize(s, 4);
6413 if (idx < 0) {
6414 PyErr_SetString(PyExc_ValueError,
6415 "negative LONG_BINPUT argument");
6416 return -1;
6417 }
6418
6419 return _Unpickler_MemoPut(self, idx, value);
6420 }
6421
6422 static int
load_memoize(UnpicklerObject * self)6423 load_memoize(UnpicklerObject *self)
6424 {
6425 PyObject *value;
6426
6427 if (Py_SIZE(self->stack) <= self->stack->fence)
6428 return Pdata_stack_underflow(self->stack);
6429 value = self->stack->data[Py_SIZE(self->stack) - 1];
6430
6431 return _Unpickler_MemoPut(self, self->memo_len, value);
6432 }
6433
6434 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6435 do_append(UnpicklerObject *self, Py_ssize_t x)
6436 {
6437 PyObject *value;
6438 PyObject *slice;
6439 PyObject *list;
6440 PyObject *result;
6441 Py_ssize_t len, i;
6442
6443 len = Py_SIZE(self->stack);
6444 if (x > len || x <= self->stack->fence)
6445 return Pdata_stack_underflow(self->stack);
6446 if (len == x) /* nothing to do */
6447 return 0;
6448
6449 list = self->stack->data[x - 1];
6450
6451 if (PyList_CheckExact(list)) {
6452 Py_ssize_t list_len;
6453 int ret;
6454
6455 slice = Pdata_poplist(self->stack, x);
6456 if (!slice)
6457 return -1;
6458 list_len = PyList_GET_SIZE(list);
6459 ret = PyList_SetSlice(list, list_len, list_len, slice);
6460 Py_DECREF(slice);
6461 return ret;
6462 }
6463 else {
6464 PyObject *extend_func;
6465 _Py_IDENTIFIER(extend);
6466
6467 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6468 return -1;
6469 }
6470 if (extend_func != NULL) {
6471 slice = Pdata_poplist(self->stack, x);
6472 if (!slice) {
6473 Py_DECREF(extend_func);
6474 return -1;
6475 }
6476 result = _Pickle_FastCall(extend_func, slice);
6477 Py_DECREF(extend_func);
6478 if (result == NULL)
6479 return -1;
6480 Py_DECREF(result);
6481 }
6482 else {
6483 PyObject *append_func;
6484 _Py_IDENTIFIER(append);
6485
6486 /* Even if the PEP 307 requires extend() and append() methods,
6487 fall back on append() if the object has no extend() method
6488 for backward compatibility. */
6489 append_func = _PyObject_GetAttrId(list, &PyId_append);
6490 if (append_func == NULL)
6491 return -1;
6492 for (i = x; i < len; i++) {
6493 value = self->stack->data[i];
6494 result = _Pickle_FastCall(append_func, value);
6495 if (result == NULL) {
6496 Pdata_clear(self->stack, i + 1);
6497 Py_SET_SIZE(self->stack, x);
6498 Py_DECREF(append_func);
6499 return -1;
6500 }
6501 Py_DECREF(result);
6502 }
6503 Py_SET_SIZE(self->stack, x);
6504 Py_DECREF(append_func);
6505 }
6506 }
6507
6508 return 0;
6509 }
6510
6511 static int
load_append(UnpicklerObject * self)6512 load_append(UnpicklerObject *self)
6513 {
6514 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6515 return Pdata_stack_underflow(self->stack);
6516 return do_append(self, Py_SIZE(self->stack) - 1);
6517 }
6518
6519 static int
load_appends(UnpicklerObject * self)6520 load_appends(UnpicklerObject *self)
6521 {
6522 Py_ssize_t i = marker(self);
6523 if (i < 0)
6524 return -1;
6525 return do_append(self, i);
6526 }
6527
6528 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6529 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6530 {
6531 PyObject *value, *key;
6532 PyObject *dict;
6533 Py_ssize_t len, i;
6534 int status = 0;
6535
6536 len = Py_SIZE(self->stack);
6537 if (x > len || x <= self->stack->fence)
6538 return Pdata_stack_underflow(self->stack);
6539 if (len == x) /* nothing to do */
6540 return 0;
6541 if ((len - x) % 2 != 0) {
6542 PickleState *st = _Pickle_GetGlobalState();
6543 /* Corrupt or hostile pickle -- we never write one like this. */
6544 PyErr_SetString(st->UnpicklingError,
6545 "odd number of items for SETITEMS");
6546 return -1;
6547 }
6548
6549 /* Here, dict does not actually need to be a PyDict; it could be anything
6550 that supports the __setitem__ attribute. */
6551 dict = self->stack->data[x - 1];
6552
6553 for (i = x + 1; i < len; i += 2) {
6554 key = self->stack->data[i - 1];
6555 value = self->stack->data[i];
6556 if (PyObject_SetItem(dict, key, value) < 0) {
6557 status = -1;
6558 break;
6559 }
6560 }
6561
6562 Pdata_clear(self->stack, x);
6563 return status;
6564 }
6565
6566 static int
load_setitem(UnpicklerObject * self)6567 load_setitem(UnpicklerObject *self)
6568 {
6569 return do_setitems(self, Py_SIZE(self->stack) - 2);
6570 }
6571
6572 static int
load_setitems(UnpicklerObject * self)6573 load_setitems(UnpicklerObject *self)
6574 {
6575 Py_ssize_t i = marker(self);
6576 if (i < 0)
6577 return -1;
6578 return do_setitems(self, i);
6579 }
6580
6581 static int
load_additems(UnpicklerObject * self)6582 load_additems(UnpicklerObject *self)
6583 {
6584 PyObject *set;
6585 Py_ssize_t mark, len, i;
6586
6587 mark = marker(self);
6588 if (mark < 0)
6589 return -1;
6590 len = Py_SIZE(self->stack);
6591 if (mark > len || mark <= self->stack->fence)
6592 return Pdata_stack_underflow(self->stack);
6593 if (len == mark) /* nothing to do */
6594 return 0;
6595
6596 set = self->stack->data[mark - 1];
6597
6598 if (PySet_Check(set)) {
6599 PyObject *items;
6600 int status;
6601
6602 items = Pdata_poptuple(self->stack, mark);
6603 if (items == NULL)
6604 return -1;
6605
6606 status = _PySet_Update(set, items);
6607 Py_DECREF(items);
6608 return status;
6609 }
6610 else {
6611 PyObject *add_func;
6612 _Py_IDENTIFIER(add);
6613
6614 add_func = _PyObject_GetAttrId(set, &PyId_add);
6615 if (add_func == NULL)
6616 return -1;
6617 for (i = mark; i < len; i++) {
6618 PyObject *result;
6619 PyObject *item;
6620
6621 item = self->stack->data[i];
6622 result = _Pickle_FastCall(add_func, item);
6623 if (result == NULL) {
6624 Pdata_clear(self->stack, i + 1);
6625 Py_SET_SIZE(self->stack, mark);
6626 return -1;
6627 }
6628 Py_DECREF(result);
6629 }
6630 Py_SET_SIZE(self->stack, mark);
6631 }
6632
6633 return 0;
6634 }
6635
6636 static int
load_build(UnpicklerObject * self)6637 load_build(UnpicklerObject *self)
6638 {
6639 PyObject *state, *inst, *slotstate;
6640 PyObject *setstate;
6641 int status = 0;
6642 _Py_IDENTIFIER(__setstate__);
6643
6644 /* Stack is ... instance, state. We want to leave instance at
6645 * the stack top, possibly mutated via instance.__setstate__(state).
6646 */
6647 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6648 return Pdata_stack_underflow(self->stack);
6649
6650 PDATA_POP(self->stack, state);
6651 if (state == NULL)
6652 return -1;
6653
6654 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6655
6656 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6657 Py_DECREF(state);
6658 return -1;
6659 }
6660 if (setstate != NULL) {
6661 PyObject *result;
6662
6663 /* The explicit __setstate__ is responsible for everything. */
6664 result = _Pickle_FastCall(setstate, state);
6665 Py_DECREF(setstate);
6666 if (result == NULL)
6667 return -1;
6668 Py_DECREF(result);
6669 return 0;
6670 }
6671
6672 /* A default __setstate__. First see whether state embeds a
6673 * slot state dict too (a proto 2 addition).
6674 */
6675 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6676 PyObject *tmp = state;
6677
6678 state = PyTuple_GET_ITEM(tmp, 0);
6679 slotstate = PyTuple_GET_ITEM(tmp, 1);
6680 Py_INCREF(state);
6681 Py_INCREF(slotstate);
6682 Py_DECREF(tmp);
6683 }
6684 else
6685 slotstate = NULL;
6686
6687 /* Set inst.__dict__ from the state dict (if any). */
6688 if (state != Py_None) {
6689 PyObject *dict;
6690 PyObject *d_key, *d_value;
6691 Py_ssize_t i;
6692 _Py_IDENTIFIER(__dict__);
6693
6694 if (!PyDict_Check(state)) {
6695 PickleState *st = _Pickle_GetGlobalState();
6696 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6697 goto error;
6698 }
6699 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6700 if (dict == NULL)
6701 goto error;
6702
6703 i = 0;
6704 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6705 /* normally the keys for instance attributes are
6706 interned. we should try to do that here. */
6707 Py_INCREF(d_key);
6708 if (PyUnicode_CheckExact(d_key))
6709 PyUnicode_InternInPlace(&d_key);
6710 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6711 Py_DECREF(d_key);
6712 goto error;
6713 }
6714 Py_DECREF(d_key);
6715 }
6716 Py_DECREF(dict);
6717 }
6718
6719 /* Also set instance attributes from the slotstate dict (if any). */
6720 if (slotstate != NULL) {
6721 PyObject *d_key, *d_value;
6722 Py_ssize_t i;
6723
6724 if (!PyDict_Check(slotstate)) {
6725 PickleState *st = _Pickle_GetGlobalState();
6726 PyErr_SetString(st->UnpicklingError,
6727 "slot state is not a dictionary");
6728 goto error;
6729 }
6730 i = 0;
6731 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6732 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6733 goto error;
6734 }
6735 }
6736
6737 if (0) {
6738 error:
6739 status = -1;
6740 }
6741
6742 Py_DECREF(state);
6743 Py_XDECREF(slotstate);
6744 return status;
6745 }
6746
6747 static int
load_mark(UnpicklerObject * self)6748 load_mark(UnpicklerObject *self)
6749 {
6750
6751 /* Note that we split the (pickle.py) stack into two stacks, an
6752 * object stack and a mark stack. Here we push a mark onto the
6753 * mark stack.
6754 */
6755
6756 if (self->num_marks >= self->marks_size) {
6757 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6758 Py_ssize_t *marks_new = self->marks;
6759 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6760 if (marks_new == NULL) {
6761 PyErr_NoMemory();
6762 return -1;
6763 }
6764 self->marks = marks_new;
6765 self->marks_size = (Py_ssize_t)alloc;
6766 }
6767
6768 self->stack->mark_set = 1;
6769 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6770
6771 return 0;
6772 }
6773
6774 static int
load_reduce(UnpicklerObject * self)6775 load_reduce(UnpicklerObject *self)
6776 {
6777 PyObject *callable = NULL;
6778 PyObject *argtup = NULL;
6779 PyObject *obj = NULL;
6780
6781 PDATA_POP(self->stack, argtup);
6782 if (argtup == NULL)
6783 return -1;
6784 PDATA_POP(self->stack, callable);
6785 if (callable) {
6786 obj = PyObject_CallObject(callable, argtup);
6787 Py_DECREF(callable);
6788 }
6789 Py_DECREF(argtup);
6790
6791 if (obj == NULL)
6792 return -1;
6793
6794 PDATA_PUSH(self->stack, obj, -1);
6795 return 0;
6796 }
6797
6798 /* Just raises an error if we don't know the protocol specified. PROTO
6799 * is the first opcode for protocols >= 2.
6800 */
6801 static int
load_proto(UnpicklerObject * self)6802 load_proto(UnpicklerObject *self)
6803 {
6804 char *s;
6805 int i;
6806
6807 if (_Unpickler_Read(self, &s, 1) < 0)
6808 return -1;
6809
6810 i = (unsigned char)s[0];
6811 if (i <= HIGHEST_PROTOCOL) {
6812 self->proto = i;
6813 return 0;
6814 }
6815
6816 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6817 return -1;
6818 }
6819
6820 static int
load_frame(UnpicklerObject * self)6821 load_frame(UnpicklerObject *self)
6822 {
6823 char *s;
6824 Py_ssize_t frame_len;
6825
6826 if (_Unpickler_Read(self, &s, 8) < 0)
6827 return -1;
6828
6829 frame_len = calc_binsize(s, 8);
6830 if (frame_len < 0) {
6831 PyErr_Format(PyExc_OverflowError,
6832 "FRAME length exceeds system's maximum of %zd bytes",
6833 PY_SSIZE_T_MAX);
6834 return -1;
6835 }
6836
6837 if (_Unpickler_Read(self, &s, frame_len) < 0)
6838 return -1;
6839
6840 /* Rewind to start of frame */
6841 self->next_read_idx -= frame_len;
6842 return 0;
6843 }
6844
6845 static PyObject *
load(UnpicklerObject * self)6846 load(UnpicklerObject *self)
6847 {
6848 PyObject *value = NULL;
6849 char *s = NULL;
6850
6851 self->num_marks = 0;
6852 self->stack->mark_set = 0;
6853 self->stack->fence = 0;
6854 self->proto = 0;
6855 if (Py_SIZE(self->stack))
6856 Pdata_clear(self->stack, 0);
6857
6858 /* Convenient macros for the dispatch while-switch loop just below. */
6859 #define OP(opcode, load_func) \
6860 case opcode: if (load_func(self) < 0) break; continue;
6861
6862 #define OP_ARG(opcode, load_func, arg) \
6863 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6864
6865 while (1) {
6866 if (_Unpickler_Read(self, &s, 1) < 0) {
6867 PickleState *st = _Pickle_GetGlobalState();
6868 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6869 PyErr_Format(PyExc_EOFError, "Ran out of input");
6870 }
6871 return NULL;
6872 }
6873
6874 switch ((enum opcode)s[0]) {
6875 OP(NONE, load_none)
6876 OP(BININT, load_binint)
6877 OP(BININT1, load_binint1)
6878 OP(BININT2, load_binint2)
6879 OP(INT, load_int)
6880 OP(LONG, load_long)
6881 OP_ARG(LONG1, load_counted_long, 1)
6882 OP_ARG(LONG4, load_counted_long, 4)
6883 OP(FLOAT, load_float)
6884 OP(BINFLOAT, load_binfloat)
6885 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6886 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6887 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6888 OP(BYTEARRAY8, load_counted_bytearray)
6889 OP(NEXT_BUFFER, load_next_buffer)
6890 OP(READONLY_BUFFER, load_readonly_buffer)
6891 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6892 OP_ARG(BINSTRING, load_counted_binstring, 4)
6893 OP(STRING, load_string)
6894 OP(UNICODE, load_unicode)
6895 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6896 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6897 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6898 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6899 OP_ARG(TUPLE1, load_counted_tuple, 1)
6900 OP_ARG(TUPLE2, load_counted_tuple, 2)
6901 OP_ARG(TUPLE3, load_counted_tuple, 3)
6902 OP(TUPLE, load_tuple)
6903 OP(EMPTY_LIST, load_empty_list)
6904 OP(LIST, load_list)
6905 OP(EMPTY_DICT, load_empty_dict)
6906 OP(DICT, load_dict)
6907 OP(EMPTY_SET, load_empty_set)
6908 OP(ADDITEMS, load_additems)
6909 OP(FROZENSET, load_frozenset)
6910 OP(OBJ, load_obj)
6911 OP(INST, load_inst)
6912 OP_ARG(NEWOBJ, load_newobj, 0)
6913 OP_ARG(NEWOBJ_EX, load_newobj, 1)
6914 OP(GLOBAL, load_global)
6915 OP(STACK_GLOBAL, load_stack_global)
6916 OP(APPEND, load_append)
6917 OP(APPENDS, load_appends)
6918 OP(BUILD, load_build)
6919 OP(DUP, load_dup)
6920 OP(BINGET, load_binget)
6921 OP(LONG_BINGET, load_long_binget)
6922 OP(GET, load_get)
6923 OP(MARK, load_mark)
6924 OP(BINPUT, load_binput)
6925 OP(LONG_BINPUT, load_long_binput)
6926 OP(PUT, load_put)
6927 OP(MEMOIZE, load_memoize)
6928 OP(POP, load_pop)
6929 OP(POP_MARK, load_pop_mark)
6930 OP(SETITEM, load_setitem)
6931 OP(SETITEMS, load_setitems)
6932 OP(PERSID, load_persid)
6933 OP(BINPERSID, load_binpersid)
6934 OP(REDUCE, load_reduce)
6935 OP(PROTO, load_proto)
6936 OP(FRAME, load_frame)
6937 OP_ARG(EXT1, load_extension, 1)
6938 OP_ARG(EXT2, load_extension, 2)
6939 OP_ARG(EXT4, load_extension, 4)
6940 OP_ARG(NEWTRUE, load_bool, Py_True)
6941 OP_ARG(NEWFALSE, load_bool, Py_False)
6942
6943 case STOP:
6944 break;
6945
6946 default:
6947 {
6948 PickleState *st = _Pickle_GetGlobalState();
6949 unsigned char c = (unsigned char) *s;
6950 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6951 PyErr_Format(st->UnpicklingError,
6952 "invalid load key, '%c'.", c);
6953 }
6954 else {
6955 PyErr_Format(st->UnpicklingError,
6956 "invalid load key, '\\x%02x'.", c);
6957 }
6958 return NULL;
6959 }
6960 }
6961
6962 break; /* and we are done! */
6963 }
6964
6965 if (PyErr_Occurred()) {
6966 return NULL;
6967 }
6968
6969 if (_Unpickler_SkipConsumed(self) < 0)
6970 return NULL;
6971
6972 PDATA_POP(self->stack, value);
6973 return value;
6974 }
6975
6976 /*[clinic input]
6977
6978 _pickle.Unpickler.load
6979
6980 Load a pickle.
6981
6982 Read a pickled object representation from the open file object given
6983 in the constructor, and return the reconstituted object hierarchy
6984 specified therein.
6985 [clinic start generated code]*/
6986
6987 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6988 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6989 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6990 {
6991 UnpicklerObject *unpickler = (UnpicklerObject*)self;
6992
6993 /* Check whether the Unpickler was initialized correctly. This prevents
6994 segfaulting if a subclass overridden __init__ with a function that does
6995 not call Unpickler.__init__(). Here, we simply ensure that self->read
6996 is not NULL. */
6997 if (unpickler->read == NULL) {
6998 PickleState *st = _Pickle_GetGlobalState();
6999 PyErr_Format(st->UnpicklingError,
7000 "Unpickler.__init__() was not called by %s.__init__()",
7001 Py_TYPE(unpickler)->tp_name);
7002 return NULL;
7003 }
7004
7005 return load(unpickler);
7006 }
7007
7008 /* The name of find_class() is misleading. In newer pickle protocols, this
7009 function is used for loading any global (i.e., functions), not just
7010 classes. The name is kept only for backward compatibility. */
7011
7012 /*[clinic input]
7013
7014 _pickle.Unpickler.find_class
7015
7016 module_name: object
7017 global_name: object
7018 /
7019
7020 Return an object from a specified module.
7021
7022 If necessary, the module will be imported. Subclasses may override
7023 this method (e.g. to restrict unpickling of arbitrary classes and
7024 functions).
7025
7026 This method is called whenever a class or a function object is
7027 needed. Both arguments passed are str objects.
7028 [clinic start generated code]*/
7029
7030 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7031 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7032 PyObject *module_name,
7033 PyObject *global_name)
7034 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7035 {
7036 PyObject *global;
7037 PyObject *module;
7038
7039 if (PySys_Audit("pickle.find_class", "OO",
7040 module_name, global_name) < 0) {
7041 return NULL;
7042 }
7043
7044 /* Try to map the old names used in Python 2.x to the new ones used in
7045 Python 3.x. We do this only with old pickle protocols and when the
7046 user has not disabled the feature. */
7047 if (self->proto < 3 && self->fix_imports) {
7048 PyObject *key;
7049 PyObject *item;
7050 PickleState *st = _Pickle_GetGlobalState();
7051
7052 /* Check if the global (i.e., a function or a class) was renamed
7053 or moved to another module. */
7054 key = PyTuple_Pack(2, module_name, global_name);
7055 if (key == NULL)
7056 return NULL;
7057 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7058 Py_DECREF(key);
7059 if (item) {
7060 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7061 PyErr_Format(PyExc_RuntimeError,
7062 "_compat_pickle.NAME_MAPPING values should be "
7063 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7064 return NULL;
7065 }
7066 module_name = PyTuple_GET_ITEM(item, 0);
7067 global_name = PyTuple_GET_ITEM(item, 1);
7068 if (!PyUnicode_Check(module_name) ||
7069 !PyUnicode_Check(global_name)) {
7070 PyErr_Format(PyExc_RuntimeError,
7071 "_compat_pickle.NAME_MAPPING values should be "
7072 "pairs of str, not (%.200s, %.200s)",
7073 Py_TYPE(module_name)->tp_name,
7074 Py_TYPE(global_name)->tp_name);
7075 return NULL;
7076 }
7077 }
7078 else if (PyErr_Occurred()) {
7079 return NULL;
7080 }
7081 else {
7082 /* Check if the module was renamed. */
7083 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7084 if (item) {
7085 if (!PyUnicode_Check(item)) {
7086 PyErr_Format(PyExc_RuntimeError,
7087 "_compat_pickle.IMPORT_MAPPING values should be "
7088 "strings, not %.200s", Py_TYPE(item)->tp_name);
7089 return NULL;
7090 }
7091 module_name = item;
7092 }
7093 else if (PyErr_Occurred()) {
7094 return NULL;
7095 }
7096 }
7097 }
7098
7099 /*
7100 * we don't use PyImport_GetModule here, because it can return partially-
7101 * initialised modules, which then cause the getattribute to fail.
7102 */
7103 module = PyImport_Import(module_name);
7104 if (module == NULL) {
7105 return NULL;
7106 }
7107 global = getattribute(module, global_name, self->proto >= 4);
7108 Py_DECREF(module);
7109 return global;
7110 }
7111
7112 /*[clinic input]
7113
7114 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7115
7116 Returns size in memory, in bytes.
7117 [clinic start generated code]*/
7118
7119 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7120 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7121 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7122 {
7123 Py_ssize_t res;
7124
7125 res = _PyObject_SIZE(Py_TYPE(self));
7126 if (self->memo != NULL)
7127 res += self->memo_size * sizeof(PyObject *);
7128 if (self->marks != NULL)
7129 res += self->marks_size * sizeof(Py_ssize_t);
7130 if (self->input_line != NULL)
7131 res += strlen(self->input_line) + 1;
7132 if (self->encoding != NULL)
7133 res += strlen(self->encoding) + 1;
7134 if (self->errors != NULL)
7135 res += strlen(self->errors) + 1;
7136 return res;
7137 }
7138
7139 static struct PyMethodDef Unpickler_methods[] = {
7140 _PICKLE_UNPICKLER_LOAD_METHODDEF
7141 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
7142 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
7143 {NULL, NULL} /* sentinel */
7144 };
7145
7146 static void
Unpickler_dealloc(UnpicklerObject * self)7147 Unpickler_dealloc(UnpicklerObject *self)
7148 {
7149 PyObject_GC_UnTrack((PyObject *)self);
7150 Py_XDECREF(self->readline);
7151 Py_XDECREF(self->readinto);
7152 Py_XDECREF(self->read);
7153 Py_XDECREF(self->peek);
7154 Py_XDECREF(self->stack);
7155 Py_XDECREF(self->pers_func);
7156 Py_XDECREF(self->buffers);
7157 if (self->buffer.buf != NULL) {
7158 PyBuffer_Release(&self->buffer);
7159 self->buffer.buf = NULL;
7160 }
7161
7162 _Unpickler_MemoCleanup(self);
7163 PyMem_Free(self->marks);
7164 PyMem_Free(self->input_line);
7165 PyMem_Free(self->encoding);
7166 PyMem_Free(self->errors);
7167
7168 Py_TYPE(self)->tp_free((PyObject *)self);
7169 }
7170
7171 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7172 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7173 {
7174 Py_VISIT(self->readline);
7175 Py_VISIT(self->readinto);
7176 Py_VISIT(self->read);
7177 Py_VISIT(self->peek);
7178 Py_VISIT(self->stack);
7179 Py_VISIT(self->pers_func);
7180 Py_VISIT(self->buffers);
7181 return 0;
7182 }
7183
7184 static int
Unpickler_clear(UnpicklerObject * self)7185 Unpickler_clear(UnpicklerObject *self)
7186 {
7187 Py_CLEAR(self->readline);
7188 Py_CLEAR(self->readinto);
7189 Py_CLEAR(self->read);
7190 Py_CLEAR(self->peek);
7191 Py_CLEAR(self->stack);
7192 Py_CLEAR(self->pers_func);
7193 Py_CLEAR(self->buffers);
7194 if (self->buffer.buf != NULL) {
7195 PyBuffer_Release(&self->buffer);
7196 self->buffer.buf = NULL;
7197 }
7198
7199 _Unpickler_MemoCleanup(self);
7200 PyMem_Free(self->marks);
7201 self->marks = NULL;
7202 PyMem_Free(self->input_line);
7203 self->input_line = NULL;
7204 PyMem_Free(self->encoding);
7205 self->encoding = NULL;
7206 PyMem_Free(self->errors);
7207 self->errors = NULL;
7208
7209 return 0;
7210 }
7211
7212 /*[clinic input]
7213
7214 _pickle.Unpickler.__init__
7215
7216 file: object
7217 *
7218 fix_imports: bool = True
7219 encoding: str = 'ASCII'
7220 errors: str = 'strict'
7221 buffers: object(c_default="NULL") = ()
7222
7223 This takes a binary file for reading a pickle data stream.
7224
7225 The protocol version of the pickle is detected automatically, so no
7226 protocol argument is needed. Bytes past the pickled object's
7227 representation are ignored.
7228
7229 The argument *file* must have two methods, a read() method that takes
7230 an integer argument, and a readline() method that requires no
7231 arguments. Both methods should return bytes. Thus *file* can be a
7232 binary file object opened for reading, an io.BytesIO object, or any
7233 other custom object that meets this interface.
7234
7235 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7236 which are used to control compatibility support for pickle stream
7237 generated by Python 2. If *fix_imports* is True, pickle will try to
7238 map the old Python 2 names to the new names used in Python 3. The
7239 *encoding* and *errors* tell pickle how to decode 8-bit string
7240 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7241 respectively. The *encoding* can be 'bytes' to read these 8-bit
7242 string instances as bytes objects.
7243 [clinic start generated code]*/
7244
7245 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7246 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7247 int fix_imports, const char *encoding,
7248 const char *errors, PyObject *buffers)
7249 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7250 {
7251 _Py_IDENTIFIER(persistent_load);
7252
7253 /* In case of multiple __init__() calls, clear previous content. */
7254 if (self->read != NULL)
7255 (void)Unpickler_clear(self);
7256
7257 if (_Unpickler_SetInputStream(self, file) < 0)
7258 return -1;
7259
7260 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7261 return -1;
7262
7263 if (_Unpickler_SetBuffers(self, buffers) < 0)
7264 return -1;
7265
7266 self->fix_imports = fix_imports;
7267
7268 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7269 &self->pers_func, &self->pers_func_self) < 0)
7270 {
7271 return -1;
7272 }
7273
7274 self->stack = (Pdata *)Pdata_New();
7275 if (self->stack == NULL)
7276 return -1;
7277
7278 self->memo_size = 32;
7279 self->memo = _Unpickler_NewMemo(self->memo_size);
7280 if (self->memo == NULL)
7281 return -1;
7282
7283 self->proto = 0;
7284
7285 return 0;
7286 }
7287
7288
7289 /* Define a proxy object for the Unpickler's internal memo object. This is to
7290 * avoid breaking code like:
7291 * unpickler.memo.clear()
7292 * and
7293 * unpickler.memo = saved_memo
7294 * Is this a good idea? Not really, but we don't want to break code that uses
7295 * it. Note that we don't implement the entire mapping API here. This is
7296 * intentional, as these should be treated as black-box implementation details.
7297 *
7298 * We do, however, have to implement pickling/unpickling support because of
7299 * real-world code like cvs2svn.
7300 */
7301
7302 /*[clinic input]
7303 _pickle.UnpicklerMemoProxy.clear
7304
7305 Remove all items from memo.
7306 [clinic start generated code]*/
7307
7308 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7309 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7310 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7311 {
7312 _Unpickler_MemoCleanup(self->unpickler);
7313 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7314 if (self->unpickler->memo == NULL)
7315 return NULL;
7316 Py_RETURN_NONE;
7317 }
7318
7319 /*[clinic input]
7320 _pickle.UnpicklerMemoProxy.copy
7321
7322 Copy the memo to a new object.
7323 [clinic start generated code]*/
7324
7325 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7326 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7327 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7328 {
7329 size_t i;
7330 PyObject *new_memo = PyDict_New();
7331 if (new_memo == NULL)
7332 return NULL;
7333
7334 for (i = 0; i < self->unpickler->memo_size; i++) {
7335 int status;
7336 PyObject *key, *value;
7337
7338 value = self->unpickler->memo[i];
7339 if (value == NULL)
7340 continue;
7341
7342 key = PyLong_FromSsize_t(i);
7343 if (key == NULL)
7344 goto error;
7345 status = PyDict_SetItem(new_memo, key, value);
7346 Py_DECREF(key);
7347 if (status < 0)
7348 goto error;
7349 }
7350 return new_memo;
7351
7352 error:
7353 Py_DECREF(new_memo);
7354 return NULL;
7355 }
7356
7357 /*[clinic input]
7358 _pickle.UnpicklerMemoProxy.__reduce__
7359
7360 Implement pickling support.
7361 [clinic start generated code]*/
7362
7363 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7364 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7365 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7366 {
7367 PyObject *reduce_value;
7368 PyObject *constructor_args;
7369 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7370 if (contents == NULL)
7371 return NULL;
7372
7373 reduce_value = PyTuple_New(2);
7374 if (reduce_value == NULL) {
7375 Py_DECREF(contents);
7376 return NULL;
7377 }
7378 constructor_args = PyTuple_New(1);
7379 if (constructor_args == NULL) {
7380 Py_DECREF(contents);
7381 Py_DECREF(reduce_value);
7382 return NULL;
7383 }
7384 PyTuple_SET_ITEM(constructor_args, 0, contents);
7385 Py_INCREF((PyObject *)&PyDict_Type);
7386 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7387 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7388 return reduce_value;
7389 }
7390
7391 static PyMethodDef unpicklerproxy_methods[] = {
7392 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7393 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7394 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7395 {NULL, NULL} /* sentinel */
7396 };
7397
7398 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7399 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7400 {
7401 PyObject_GC_UnTrack(self);
7402 Py_XDECREF(self->unpickler);
7403 PyObject_GC_Del((PyObject *)self);
7404 }
7405
7406 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7407 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7408 visitproc visit, void *arg)
7409 {
7410 Py_VISIT(self->unpickler);
7411 return 0;
7412 }
7413
7414 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7415 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7416 {
7417 Py_CLEAR(self->unpickler);
7418 return 0;
7419 }
7420
7421 static PyTypeObject UnpicklerMemoProxyType = {
7422 PyVarObject_HEAD_INIT(NULL, 0)
7423 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7424 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7425 0,
7426 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
7427 0, /* tp_vectorcall_offset */
7428 0, /* tp_getattr */
7429 0, /* tp_setattr */
7430 0, /* tp_as_async */
7431 0, /* tp_repr */
7432 0, /* tp_as_number */
7433 0, /* tp_as_sequence */
7434 0, /* tp_as_mapping */
7435 PyObject_HashNotImplemented, /* tp_hash */
7436 0, /* tp_call */
7437 0, /* tp_str */
7438 PyObject_GenericGetAttr, /* tp_getattro */
7439 PyObject_GenericSetAttr, /* tp_setattro */
7440 0, /* tp_as_buffer */
7441 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7442 0, /* tp_doc */
7443 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7444 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7445 0, /* tp_richcompare */
7446 0, /* tp_weaklistoffset */
7447 0, /* tp_iter */
7448 0, /* tp_iternext */
7449 unpicklerproxy_methods, /* tp_methods */
7450 };
7451
7452 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7453 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7454 {
7455 UnpicklerMemoProxyObject *self;
7456
7457 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7458 &UnpicklerMemoProxyType);
7459 if (self == NULL)
7460 return NULL;
7461 Py_INCREF(unpickler);
7462 self->unpickler = unpickler;
7463 PyObject_GC_Track(self);
7464 return (PyObject *)self;
7465 }
7466
7467 /*****************************************************************************/
7468
7469
7470 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7471 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7472 {
7473 return UnpicklerMemoProxy_New(self);
7474 }
7475
7476 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7477 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7478 {
7479 PyObject **new_memo;
7480 size_t new_memo_size = 0;
7481
7482 if (obj == NULL) {
7483 PyErr_SetString(PyExc_TypeError,
7484 "attribute deletion is not supported");
7485 return -1;
7486 }
7487
7488 if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7489 UnpicklerObject *unpickler =
7490 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7491
7492 new_memo_size = unpickler->memo_size;
7493 new_memo = _Unpickler_NewMemo(new_memo_size);
7494 if (new_memo == NULL)
7495 return -1;
7496
7497 for (size_t i = 0; i < new_memo_size; i++) {
7498 Py_XINCREF(unpickler->memo[i]);
7499 new_memo[i] = unpickler->memo[i];
7500 }
7501 }
7502 else if (PyDict_Check(obj)) {
7503 Py_ssize_t i = 0;
7504 PyObject *key, *value;
7505
7506 new_memo_size = PyDict_GET_SIZE(obj);
7507 new_memo = _Unpickler_NewMemo(new_memo_size);
7508 if (new_memo == NULL)
7509 return -1;
7510
7511 while (PyDict_Next(obj, &i, &key, &value)) {
7512 Py_ssize_t idx;
7513 if (!PyLong_Check(key)) {
7514 PyErr_SetString(PyExc_TypeError,
7515 "memo key must be integers");
7516 goto error;
7517 }
7518 idx = PyLong_AsSsize_t(key);
7519 if (idx == -1 && PyErr_Occurred())
7520 goto error;
7521 if (idx < 0) {
7522 PyErr_SetString(PyExc_ValueError,
7523 "memo key must be positive integers.");
7524 goto error;
7525 }
7526 if (_Unpickler_MemoPut(self, idx, value) < 0)
7527 goto error;
7528 }
7529 }
7530 else {
7531 PyErr_Format(PyExc_TypeError,
7532 "'memo' attribute must be an UnpicklerMemoProxy object "
7533 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7534 return -1;
7535 }
7536
7537 _Unpickler_MemoCleanup(self);
7538 self->memo_size = new_memo_size;
7539 self->memo = new_memo;
7540
7541 return 0;
7542
7543 error:
7544 if (new_memo_size) {
7545 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7546 Py_XDECREF(new_memo[i]);
7547 }
7548 PyMem_Free(new_memo);
7549 }
7550 return -1;
7551 }
7552
7553 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7554 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7555 {
7556 if (self->pers_func == NULL) {
7557 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7558 return NULL;
7559 }
7560 return reconstruct_method(self->pers_func, self->pers_func_self);
7561 }
7562
7563 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7564 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7565 {
7566 if (value == NULL) {
7567 PyErr_SetString(PyExc_TypeError,
7568 "attribute deletion is not supported");
7569 return -1;
7570 }
7571 if (!PyCallable_Check(value)) {
7572 PyErr_SetString(PyExc_TypeError,
7573 "persistent_load must be a callable taking "
7574 "one argument");
7575 return -1;
7576 }
7577
7578 self->pers_func_self = NULL;
7579 Py_INCREF(value);
7580 Py_XSETREF(self->pers_func, value);
7581
7582 return 0;
7583 }
7584
7585 static PyGetSetDef Unpickler_getsets[] = {
7586 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7587 {"persistent_load", (getter)Unpickler_get_persload,
7588 (setter)Unpickler_set_persload},
7589 {NULL}
7590 };
7591
7592 static PyTypeObject Unpickler_Type = {
7593 PyVarObject_HEAD_INIT(NULL, 0)
7594 "_pickle.Unpickler", /*tp_name*/
7595 sizeof(UnpicklerObject), /*tp_basicsize*/
7596 0, /*tp_itemsize*/
7597 (destructor)Unpickler_dealloc, /*tp_dealloc*/
7598 0, /*tp_vectorcall_offset*/
7599 0, /*tp_getattr*/
7600 0, /*tp_setattr*/
7601 0, /*tp_as_async*/
7602 0, /*tp_repr*/
7603 0, /*tp_as_number*/
7604 0, /*tp_as_sequence*/
7605 0, /*tp_as_mapping*/
7606 0, /*tp_hash*/
7607 0, /*tp_call*/
7608 0, /*tp_str*/
7609 0, /*tp_getattro*/
7610 0, /*tp_setattro*/
7611 0, /*tp_as_buffer*/
7612 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7613 _pickle_Unpickler___init____doc__, /*tp_doc*/
7614 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7615 (inquiry)Unpickler_clear, /*tp_clear*/
7616 0, /*tp_richcompare*/
7617 0, /*tp_weaklistoffset*/
7618 0, /*tp_iter*/
7619 0, /*tp_iternext*/
7620 Unpickler_methods, /*tp_methods*/
7621 0, /*tp_members*/
7622 Unpickler_getsets, /*tp_getset*/
7623 0, /*tp_base*/
7624 0, /*tp_dict*/
7625 0, /*tp_descr_get*/
7626 0, /*tp_descr_set*/
7627 0, /*tp_dictoffset*/
7628 _pickle_Unpickler___init__, /*tp_init*/
7629 PyType_GenericAlloc, /*tp_alloc*/
7630 PyType_GenericNew, /*tp_new*/
7631 PyObject_GC_Del, /*tp_free*/
7632 0, /*tp_is_gc*/
7633 };
7634
7635 /*[clinic input]
7636
7637 _pickle.dump
7638
7639 obj: object
7640 file: object
7641 protocol: object = None
7642 *
7643 fix_imports: bool = True
7644 buffer_callback: object = None
7645
7646 Write a pickled representation of obj to the open file object file.
7647
7648 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7649 be more efficient.
7650
7651 The optional *protocol* argument tells the pickler to use the given
7652 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7653 protocol is 4. It was introduced in Python 3.4, and is incompatible
7654 with previous versions.
7655
7656 Specifying a negative protocol version selects the highest protocol
7657 version supported. The higher the protocol used, the more recent the
7658 version of Python needed to read the pickle produced.
7659
7660 The *file* argument must have a write() method that accepts a single
7661 bytes argument. It can thus be a file object opened for binary
7662 writing, an io.BytesIO instance, or any other custom object that meets
7663 this interface.
7664
7665 If *fix_imports* is True and protocol is less than 3, pickle will try
7666 to map the new Python 3 names to the old module names used in Python
7667 2, so that the pickle data stream is readable with Python 2.
7668
7669 If *buffer_callback* is None (the default), buffer views are serialized
7670 into *file* as part of the pickle stream. It is an error if
7671 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7672
7673 [clinic start generated code]*/
7674
7675 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7676 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7677 PyObject *protocol, int fix_imports,
7678 PyObject *buffer_callback)
7679 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7680 {
7681 PicklerObject *pickler = _Pickler_New();
7682
7683 if (pickler == NULL)
7684 return NULL;
7685
7686 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7687 goto error;
7688
7689 if (_Pickler_SetOutputStream(pickler, file) < 0)
7690 goto error;
7691
7692 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7693 goto error;
7694
7695 if (dump(pickler, obj) < 0)
7696 goto error;
7697
7698 if (_Pickler_FlushToFile(pickler) < 0)
7699 goto error;
7700
7701 Py_DECREF(pickler);
7702 Py_RETURN_NONE;
7703
7704 error:
7705 Py_XDECREF(pickler);
7706 return NULL;
7707 }
7708
7709 /*[clinic input]
7710
7711 _pickle.dumps
7712
7713 obj: object
7714 protocol: object = None
7715 *
7716 fix_imports: bool = True
7717 buffer_callback: object = None
7718
7719 Return the pickled representation of the object as a bytes object.
7720
7721 The optional *protocol* argument tells the pickler to use the given
7722 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7723 protocol is 4. It was introduced in Python 3.4, and is incompatible
7724 with previous versions.
7725
7726 Specifying a negative protocol version selects the highest protocol
7727 version supported. The higher the protocol used, the more recent the
7728 version of Python needed to read the pickle produced.
7729
7730 If *fix_imports* is True and *protocol* is less than 3, pickle will
7731 try to map the new Python 3 names to the old module names used in
7732 Python 2, so that the pickle data stream is readable with Python 2.
7733
7734 If *buffer_callback* is None (the default), buffer views are serialized
7735 into *file* as part of the pickle stream. It is an error if
7736 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7737
7738 [clinic start generated code]*/
7739
7740 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7741 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7742 int fix_imports, PyObject *buffer_callback)
7743 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7744 {
7745 PyObject *result;
7746 PicklerObject *pickler = _Pickler_New();
7747
7748 if (pickler == NULL)
7749 return NULL;
7750
7751 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7752 goto error;
7753
7754 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7755 goto error;
7756
7757 if (dump(pickler, obj) < 0)
7758 goto error;
7759
7760 result = _Pickler_GetString(pickler);
7761 Py_DECREF(pickler);
7762 return result;
7763
7764 error:
7765 Py_XDECREF(pickler);
7766 return NULL;
7767 }
7768
7769 /*[clinic input]
7770
7771 _pickle.load
7772
7773 file: object
7774 *
7775 fix_imports: bool = True
7776 encoding: str = 'ASCII'
7777 errors: str = 'strict'
7778 buffers: object(c_default="NULL") = ()
7779
7780 Read and return an object from the pickle data stored in a file.
7781
7782 This is equivalent to ``Unpickler(file).load()``, but may be more
7783 efficient.
7784
7785 The protocol version of the pickle is detected automatically, so no
7786 protocol argument is needed. Bytes past the pickled object's
7787 representation are ignored.
7788
7789 The argument *file* must have two methods, a read() method that takes
7790 an integer argument, and a readline() method that requires no
7791 arguments. Both methods should return bytes. Thus *file* can be a
7792 binary file object opened for reading, an io.BytesIO object, or any
7793 other custom object that meets this interface.
7794
7795 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7796 which are used to control compatibility support for pickle stream
7797 generated by Python 2. If *fix_imports* is True, pickle will try to
7798 map the old Python 2 names to the new names used in Python 3. The
7799 *encoding* and *errors* tell pickle how to decode 8-bit string
7800 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7801 respectively. The *encoding* can be 'bytes' to read these 8-bit
7802 string instances as bytes objects.
7803 [clinic start generated code]*/
7804
7805 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7806 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7807 const char *encoding, const char *errors,
7808 PyObject *buffers)
7809 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7810 {
7811 PyObject *result;
7812 UnpicklerObject *unpickler = _Unpickler_New();
7813
7814 if (unpickler == NULL)
7815 return NULL;
7816
7817 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7818 goto error;
7819
7820 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7821 goto error;
7822
7823 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7824 goto error;
7825
7826 unpickler->fix_imports = fix_imports;
7827
7828 result = load(unpickler);
7829 Py_DECREF(unpickler);
7830 return result;
7831
7832 error:
7833 Py_XDECREF(unpickler);
7834 return NULL;
7835 }
7836
7837 /*[clinic input]
7838
7839 _pickle.loads
7840
7841 data: object
7842 /
7843 *
7844 fix_imports: bool = True
7845 encoding: str = 'ASCII'
7846 errors: str = 'strict'
7847 buffers: object(c_default="NULL") = ()
7848
7849 Read and return an object from the given pickle data.
7850
7851 The protocol version of the pickle is detected automatically, so no
7852 protocol argument is needed. Bytes past the pickled object's
7853 representation are ignored.
7854
7855 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7856 which are used to control compatibility support for pickle stream
7857 generated by Python 2. If *fix_imports* is True, pickle will try to
7858 map the old Python 2 names to the new names used in Python 3. The
7859 *encoding* and *errors* tell pickle how to decode 8-bit string
7860 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861 respectively. The *encoding* can be 'bytes' to read these 8-bit
7862 string instances as bytes objects.
7863 [clinic start generated code]*/
7864
7865 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7866 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7867 const char *encoding, const char *errors,
7868 PyObject *buffers)
7869 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7870 {
7871 PyObject *result;
7872 UnpicklerObject *unpickler = _Unpickler_New();
7873
7874 if (unpickler == NULL)
7875 return NULL;
7876
7877 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7878 goto error;
7879
7880 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881 goto error;
7882
7883 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884 goto error;
7885
7886 unpickler->fix_imports = fix_imports;
7887
7888 result = load(unpickler);
7889 Py_DECREF(unpickler);
7890 return result;
7891
7892 error:
7893 Py_XDECREF(unpickler);
7894 return NULL;
7895 }
7896
7897 static struct PyMethodDef pickle_methods[] = {
7898 _PICKLE_DUMP_METHODDEF
7899 _PICKLE_DUMPS_METHODDEF
7900 _PICKLE_LOAD_METHODDEF
7901 _PICKLE_LOADS_METHODDEF
7902 {NULL, NULL} /* sentinel */
7903 };
7904
7905 static int
pickle_clear(PyObject * m)7906 pickle_clear(PyObject *m)
7907 {
7908 _Pickle_ClearState(_Pickle_GetState(m));
7909 return 0;
7910 }
7911
7912 static void
pickle_free(PyObject * m)7913 pickle_free(PyObject *m)
7914 {
7915 _Pickle_ClearState(_Pickle_GetState(m));
7916 }
7917
7918 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7919 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7920 {
7921 PickleState *st = _Pickle_GetState(m);
7922 Py_VISIT(st->PickleError);
7923 Py_VISIT(st->PicklingError);
7924 Py_VISIT(st->UnpicklingError);
7925 Py_VISIT(st->dispatch_table);
7926 Py_VISIT(st->extension_registry);
7927 Py_VISIT(st->extension_cache);
7928 Py_VISIT(st->inverted_registry);
7929 Py_VISIT(st->name_mapping_2to3);
7930 Py_VISIT(st->import_mapping_2to3);
7931 Py_VISIT(st->name_mapping_3to2);
7932 Py_VISIT(st->import_mapping_3to2);
7933 Py_VISIT(st->codecs_encode);
7934 Py_VISIT(st->getattr);
7935 Py_VISIT(st->partial);
7936 return 0;
7937 }
7938
7939 static struct PyModuleDef _picklemodule = {
7940 PyModuleDef_HEAD_INIT,
7941 "_pickle", /* m_name */
7942 pickle_module_doc, /* m_doc */
7943 sizeof(PickleState), /* m_size */
7944 pickle_methods, /* m_methods */
7945 NULL, /* m_reload */
7946 pickle_traverse, /* m_traverse */
7947 pickle_clear, /* m_clear */
7948 (freefunc)pickle_free /* m_free */
7949 };
7950
7951 PyMODINIT_FUNC
PyInit__pickle(void)7952 PyInit__pickle(void)
7953 {
7954 PyObject *m;
7955 PickleState *st;
7956
7957 m = PyState_FindModule(&_picklemodule);
7958 if (m) {
7959 Py_INCREF(m);
7960 return m;
7961 }
7962
7963 if (PyType_Ready(&Pdata_Type) < 0)
7964 return NULL;
7965 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7966 return NULL;
7967 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7968 return NULL;
7969
7970 /* Create the module and add the functions. */
7971 m = PyModule_Create(&_picklemodule);
7972 if (m == NULL)
7973 return NULL;
7974
7975 /* Add types */
7976 if (PyModule_AddType(m, &Pickler_Type) < 0) {
7977 return NULL;
7978 }
7979 if (PyModule_AddType(m, &Unpickler_Type) < 0) {
7980 return NULL;
7981 }
7982 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7983 return NULL;
7984 }
7985
7986 st = _Pickle_GetState(m);
7987
7988 /* Initialize the exceptions. */
7989 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7990 if (st->PickleError == NULL)
7991 return NULL;
7992 st->PicklingError = \
7993 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7994 if (st->PicklingError == NULL)
7995 return NULL;
7996 st->UnpicklingError = \
7997 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7998 if (st->UnpicklingError == NULL)
7999 return NULL;
8000
8001 Py_INCREF(st->PickleError);
8002 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
8003 return NULL;
8004 Py_INCREF(st->PicklingError);
8005 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
8006 return NULL;
8007 Py_INCREF(st->UnpicklingError);
8008 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
8009 return NULL;
8010
8011 if (_Pickle_InitState(st) < 0)
8012 return NULL;
8013
8014 return m;
8015 }
8016