/* pickle accelerator C extension: _pickle module.
2 *
3 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5 * platforms. */
6
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 # error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10
11 #include "Python.h"
12 #include "structmember.h" // PyMemberDef
13
14 PyDoc_STRVAR(pickle_module_doc,
15 "Optimized C implementation for the Python pickle module.");
16
17 /*[clinic input]
18 module _pickle
19 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
25
/* Pickle protocol version limits.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};
33
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.
   Every opcode is spelled as a single character constant. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
114
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* A frame whose payload is shorter than this many bytes is emitted
       without a header (see _Pickler_CommitFrame). */
    FRAME_SIZE_MIN = 4,
    /* Payload size at which _Pickler_OpcodeBoundary commits the open frame. */
    FRAME_SIZE_TARGET = 64 * 1024,
    /* Bytes reserved for a frame header: the FRAME opcode byte followed by
       an 8-byte little-endian payload length (see _write_size64). */
    FRAME_HEADER_SIZE = 9
};
136
137 /*************************************************************************/
138
/* State of the pickle module, per PEP 3121.
   Every member is an owned reference released by _Pickle_ClearState(). */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
177
178 /* Forward declaration of the _pickle module definition. */
179 static struct PyModuleDef _picklemodule;
180
181 /* Given a module object, get its per-module state. */
182 static PickleState *
_Pickle_GetState(PyObject * module)183 _Pickle_GetState(PyObject *module)
184 {
185 return (PickleState *)PyModule_GetState(module);
186 }
187
188 /* Find the module instance imported in the currently running sub-interpreter
189 and get its state. */
190 static PickleState *
_Pickle_GetGlobalState(void)191 _Pickle_GetGlobalState(void)
192 {
193 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194 }
195
196 /* Clear the given pickle module state. */
197 static void
_Pickle_ClearState(PickleState * st)198 _Pickle_ClearState(PickleState *st)
199 {
200 Py_CLEAR(st->PickleError);
201 Py_CLEAR(st->PicklingError);
202 Py_CLEAR(st->UnpicklingError);
203 Py_CLEAR(st->dispatch_table);
204 Py_CLEAR(st->extension_registry);
205 Py_CLEAR(st->extension_cache);
206 Py_CLEAR(st->inverted_registry);
207 Py_CLEAR(st->name_mapping_2to3);
208 Py_CLEAR(st->import_mapping_2to3);
209 Py_CLEAR(st->name_mapping_3to2);
210 Py_CLEAR(st->import_mapping_3to2);
211 Py_CLEAR(st->codecs_encode);
212 Py_CLEAR(st->getattr);
213 Py_CLEAR(st->partial);
214 }
215
216 /* Initialize the given pickle module state. */
217 static int
_Pickle_InitState(PickleState * st)218 _Pickle_InitState(PickleState *st)
219 {
220 PyObject *copyreg = NULL;
221 PyObject *compat_pickle = NULL;
222 PyObject *codecs = NULL;
223 PyObject *functools = NULL;
224 _Py_IDENTIFIER(getattr);
225
226 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
227 if (st->getattr == NULL)
228 goto error;
229
230 copyreg = PyImport_ImportModule("copyreg");
231 if (!copyreg)
232 goto error;
233 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234 if (!st->dispatch_table)
235 goto error;
236 if (!PyDict_CheckExact(st->dispatch_table)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg.dispatch_table should be a dict, not %.200s",
239 Py_TYPE(st->dispatch_table)->tp_name);
240 goto error;
241 }
242 st->extension_registry = \
243 PyObject_GetAttrString(copyreg, "_extension_registry");
244 if (!st->extension_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->extension_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._extension_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250 goto error;
251 }
252 st->inverted_registry = \
253 PyObject_GetAttrString(copyreg, "_inverted_registry");
254 if (!st->inverted_registry)
255 goto error;
256 if (!PyDict_CheckExact(st->inverted_registry)) {
257 PyErr_Format(PyExc_RuntimeError,
258 "copyreg._inverted_registry should be a dict, "
259 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260 goto error;
261 }
262 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263 if (!st->extension_cache)
264 goto error;
265 if (!PyDict_CheckExact(st->extension_cache)) {
266 PyErr_Format(PyExc_RuntimeError,
267 "copyreg._extension_cache should be a dict, "
268 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269 goto error;
270 }
271 Py_CLEAR(copyreg);
272
273 /* Load the 2.x -> 3.x stdlib module mapping tables */
274 compat_pickle = PyImport_ImportModule("_compat_pickle");
275 if (!compat_pickle)
276 goto error;
277 st->name_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279 if (!st->name_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284 Py_TYPE(st->name_mapping_2to3)->tp_name);
285 goto error;
286 }
287 st->import_mapping_2to3 = \
288 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289 if (!st->import_mapping_2to3)
290 goto error;
291 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292 PyErr_Format(PyExc_RuntimeError,
293 "_compat_pickle.IMPORT_MAPPING should be a dict, "
294 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295 goto error;
296 }
297 /* ... and the 3.x -> 2.x mapping tables */
298 st->name_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300 if (!st->name_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306 goto error;
307 }
308 st->import_mapping_3to2 = \
309 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310 if (!st->import_mapping_3to2)
311 goto error;
312 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313 PyErr_Format(PyExc_RuntimeError,
314 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316 goto error;
317 }
318 Py_CLEAR(compat_pickle);
319
320 codecs = PyImport_ImportModule("codecs");
321 if (codecs == NULL)
322 goto error;
323 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324 if (st->codecs_encode == NULL) {
325 goto error;
326 }
327 if (!PyCallable_Check(st->codecs_encode)) {
328 PyErr_Format(PyExc_RuntimeError,
329 "codecs.encode should be a callable, not %.200s",
330 Py_TYPE(st->codecs_encode)->tp_name);
331 goto error;
332 }
333 Py_CLEAR(codecs);
334
335 functools = PyImport_ImportModule("functools");
336 if (!functools)
337 goto error;
338 st->partial = PyObject_GetAttrString(functools, "partial");
339 if (!st->partial)
340 goto error;
341 Py_CLEAR(functools);
342
343 return 0;
344
345 error:
346 Py_CLEAR(copyreg);
347 Py_CLEAR(compat_pickle);
348 Py_CLEAR(codecs);
349 Py_CLEAR(functools);
350 _Pickle_ClearState(st);
351 return -1;
352 }
353
354 /* Helper for calling a function with a single argument quickly.
355
356 This function steals the reference of the given argument. */
357 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)358 _Pickle_FastCall(PyObject *func, PyObject *obj)
359 {
360 PyObject *result;
361
362 result = PyObject_CallOneArg(func, obj);
363 Py_DECREF(obj);
364 return result;
365 }
366
367 /*************************************************************************/
368
369 /* Retrieve and deconstruct a method for avoiding a reference cycle
370 (pickler -> bound method of pickler -> pickler) */
371 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)372 init_method_ref(PyObject *self, _Py_Identifier *name,
373 PyObject **method_func, PyObject **method_self)
374 {
375 PyObject *func, *func2;
376 int ret;
377
378 /* *method_func and *method_self should be consistent. All refcount decrements
379 should be occurred after setting *method_self and *method_func. */
380 ret = _PyObject_LookupAttrId(self, name, &func);
381 if (func == NULL) {
382 *method_self = NULL;
383 Py_CLEAR(*method_func);
384 return ret;
385 }
386
387 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388 /* Deconstruct a bound Python method */
389 func2 = PyMethod_GET_FUNCTION(func);
390 Py_INCREF(func2);
391 *method_self = self; /* borrowed */
392 Py_XSETREF(*method_func, func2);
393 Py_DECREF(func);
394 return 0;
395 }
396 else {
397 *method_self = NULL;
398 Py_XSETREF(*method_func, func);
399 return 0;
400 }
401 }
402
403 /* Bind a method if it was deconstructed */
404 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)405 reconstruct_method(PyObject *func, PyObject *self)
406 {
407 if (self) {
408 return PyMethod_New(func, self);
409 }
410 else {
411 Py_INCREF(func);
412 return func;
413 }
414 }
415
416 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)417 call_method(PyObject *func, PyObject *self, PyObject *obj)
418 {
419 if (self) {
420 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421 }
422 else {
423 return PyObject_CallOneArg(func, obj);
424 }
425 }
426
427 /*************************************************************************/
428
/* Internal data type used as the unpickling stack.
   The number of stacked items is kept in the variable-size header and is
   read with Py_SIZE(); the stack owns a reference to each item (they are
   released in Pdata_dealloc). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;            /* array holding the stacked objects */
    int mark_set;               /* is MARK set? */
    Py_ssize_t fence;           /* position of top MARK or 0 */
    Py_ssize_t allocated;       /* number of slots in data allocated */
} Pdata;
437
438 static void
Pdata_dealloc(Pdata * self)439 Pdata_dealloc(Pdata *self)
440 {
441 Py_ssize_t i = Py_SIZE(self);
442 while (--i >= 0) {
443 Py_DECREF(self->data[i]);
444 }
445 PyMem_FREE(self->data);
446 PyObject_Del(self);
447 }
448
449 static PyTypeObject Pdata_Type = {
450 PyVarObject_HEAD_INIT(NULL, 0)
451 "_pickle.Pdata", /*tp_name*/
452 sizeof(Pdata), /*tp_basicsize*/
453 sizeof(PyObject *), /*tp_itemsize*/
454 (destructor)Pdata_dealloc, /*tp_dealloc*/
455 };
456
457 static PyObject *
Pdata_New(void)458 Pdata_New(void)
459 {
460 Pdata *self;
461
462 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463 return NULL;
464 Py_SET_SIZE(self, 0);
465 self->mark_set = 0;
466 self->fence = 0;
467 self->allocated = 8;
468 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
469 if (self->data)
470 return (PyObject *)self;
471 Py_DECREF(self);
472 return PyErr_NoMemory();
473 }
474
475
476 /* Retain only the initial clearto items. If clearto >= the current
477 * number of items, this is a (non-erroneous) NOP.
478 */
479 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)480 Pdata_clear(Pdata *self, Py_ssize_t clearto)
481 {
482 Py_ssize_t i = Py_SIZE(self);
483
484 assert(clearto >= self->fence);
485 if (clearto >= i)
486 return 0;
487
488 while (--i >= clearto) {
489 Py_CLEAR(self->data[i]);
490 }
491 Py_SET_SIZE(self, clearto);
492 return 0;
493 }
494
495 static int
Pdata_grow(Pdata * self)496 Pdata_grow(Pdata *self)
497 {
498 PyObject **data = self->data;
499 size_t allocated = (size_t)self->allocated;
500 size_t new_allocated;
501
502 new_allocated = (allocated >> 3) + 6;
503 /* check for integer overflow */
504 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
505 goto nomemory;
506 new_allocated += allocated;
507 PyMem_RESIZE(data, PyObject *, new_allocated);
508 if (data == NULL)
509 goto nomemory;
510
511 self->data = data;
512 self->allocated = (Py_ssize_t)new_allocated;
513 return 0;
514
515 nomemory:
516 PyErr_NoMemory();
517 return -1;
518 }
519
520 static int
Pdata_stack_underflow(Pdata * self)521 Pdata_stack_underflow(Pdata *self)
522 {
523 PickleState *st = _Pickle_GetGlobalState();
524 PyErr_SetString(st->UnpicklingError,
525 self->mark_set ?
526 "unexpected MARK found" :
527 "unpickling stack underflow");
528 return -1;
529 }
530
531 /* D is a Pdata*. Pop the topmost element and store it into V, which
532 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
533 * is raised and V is set to NULL.
534 */
535 static PyObject *
Pdata_pop(Pdata * self)536 Pdata_pop(Pdata *self)
537 {
538 if (Py_SIZE(self) <= self->fence) {
539 Pdata_stack_underflow(self);
540 return NULL;
541 }
542 Py_SET_SIZE(self, Py_SIZE(self) - 1);
543 return self->data[Py_SIZE(self)];
544 }
545 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
546
547 static int
Pdata_push(Pdata * self,PyObject * obj)548 Pdata_push(Pdata *self, PyObject *obj)
549 {
550 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
551 return -1;
552 }
553 self->data[Py_SIZE(self)] = obj;
554 Py_SET_SIZE(self, Py_SIZE(self) + 1);
555 return 0;
556 }
557
/* Push an object on stack, transferring its ownership to the stack.
   O is evaluated exactly once.  If the push fails, the reference that was
   handed over is released (the caller cannot do it, since the macro
   returns ER from the enclosing function right away). */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_DECREF(pdata_push_obj_);                         \
            return (ER);                                        \
        }                                                       \
    } while(0)

/* Push an object on stack, adding a new reference to the object.
   O is evaluated exactly once.  If the push fails, the reference added
   here is dropped again before returning ER from the enclosing function. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)
566
567 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)568 Pdata_poptuple(Pdata *self, Py_ssize_t start)
569 {
570 PyObject *tuple;
571 Py_ssize_t len, i, j;
572
573 if (start < self->fence) {
574 Pdata_stack_underflow(self);
575 return NULL;
576 }
577 len = Py_SIZE(self) - start;
578 tuple = PyTuple_New(len);
579 if (tuple == NULL)
580 return NULL;
581 for (i = start, j = 0; j < len; i++, j++)
582 PyTuple_SET_ITEM(tuple, j, self->data[i]);
583
584 Py_SET_SIZE(self, start);
585 return tuple;
586 }
587
588 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)589 Pdata_poplist(Pdata *self, Py_ssize_t start)
590 {
591 PyObject *list;
592 Py_ssize_t len, i, j;
593
594 len = Py_SIZE(self) - start;
595 list = PyList_New(len);
596 if (list == NULL)
597 return NULL;
598 for (i = start, j = 0; j < len; i++, j++)
599 PyList_SET_ITEM(list, j, self->data[i]);
600
601 Py_SET_SIZE(self, start);
602 return list;
603 }
604
/* One slot of the pickler memo table.  A slot is free when me_key is NULL. */
typedef struct {
    PyObject *me_key;       /* memoized object (owned reference), compared
                               by pointer identity */
    Py_ssize_t me_value;    /* integer associated with the object */
} PyMemoEntry;

/* Open-addressing hash table mapping objects to integers. */
typedef struct {
    size_t mt_mask;         /* mt_allocated - 1; masks a hash to a slot */
    size_t mt_used;         /* number of occupied slots */
    size_t mt_allocated;    /* number of slots in mt_table (a power of two) */
    PyMemoEntry *mt_table;  /* the slot array */
} PyMemoTable;
616
/* Instance layout of _pickle.Pickler. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytearray buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the use of the memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* presumably the current nesting depth in
                                   fast mode (cf. FAST_NESTING_LIMIT) --
                                   TODO confirm */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
655
/* Instance layout of _pickle.Unpickler. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
697
/* Proxy object giving access to the memo of a Pickler. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;

/* Proxy object giving access to the memo of an Unpickler. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler whose memo we're proxying. */
} UnpicklerMemoProxyObject;
707
708 /* Forward declarations */
709 static int save(PicklerObject *, PyObject *, int);
710 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
711 static PyTypeObject Pickler_Type;
712 static PyTypeObject Unpickler_Type;
713
714 #include "clinic/_pickle.c.h"
715
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created table, and the smallest size a
   resized table can have. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by after each
   probe in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
724
725
726 static PyMemoTable *
PyMemoTable_New(void)727 PyMemoTable_New(void)
728 {
729 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
730 if (memo == NULL) {
731 PyErr_NoMemory();
732 return NULL;
733 }
734
735 memo->mt_used = 0;
736 memo->mt_allocated = MT_MINSIZE;
737 memo->mt_mask = MT_MINSIZE - 1;
738 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
739 if (memo->mt_table == NULL) {
740 PyMem_FREE(memo);
741 PyErr_NoMemory();
742 return NULL;
743 }
744 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
745
746 return memo;
747 }
748
749 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)750 PyMemoTable_Copy(PyMemoTable *self)
751 {
752 PyMemoTable *new = PyMemoTable_New();
753 if (new == NULL)
754 return NULL;
755
756 new->mt_used = self->mt_used;
757 new->mt_allocated = self->mt_allocated;
758 new->mt_mask = self->mt_mask;
759 /* The table we get from _New() is probably smaller than we wanted.
760 Free it and allocate one that's the right size. */
761 PyMem_FREE(new->mt_table);
762 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
763 if (new->mt_table == NULL) {
764 PyMem_FREE(new);
765 PyErr_NoMemory();
766 return NULL;
767 }
768 for (size_t i = 0; i < self->mt_allocated; i++) {
769 Py_XINCREF(self->mt_table[i].me_key);
770 }
771 memcpy(new->mt_table, self->mt_table,
772 sizeof(PyMemoEntry) * self->mt_allocated);
773
774 return new;
775 }
776
777 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)778 PyMemoTable_Size(PyMemoTable *self)
779 {
780 return self->mt_used;
781 }
782
783 static int
PyMemoTable_Clear(PyMemoTable * self)784 PyMemoTable_Clear(PyMemoTable *self)
785 {
786 Py_ssize_t i = self->mt_allocated;
787
788 while (--i >= 0) {
789 Py_XDECREF(self->mt_table[i].me_key);
790 }
791 self->mt_used = 0;
792 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
793 return 0;
794 }
795
796 static void
PyMemoTable_Del(PyMemoTable * self)797 PyMemoTable_Del(PyMemoTable *self)
798 {
799 if (self == NULL)
800 return;
801 PyMemoTable_Clear(self);
802
803 PyMem_FREE(self->mt_table);
804 PyMem_FREE(self);
805 }
806
807 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
808 can be considerably simpler than dictobject.c's lookdict(). */
809 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)810 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
811 {
812 size_t i;
813 size_t perturb;
814 size_t mask = self->mt_mask;
815 PyMemoEntry *table = self->mt_table;
816 PyMemoEntry *entry;
817 Py_hash_t hash = (Py_hash_t)key >> 3;
818
819 i = hash & mask;
820 entry = &table[i];
821 if (entry->me_key == NULL || entry->me_key == key)
822 return entry;
823
824 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
825 i = (i << 2) + i + perturb + 1;
826 entry = &table[i & mask];
827 if (entry->me_key == NULL || entry->me_key == key)
828 return entry;
829 }
830 Py_UNREACHABLE();
831 }
832
833 /* Returns -1 on failure, 0 on success. */
834 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)835 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
836 {
837 PyMemoEntry *oldtable = NULL;
838 PyMemoEntry *oldentry, *newentry;
839 size_t new_size = MT_MINSIZE;
840 size_t to_process;
841
842 assert(min_size > 0);
843
844 if (min_size > PY_SSIZE_T_MAX) {
845 PyErr_NoMemory();
846 return -1;
847 }
848
849 /* Find the smallest valid table size >= min_size. */
850 while (new_size < min_size) {
851 new_size <<= 1;
852 }
853 /* new_size needs to be a power of two. */
854 assert((new_size & (new_size - 1)) == 0);
855
856 /* Allocate new table. */
857 oldtable = self->mt_table;
858 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
859 if (self->mt_table == NULL) {
860 self->mt_table = oldtable;
861 PyErr_NoMemory();
862 return -1;
863 }
864 self->mt_allocated = new_size;
865 self->mt_mask = new_size - 1;
866 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
867
868 /* Copy entries from the old table. */
869 to_process = self->mt_used;
870 for (oldentry = oldtable; to_process > 0; oldentry++) {
871 if (oldentry->me_key != NULL) {
872 to_process--;
873 /* newentry is a pointer to a chunk of the new
874 mt_table, so we're setting the key:value pair
875 in-place. */
876 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
877 newentry->me_key = oldentry->me_key;
878 newentry->me_value = oldentry->me_value;
879 }
880 }
881
882 /* Deallocate the old table. */
883 PyMem_FREE(oldtable);
884 return 0;
885 }
886
887 /* Returns NULL on failure, a pointer to the value otherwise. */
888 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)889 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
890 {
891 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
892 if (entry->me_key == NULL)
893 return NULL;
894 return &entry->me_value;
895 }
896
897 /* Returns -1 on failure, 0 on success. */
898 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)899 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
900 {
901 PyMemoEntry *entry;
902
903 assert(key != NULL);
904
905 entry = _PyMemoTable_Lookup(self, key);
906 if (entry->me_key != NULL) {
907 entry->me_value = value;
908 return 0;
909 }
910 Py_INCREF(key);
911 entry->me_key = key;
912 entry->me_value = value;
913 self->mt_used++;
914
915 /* If we added a key, we can safely resize. Otherwise just return!
916 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
917 *
918 * Quadrupling the size improves average table sparseness
919 * (reducing collisions) at the cost of some memory. It also halves
920 * the number of expensive resize operations in a growing memo table.
921 *
922 * Very large memo tables (over 50K items) use doubling instead.
923 * This may help applications with severe memory constraints.
924 */
925 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
926 return 0;
927 }
928 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
929 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
930 return _PyMemoTable_ResizeTable(self, desired_size);
931 }
932
933 #undef MT_MINSIZE
934 #undef PERTURB_SHIFT
935
936 /*************************************************************************/
937
938
939 static int
_Pickler_ClearBuffer(PicklerObject * self)940 _Pickler_ClearBuffer(PicklerObject *self)
941 {
942 Py_XSETREF(self->output_buffer,
943 PyBytes_FromStringAndSize(NULL, self->max_output_len));
944 if (self->output_buffer == NULL)
945 return -1;
946 self->output_len = 0;
947 self->frame_start = -1;
948 return 0;
949 }
950
/* Store `value` into out[0..7] as a little-endian 64-bit integer.  Bytes
   beyond the width of size_t are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (size_t pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        }
        else {
            out[pos] = 0;
        }
    }
}
965
966 static int
_Pickler_CommitFrame(PicklerObject * self)967 _Pickler_CommitFrame(PicklerObject *self)
968 {
969 size_t frame_len;
970 char *qdata;
971
972 if (!self->framing || self->frame_start == -1)
973 return 0;
974 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
975 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
976 if (frame_len >= FRAME_SIZE_MIN) {
977 qdata[0] = FRAME;
978 _write_size64(qdata + 1, frame_len);
979 }
980 else {
981 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
982 self->output_len -= FRAME_HEADER_SIZE;
983 }
984 self->frame_start = -1;
985 return 0;
986 }
987
988 static PyObject *
_Pickler_GetString(PicklerObject * self)989 _Pickler_GetString(PicklerObject *self)
990 {
991 PyObject *output_buffer = self->output_buffer;
992
993 assert(self->output_buffer != NULL);
994
995 if (_Pickler_CommitFrame(self))
996 return NULL;
997
998 self->output_buffer = NULL;
999 /* Resize down to exact size */
1000 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1001 return NULL;
1002 return output_buffer;
1003 }
1004
1005 static int
_Pickler_FlushToFile(PicklerObject * self)1006 _Pickler_FlushToFile(PicklerObject *self)
1007 {
1008 PyObject *output, *result;
1009
1010 assert(self->write != NULL);
1011
1012 /* This will commit the frame first */
1013 output = _Pickler_GetString(self);
1014 if (output == NULL)
1015 return -1;
1016
1017 result = _Pickle_FastCall(self->write, output);
1018 Py_XDECREF(result);
1019 return (result == NULL) ? -1 : 0;
1020 }
1021
1022 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1023 _Pickler_OpcodeBoundary(PicklerObject *self)
1024 {
1025 Py_ssize_t frame_len;
1026
1027 if (!self->framing || self->frame_start == -1) {
1028 return 0;
1029 }
1030 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1031 if (frame_len >= FRAME_SIZE_TARGET) {
1032 if(_Pickler_CommitFrame(self)) {
1033 return -1;
1034 }
1035 /* Flush the content of the committed frame to the underlying
1036 * file and reuse the pickler buffer for the next frame so as
1037 * to limit memory usage when dumping large complex objects to
1038 * a file.
1039 *
1040 * self->write is NULL when called via dumps.
1041 */
1042 if (self->write != NULL) {
1043 if (_Pickler_FlushToFile(self) < 0) {
1044 return -1;
1045 }
1046 if (_Pickler_ClearBuffer(self) < 0) {
1047 return -1;
1048 }
1049 }
1050 }
1051 return 0;
1052 }
1053
1054 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1055 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1056 {
1057 Py_ssize_t i, n, required;
1058 char *buffer;
1059 int need_new_frame;
1060
1061 assert(s != NULL);
1062 need_new_frame = (self->framing && self->frame_start == -1);
1063
1064 if (need_new_frame)
1065 n = data_len + FRAME_HEADER_SIZE;
1066 else
1067 n = data_len;
1068
1069 required = self->output_len + n;
1070 if (required > self->max_output_len) {
1071 /* Make place in buffer for the pickle chunk */
1072 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1073 PyErr_NoMemory();
1074 return -1;
1075 }
1076 self->max_output_len = (self->output_len + n) / 2 * 3;
1077 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1078 return -1;
1079 }
1080 buffer = PyBytes_AS_STRING(self->output_buffer);
1081 if (need_new_frame) {
1082 /* Setup new frame */
1083 Py_ssize_t frame_start = self->output_len;
1084 self->frame_start = frame_start;
1085 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1086 /* Write an invalid value, for debugging */
1087 buffer[frame_start + i] = 0xFE;
1088 }
1089 self->output_len += FRAME_HEADER_SIZE;
1090 }
1091 if (data_len < 8) {
1092 /* This is faster than memcpy when the string is short. */
1093 for (i = 0; i < data_len; i++) {
1094 buffer[self->output_len + i] = s[i];
1095 }
1096 }
1097 else {
1098 memcpy(buffer + self->output_len, s, data_len);
1099 }
1100 self->output_len += data_len;
1101 return data_len;
1102 }
1103
1104 static PicklerObject *
_Pickler_New(void)1105 _Pickler_New(void)
1106 {
1107 PicklerObject *self;
1108
1109 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1110 if (self == NULL)
1111 return NULL;
1112
1113 self->pers_func = NULL;
1114 self->dispatch_table = NULL;
1115 self->buffer_callback = NULL;
1116 self->write = NULL;
1117 self->proto = 0;
1118 self->bin = 0;
1119 self->framing = 0;
1120 self->frame_start = -1;
1121 self->fast = 0;
1122 self->fast_nesting = 0;
1123 self->fix_imports = 0;
1124 self->fast_memo = NULL;
1125 self->max_output_len = WRITE_BUF_SIZE;
1126 self->output_len = 0;
1127 self->reducer_override = NULL;
1128
1129 self->memo = PyMemoTable_New();
1130 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1131 self->max_output_len);
1132
1133 if (self->memo == NULL || self->output_buffer == NULL) {
1134 Py_DECREF(self);
1135 return NULL;
1136 }
1137
1138 PyObject_GC_Track(self);
1139 return self;
1140 }
1141
1142 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1143 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1144 {
1145 long proto;
1146
1147 if (protocol == Py_None) {
1148 proto = DEFAULT_PROTOCOL;
1149 }
1150 else {
1151 proto = PyLong_AsLong(protocol);
1152 if (proto < 0) {
1153 if (proto == -1 && PyErr_Occurred())
1154 return -1;
1155 proto = HIGHEST_PROTOCOL;
1156 }
1157 else if (proto > HIGHEST_PROTOCOL) {
1158 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1159 HIGHEST_PROTOCOL);
1160 return -1;
1161 }
1162 }
1163 self->proto = (int)proto;
1164 self->bin = proto > 0;
1165 self->fix_imports = fix_imports && proto < 3;
1166 return 0;
1167 }
1168
1169 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1170 be called once on a freshly created Pickler. */
1171 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1172 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1173 {
1174 _Py_IDENTIFIER(write);
1175 assert(file != NULL);
1176 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1177 return -1;
1178 }
1179 if (self->write == NULL) {
1180 PyErr_SetString(PyExc_TypeError,
1181 "file must have a 'write' attribute");
1182 return -1;
1183 }
1184
1185 return 0;
1186 }
1187
1188 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1189 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1190 {
1191 if (buffer_callback == Py_None) {
1192 buffer_callback = NULL;
1193 }
1194 if (buffer_callback != NULL && self->proto < 5) {
1195 PyErr_SetString(PyExc_ValueError,
1196 "buffer_callback needs protocol >= 5");
1197 return -1;
1198 }
1199
1200 Py_XINCREF(buffer_callback);
1201 self->buffer_callback = buffer_callback;
1202 return 0;
1203 }
1204
1205 /* Returns the size of the input on success, -1 on failure. This takes its
1206 own reference to `input`. */
1207 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1208 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1209 {
1210 if (self->buffer.buf != NULL)
1211 PyBuffer_Release(&self->buffer);
1212 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1213 return -1;
1214 self->input_buffer = self->buffer.buf;
1215 self->input_len = self->buffer.len;
1216 self->next_read_idx = 0;
1217 self->prefetched_idx = self->input_len;
1218 return self->input_len;
1219 }
1220
1221 static int
bad_readline(void)1222 bad_readline(void)
1223 {
1224 PickleState *st = _Pickle_GetGlobalState();
1225 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1226 return -1;
1227 }
1228
1229 /* Skip any consumed data that was only prefetched using peek() */
1230 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1231 _Unpickler_SkipConsumed(UnpicklerObject *self)
1232 {
1233 Py_ssize_t consumed;
1234 PyObject *r;
1235
1236 consumed = self->next_read_idx - self->prefetched_idx;
1237 if (consumed <= 0)
1238 return 0;
1239
1240 assert(self->peek); /* otherwise we did something wrong */
1241 /* This makes a useless copy... */
1242 r = PyObject_CallFunction(self->read, "n", consumed);
1243 if (r == NULL)
1244 return -1;
1245 Py_DECREF(r);
1246
1247 self->prefetched_idx = self->next_read_idx;
1248 return 0;
1249 }
1250
/* Sentinel byte count for _Unpickler_ReadFromFile(): instead of reading a
   fixed number of bytes, call the file's readline() once. */
static const Py_ssize_t READ_WHOLE_LINE = -1;
1252
1253 /* If reading from a file, we need to only pull the bytes we need, since there
1254 may be multiple pickle objects arranged contiguously in the same input
1255 buffer.
1256
1257 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1258 bytes from the input stream/buffer.
1259
1260 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1261 failure; on success, returns the number of bytes read from the file.
1262
1263 On success, self->input_len will be 0; this is intentional so that when
1264 unpickling from a file, the "we've run out of data" code paths will trigger,
1265 causing the Unpickler to go back to the file for more data. Use the returned
1266 size to tell you how much data you can process. */
1267 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1268 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1269 {
1270 PyObject *data;
1271 Py_ssize_t read_size;
1272
1273 assert(self->read != NULL);
1274
1275 if (_Unpickler_SkipConsumed(self) < 0)
1276 return -1;
1277
1278 if (n == READ_WHOLE_LINE) {
1279 data = PyObject_CallNoArgs(self->readline);
1280 }
1281 else {
1282 PyObject *len;
1283 /* Prefetch some data without advancing the file pointer, if possible */
1284 if (self->peek && n < PREFETCH) {
1285 len = PyLong_FromSsize_t(PREFETCH);
1286 if (len == NULL)
1287 return -1;
1288 data = _Pickle_FastCall(self->peek, len);
1289 if (data == NULL) {
1290 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1291 return -1;
1292 /* peek() is probably not supported by the given file object */
1293 PyErr_Clear();
1294 Py_CLEAR(self->peek);
1295 }
1296 else {
1297 read_size = _Unpickler_SetStringInput(self, data);
1298 Py_DECREF(data);
1299 self->prefetched_idx = 0;
1300 if (n <= read_size)
1301 return n;
1302 }
1303 }
1304 len = PyLong_FromSsize_t(n);
1305 if (len == NULL)
1306 return -1;
1307 data = _Pickle_FastCall(self->read, len);
1308 }
1309 if (data == NULL)
1310 return -1;
1311
1312 read_size = _Unpickler_SetStringInput(self, data);
1313 Py_DECREF(data);
1314 return read_size;
1315 }
1316
1317 /* Don't call it directly: use _Unpickler_Read() */
1318 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1319 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1320 {
1321 Py_ssize_t num_read;
1322
1323 *s = NULL;
1324 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1325 PickleState *st = _Pickle_GetGlobalState();
1326 PyErr_SetString(st->UnpicklingError,
1327 "read would overflow (invalid bytecode)");
1328 return -1;
1329 }
1330
1331 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1332 assert(self->next_read_idx + n > self->input_len);
1333
1334 if (!self->read)
1335 return bad_readline();
1336
1337 /* Extend the buffer to satisfy desired size */
1338 num_read = _Unpickler_ReadFromFile(self, n);
1339 if (num_read < 0)
1340 return -1;
1341 if (num_read < n)
1342 return bad_readline();
1343 *s = self->input_buffer;
1344 self->next_read_idx = n;
1345 return n;
1346 }
1347
1348 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1349 *
1350 * This should only be used for non-small data reads where potentially
1351 * avoiding a copy is beneficial. This method does not try to prefetch
1352 * more data into the input buffer.
1353 *
1354 * _Unpickler_Read() is recommended in most cases.
1355 */
1356 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1357 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1358 {
1359 assert(n != READ_WHOLE_LINE);
1360
1361 /* Read from available buffer data, if any */
1362 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1363 if (in_buffer > 0) {
1364 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1365 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1366 self->next_read_idx += to_read;
1367 buf += to_read;
1368 n -= to_read;
1369 if (n == 0) {
1370 /* Entire read was satisfied from buffer */
1371 return n;
1372 }
1373 }
1374
1375 /* Read from file */
1376 if (!self->read) {
1377 /* We're unpickling memory, this means the input is truncated */
1378 return bad_readline();
1379 }
1380 if (_Unpickler_SkipConsumed(self) < 0) {
1381 return -1;
1382 }
1383
1384 if (!self->readinto) {
1385 /* readinto() not supported on file-like object, fall back to read()
1386 * and copy into destination buffer (bpo-39681) */
1387 PyObject* len = PyLong_FromSsize_t(n);
1388 if (len == NULL) {
1389 return -1;
1390 }
1391 PyObject* data = _Pickle_FastCall(self->read, len);
1392 if (data == NULL) {
1393 return -1;
1394 }
1395 if (!PyBytes_Check(data)) {
1396 PyErr_Format(PyExc_ValueError,
1397 "read() returned non-bytes object (%R)",
1398 Py_TYPE(data));
1399 Py_DECREF(data);
1400 return -1;
1401 }
1402 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1403 if (read_size < n) {
1404 Py_DECREF(data);
1405 return bad_readline();
1406 }
1407 memcpy(buf, PyBytes_AS_STRING(data), n);
1408 Py_DECREF(data);
1409 return n;
1410 }
1411
1412 /* Call readinto() into user buffer */
1413 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1414 if (buf_obj == NULL) {
1415 return -1;
1416 }
1417 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1418 if (read_size_obj == NULL) {
1419 return -1;
1420 }
1421 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1422 Py_DECREF(read_size_obj);
1423
1424 if (read_size < 0) {
1425 if (!PyErr_Occurred()) {
1426 PyErr_SetString(PyExc_ValueError,
1427 "readinto() returned negative size");
1428 }
1429 return -1;
1430 }
1431 if (read_size < n) {
1432 return bad_readline();
1433 }
1434 return n;
1435 }
1436
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when at least `n` unread bytes are already in the input buffer,
   `*s` is pointed at them and self->next_read_idx is advanced by `n`, without
   any function call.  Otherwise the request is forwarded to
   _Unpickler_ReadImpl(), which goes back to the file.

   Because this is a macro, `self`, `s` and `n` may be evaluated more than
   once: do not pass expressions with side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1456
1457 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1458 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1459 char **result)
1460 {
1461 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1462 if (input_line == NULL) {
1463 PyErr_NoMemory();
1464 return -1;
1465 }
1466
1467 memcpy(input_line, line, len);
1468 input_line[len] = '\0';
1469 self->input_line = input_line;
1470 *result = self->input_line;
1471 return len;
1472 }
1473
1474 /* Read a line from the input stream/buffer. If we run off the end of the input
1475 before hitting \n, raise an error.
1476
1477 Returns the number of chars read, or -1 on failure. */
1478 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1479 _Unpickler_Readline(UnpicklerObject *self, char **result)
1480 {
1481 Py_ssize_t i, num_read;
1482
1483 for (i = self->next_read_idx; i < self->input_len; i++) {
1484 if (self->input_buffer[i] == '\n') {
1485 char *line_start = self->input_buffer + self->next_read_idx;
1486 num_read = i - self->next_read_idx + 1;
1487 self->next_read_idx = i + 1;
1488 return _Unpickler_CopyLine(self, line_start, num_read, result);
1489 }
1490 }
1491 if (!self->read)
1492 return bad_readline();
1493
1494 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1495 if (num_read < 0)
1496 return -1;
1497 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1498 return bad_readline();
1499 self->next_read_idx = num_read;
1500 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1501 }
1502
1503 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1504 will be modified in place. */
1505 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1506 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1507 {
1508 size_t i;
1509
1510 assert(new_size > self->memo_size);
1511
1512 PyObject **memo_new = self->memo;
1513 PyMem_RESIZE(memo_new, PyObject *, new_size);
1514 if (memo_new == NULL) {
1515 PyErr_NoMemory();
1516 return -1;
1517 }
1518 self->memo = memo_new;
1519 for (i = self->memo_size; i < new_size; i++)
1520 self->memo[i] = NULL;
1521 self->memo_size = new_size;
1522 return 0;
1523 }
1524
1525 /* Returns NULL if idx is out of bounds. */
1526 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1527 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1528 {
1529 if (idx >= self->memo_size)
1530 return NULL;
1531
1532 return self->memo[idx];
1533 }
1534
1535 /* Returns -1 (with an exception set) on failure, 0 on success.
1536 This takes its own reference to `value`. */
1537 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1538 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1539 {
1540 PyObject *old_item;
1541
1542 if (idx >= self->memo_size) {
1543 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1544 return -1;
1545 assert(idx < self->memo_size);
1546 }
1547 Py_INCREF(value);
1548 old_item = self->memo[idx];
1549 self->memo[idx] = value;
1550 if (old_item != NULL) {
1551 Py_DECREF(old_item);
1552 }
1553 else {
1554 self->memo_len++;
1555 }
1556 return 0;
1557 }
1558
1559 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1560 _Unpickler_NewMemo(Py_ssize_t new_size)
1561 {
1562 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1563 if (memo == NULL) {
1564 PyErr_NoMemory();
1565 return NULL;
1566 }
1567 memset(memo, 0, new_size * sizeof(PyObject *));
1568 return memo;
1569 }
1570
1571 /* Free the unpickler's memo, taking care to decref any items left in it. */
1572 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1573 _Unpickler_MemoCleanup(UnpicklerObject *self)
1574 {
1575 Py_ssize_t i;
1576 PyObject **memo = self->memo;
1577
1578 if (self->memo == NULL)
1579 return;
1580 self->memo = NULL;
1581 i = self->memo_size;
1582 while (--i >= 0) {
1583 Py_XDECREF(memo[i]);
1584 }
1585 PyMem_FREE(memo);
1586 }
1587
1588 static UnpicklerObject *
_Unpickler_New(void)1589 _Unpickler_New(void)
1590 {
1591 UnpicklerObject *self;
1592
1593 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1594 if (self == NULL)
1595 return NULL;
1596
1597 self->pers_func = NULL;
1598 self->input_buffer = NULL;
1599 self->input_line = NULL;
1600 self->input_len = 0;
1601 self->next_read_idx = 0;
1602 self->prefetched_idx = 0;
1603 self->read = NULL;
1604 self->readinto = NULL;
1605 self->readline = NULL;
1606 self->peek = NULL;
1607 self->buffers = NULL;
1608 self->encoding = NULL;
1609 self->errors = NULL;
1610 self->marks = NULL;
1611 self->num_marks = 0;
1612 self->marks_size = 0;
1613 self->proto = 0;
1614 self->fix_imports = 0;
1615 memset(&self->buffer, 0, sizeof(Py_buffer));
1616 self->memo_size = 32;
1617 self->memo_len = 0;
1618 self->memo = _Unpickler_NewMemo(self->memo_size);
1619 self->stack = (Pdata *)Pdata_New();
1620
1621 if (self->memo == NULL || self->stack == NULL) {
1622 Py_DECREF(self);
1623 return NULL;
1624 }
1625
1626 PyObject_GC_Track(self);
1627 return self;
1628 }
1629
1630 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1631 be called once on a freshly created Unpickler. */
1632 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1633 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1634 {
1635 _Py_IDENTIFIER(peek);
1636 _Py_IDENTIFIER(read);
1637 _Py_IDENTIFIER(readinto);
1638 _Py_IDENTIFIER(readline);
1639
1640 /* Optional file methods */
1641 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1642 return -1;
1643 }
1644 if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1645 return -1;
1646 }
1647 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1648 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1649 if (!self->readline || !self->read) {
1650 if (!PyErr_Occurred()) {
1651 PyErr_SetString(PyExc_TypeError,
1652 "file must have 'read' and 'readline' attributes");
1653 }
1654 Py_CLEAR(self->read);
1655 Py_CLEAR(self->readinto);
1656 Py_CLEAR(self->readline);
1657 Py_CLEAR(self->peek);
1658 return -1;
1659 }
1660 return 0;
1661 }
1662
1663 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1664 be called once on a freshly created Unpickler. */
1665 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1666 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1667 const char *encoding,
1668 const char *errors)
1669 {
1670 if (encoding == NULL)
1671 encoding = "ASCII";
1672 if (errors == NULL)
1673 errors = "strict";
1674
1675 self->encoding = _PyMem_Strdup(encoding);
1676 self->errors = _PyMem_Strdup(errors);
1677 if (self->encoding == NULL || self->errors == NULL) {
1678 PyErr_NoMemory();
1679 return -1;
1680 }
1681 return 0;
1682 }
1683
1684 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1685 be called once on a freshly created Unpickler. */
1686 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1687 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1688 {
1689 if (buffers == NULL || buffers == Py_None) {
1690 self->buffers = NULL;
1691 }
1692 else {
1693 self->buffers = PyObject_GetIter(buffers);
1694 if (self->buffers == NULL) {
1695 return -1;
1696 }
1697 }
1698 return 0;
1699 }
1700
1701 /* Generate a GET opcode for an object stored in the memo. */
1702 static int
memo_get(PicklerObject * self,PyObject * key)1703 memo_get(PicklerObject *self, PyObject *key)
1704 {
1705 Py_ssize_t *value;
1706 char pdata[30];
1707 Py_ssize_t len;
1708
1709 value = PyMemoTable_Get(self->memo, key);
1710 if (value == NULL) {
1711 PyErr_SetObject(PyExc_KeyError, key);
1712 return -1;
1713 }
1714
1715 if (!self->bin) {
1716 pdata[0] = GET;
1717 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1718 "%" PY_FORMAT_SIZE_T "d\n", *value);
1719 len = strlen(pdata);
1720 }
1721 else {
1722 if (*value < 256) {
1723 pdata[0] = BINGET;
1724 pdata[1] = (unsigned char)(*value & 0xff);
1725 len = 2;
1726 }
1727 else if ((size_t)*value <= 0xffffffffUL) {
1728 pdata[0] = LONG_BINGET;
1729 pdata[1] = (unsigned char)(*value & 0xff);
1730 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1731 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1732 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1733 len = 5;
1734 }
1735 else { /* unlikely */
1736 PickleState *st = _Pickle_GetGlobalState();
1737 PyErr_SetString(st->PicklingError,
1738 "memo id too large for LONG_BINGET");
1739 return -1;
1740 }
1741 }
1742
1743 if (_Pickler_Write(self, pdata, len) < 0)
1744 return -1;
1745
1746 return 0;
1747 }
1748
1749 /* Store an object in the memo, assign it a new unique ID based on the number
1750 of objects currently stored in the memo and generate a PUT opcode. */
1751 static int
memo_put(PicklerObject * self,PyObject * obj)1752 memo_put(PicklerObject *self, PyObject *obj)
1753 {
1754 char pdata[30];
1755 Py_ssize_t len;
1756 Py_ssize_t idx;
1757
1758 const char memoize_op = MEMOIZE;
1759
1760 if (self->fast)
1761 return 0;
1762
1763 idx = PyMemoTable_Size(self->memo);
1764 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1765 return -1;
1766
1767 if (self->proto >= 4) {
1768 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1769 return -1;
1770 return 0;
1771 }
1772 else if (!self->bin) {
1773 pdata[0] = PUT;
1774 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1775 "%" PY_FORMAT_SIZE_T "d\n", idx);
1776 len = strlen(pdata);
1777 }
1778 else {
1779 if (idx < 256) {
1780 pdata[0] = BINPUT;
1781 pdata[1] = (unsigned char)idx;
1782 len = 2;
1783 }
1784 else if ((size_t)idx <= 0xffffffffUL) {
1785 pdata[0] = LONG_BINPUT;
1786 pdata[1] = (unsigned char)(idx & 0xff);
1787 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1788 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1789 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1790 len = 5;
1791 }
1792 else { /* unlikely */
1793 PickleState *st = _Pickle_GetGlobalState();
1794 PyErr_SetString(st->PicklingError,
1795 "memo id too large for LONG_BINPUT");
1796 return -1;
1797 }
1798 }
1799 if (_Pickler_Write(self, pdata, len) < 0)
1800 return -1;
1801
1802 return 0;
1803 }
1804
1805 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1806 get_dotted_path(PyObject *obj, PyObject *name)
1807 {
1808 _Py_static_string(PyId_dot, ".");
1809 PyObject *dotted_path;
1810 Py_ssize_t i, n;
1811
1812 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1813 if (dotted_path == NULL)
1814 return NULL;
1815 n = PyList_GET_SIZE(dotted_path);
1816 assert(n >= 1);
1817 for (i = 0; i < n; i++) {
1818 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1819 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1820 if (obj == NULL)
1821 PyErr_Format(PyExc_AttributeError,
1822 "Can't pickle local object %R", name);
1823 else
1824 PyErr_Format(PyExc_AttributeError,
1825 "Can't pickle local attribute %R on %R", name, obj);
1826 Py_DECREF(dotted_path);
1827 return NULL;
1828 }
1829 }
1830 return dotted_path;
1831 }
1832
1833 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1834 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1835 {
1836 Py_ssize_t i, n;
1837 PyObject *parent = NULL;
1838
1839 assert(PyList_CheckExact(names));
1840 Py_INCREF(obj);
1841 n = PyList_GET_SIZE(names);
1842 for (i = 0; i < n; i++) {
1843 PyObject *name = PyList_GET_ITEM(names, i);
1844 Py_XDECREF(parent);
1845 parent = obj;
1846 (void)_PyObject_LookupAttr(parent, name, &obj);
1847 if (obj == NULL) {
1848 Py_DECREF(parent);
1849 return NULL;
1850 }
1851 }
1852 if (pparent != NULL)
1853 *pparent = parent;
1854 else
1855 Py_XDECREF(parent);
1856 return obj;
1857 }
1858
1859
1860 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1861 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1862 {
1863 PyObject *dotted_path, *attr;
1864
1865 if (allow_qualname) {
1866 dotted_path = get_dotted_path(obj, name);
1867 if (dotted_path == NULL)
1868 return NULL;
1869 attr = get_deep_attribute(obj, dotted_path, NULL);
1870 Py_DECREF(dotted_path);
1871 }
1872 else {
1873 (void)_PyObject_LookupAttr(obj, name, &attr);
1874 }
1875 if (attr == NULL && !PyErr_Occurred()) {
1876 PyErr_Format(PyExc_AttributeError,
1877 "Can't get attribute %R on %R", name, obj);
1878 }
1879 return attr;
1880 }
1881
1882 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1883 _checkmodule(PyObject *module_name, PyObject *module,
1884 PyObject *global, PyObject *dotted_path)
1885 {
1886 if (module == Py_None) {
1887 return -1;
1888 }
1889 if (PyUnicode_Check(module_name) &&
1890 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1891 return -1;
1892 }
1893
1894 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1895 if (candidate == NULL) {
1896 return -1;
1897 }
1898 if (candidate != global) {
1899 Py_DECREF(candidate);
1900 return -1;
1901 }
1902 Py_DECREF(candidate);
1903 return 0;
1904 }
1905
1906 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1907 whichmodule(PyObject *global, PyObject *dotted_path)
1908 {
1909 PyObject *module_name;
1910 PyObject *module = NULL;
1911 Py_ssize_t i;
1912 PyObject *modules;
1913 _Py_IDENTIFIER(__module__);
1914 _Py_IDENTIFIER(modules);
1915 _Py_IDENTIFIER(__main__);
1916
1917 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1918 return NULL;
1919 }
1920 if (module_name) {
1921 /* In some rare cases (e.g., bound methods of extension types),
1922 __module__ can be None. If it is so, then search sys.modules for
1923 the module of global. */
1924 if (module_name != Py_None)
1925 return module_name;
1926 Py_CLEAR(module_name);
1927 }
1928 assert(module_name == NULL);
1929
1930 /* Fallback on walking sys.modules */
1931 modules = _PySys_GetObjectId(&PyId_modules);
1932 if (modules == NULL) {
1933 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1934 return NULL;
1935 }
1936 if (PyDict_CheckExact(modules)) {
1937 i = 0;
1938 while (PyDict_Next(modules, &i, &module_name, &module)) {
1939 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1940 Py_INCREF(module_name);
1941 return module_name;
1942 }
1943 if (PyErr_Occurred()) {
1944 return NULL;
1945 }
1946 }
1947 }
1948 else {
1949 PyObject *iterator = PyObject_GetIter(modules);
1950 if (iterator == NULL) {
1951 return NULL;
1952 }
1953 while ((module_name = PyIter_Next(iterator))) {
1954 module = PyObject_GetItem(modules, module_name);
1955 if (module == NULL) {
1956 Py_DECREF(module_name);
1957 Py_DECREF(iterator);
1958 return NULL;
1959 }
1960 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1961 Py_DECREF(module);
1962 Py_DECREF(iterator);
1963 return module_name;
1964 }
1965 Py_DECREF(module);
1966 Py_DECREF(module_name);
1967 if (PyErr_Occurred()) {
1968 Py_DECREF(iterator);
1969 return NULL;
1970 }
1971 }
1972 Py_DECREF(iterator);
1973 }
1974
1975 /* If no module is found, use __main__. */
1976 module_name = _PyUnicode_FromId(&PyId___main__);
1977 Py_XINCREF(module_name);
1978 return module_name;
1979 }
1980
1981 /* fast_save_enter() and fast_save_leave() are guards against recursive
1982 objects when Pickler is used with the "fast mode" (i.e., with object
1983 memoization disabled). If the nesting of a list or dict object exceed
1984 FAST_NESTING_LIMIT, these guards will start keeping an internal
1985 reference to the seen list or dict objects and check whether these objects
1986 are recursive. These are not strictly necessary, since save() has a
1987 hard-coded recursion limit, but they give a nicer error message than the
1988 typical RuntimeError. */
1989 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1990 fast_save_enter(PicklerObject *self, PyObject *obj)
1991 {
1992 /* if fast_nesting < 0, we're doing an error exit. */
1993 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1994 PyObject *key = NULL;
1995 if (self->fast_memo == NULL) {
1996 self->fast_memo = PyDict_New();
1997 if (self->fast_memo == NULL) {
1998 self->fast_nesting = -1;
1999 return 0;
2000 }
2001 }
2002 key = PyLong_FromVoidPtr(obj);
2003 if (key == NULL) {
2004 self->fast_nesting = -1;
2005 return 0;
2006 }
2007 if (PyDict_GetItemWithError(self->fast_memo, key)) {
2008 Py_DECREF(key);
2009 PyErr_Format(PyExc_ValueError,
2010 "fast mode: can't pickle cyclic objects "
2011 "including object type %.200s at %p",
2012 Py_TYPE(obj)->tp_name, obj);
2013 self->fast_nesting = -1;
2014 return 0;
2015 }
2016 if (PyErr_Occurred()) {
2017 Py_DECREF(key);
2018 self->fast_nesting = -1;
2019 return 0;
2020 }
2021 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
2022 Py_DECREF(key);
2023 self->fast_nesting = -1;
2024 return 0;
2025 }
2026 Py_DECREF(key);
2027 }
2028 return 1;
2029 }
2030
2031 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2032 fast_save_leave(PicklerObject *self, PyObject *obj)
2033 {
2034 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2035 PyObject *key = PyLong_FromVoidPtr(obj);
2036 if (key == NULL)
2037 return 0;
2038 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2039 Py_DECREF(key);
2040 return 0;
2041 }
2042 Py_DECREF(key);
2043 }
2044 return 1;
2045 }
2046
2047 static int
save_none(PicklerObject * self,PyObject * obj)2048 save_none(PicklerObject *self, PyObject *obj)
2049 {
2050 const char none_op = NONE;
2051 if (_Pickler_Write(self, &none_op, 1) < 0)
2052 return -1;
2053
2054 return 0;
2055 }
2056
2057 static int
save_bool(PicklerObject * self,PyObject * obj)2058 save_bool(PicklerObject *self, PyObject *obj)
2059 {
2060 if (self->proto >= 2) {
2061 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2062 if (_Pickler_Write(self, &bool_op, 1) < 0)
2063 return -1;
2064 }
2065 else {
2066 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2067 * so that unpicklers written before bools were introduced unpickle them
2068 * as ints, but unpicklers after can recognize that bools were intended.
2069 * Note that protocol 2 added direct ways to pickle bools.
2070 */
2071 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2072 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2073 return -1;
2074 }
2075 return 0;
2076 }
2077
2078 static int
save_long(PicklerObject * self,PyObject * obj)2079 save_long(PicklerObject *self, PyObject *obj)
2080 {
2081 PyObject *repr = NULL;
2082 Py_ssize_t size;
2083 long val;
2084 int overflow;
2085 int status = 0;
2086
2087 val= PyLong_AsLongAndOverflow(obj, &overflow);
2088 if (!overflow && (sizeof(long) <= 4 ||
2089 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2090 {
2091 /* result fits in a signed 4-byte integer.
2092
2093 Note: we can't use -0x80000000L in the above condition because some
2094 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2095 before applying the unary minus when sizeof(long) <= 4. The
2096 resulting value stays unsigned which is commonly not what we want,
2097 so MSVC happily warns us about it. However, that result would have
2098 been fine because we guard for sizeof(long) <= 4 which turns the
2099 condition true in that particular case. */
2100 char pdata[32];
2101 Py_ssize_t len = 0;
2102
2103 if (self->bin) {
2104 pdata[1] = (unsigned char)(val & 0xff);
2105 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2106 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2107 pdata[4] = (unsigned char)((val >> 24) & 0xff);
2108
2109 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2110 pdata[0] = BININT;
2111 len = 5;
2112 }
2113 else if (pdata[2] != 0) {
2114 pdata[0] = BININT2;
2115 len = 3;
2116 }
2117 else {
2118 pdata[0] = BININT1;
2119 len = 2;
2120 }
2121 }
2122 else {
2123 sprintf(pdata, "%c%ld\n", INT, val);
2124 len = strlen(pdata);
2125 }
2126 if (_Pickler_Write(self, pdata, len) < 0)
2127 return -1;
2128
2129 return 0;
2130 }
2131 assert(!PyErr_Occurred());
2132
2133 if (self->proto >= 2) {
2134 /* Linear-time pickling. */
2135 size_t nbits;
2136 size_t nbytes;
2137 unsigned char *pdata;
2138 char header[5];
2139 int i;
2140 int sign = _PyLong_Sign(obj);
2141
2142 if (sign == 0) {
2143 header[0] = LONG1;
2144 header[1] = 0; /* It's 0 -- an empty bytestring. */
2145 if (_Pickler_Write(self, header, 2) < 0)
2146 goto error;
2147 return 0;
2148 }
2149 nbits = _PyLong_NumBits(obj);
2150 if (nbits == (size_t)-1 && PyErr_Occurred())
2151 goto error;
2152 /* How many bytes do we need? There are nbits >> 3 full
2153 * bytes of data, and nbits & 7 leftover bits. If there
2154 * are any leftover bits, then we clearly need another
2155 * byte. What's not so obvious is that we *probably*
2156 * need another byte even if there aren't any leftovers:
2157 * the most-significant bit of the most-significant byte
2158 * acts like a sign bit, and it's usually got a sense
2159 * opposite of the one we need. The exception is ints
2160 * of the form -(2**(8*j-1)) for j > 0. Such an int is
2161 * its own 256's-complement, so has the right sign bit
2162 * even without the extra byte. That's a pain to check
2163 * for in advance, though, so we always grab an extra
2164 * byte at the start, and cut it back later if possible.
2165 */
2166 nbytes = (nbits >> 3) + 1;
2167 if (nbytes > 0x7fffffffL) {
2168 PyErr_SetString(PyExc_OverflowError,
2169 "int too large to pickle");
2170 goto error;
2171 }
2172 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2173 if (repr == NULL)
2174 goto error;
2175 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2176 i = _PyLong_AsByteArray((PyLongObject *)obj,
2177 pdata, nbytes,
2178 1 /* little endian */ , 1 /* signed */ );
2179 if (i < 0)
2180 goto error;
2181 /* If the int is negative, this may be a byte more than
2182 * needed. This is so iff the MSB is all redundant sign
2183 * bits.
2184 */
2185 if (sign < 0 &&
2186 nbytes > 1 &&
2187 pdata[nbytes - 1] == 0xff &&
2188 (pdata[nbytes - 2] & 0x80) != 0) {
2189 nbytes--;
2190 }
2191
2192 if (nbytes < 256) {
2193 header[0] = LONG1;
2194 header[1] = (unsigned char)nbytes;
2195 size = 2;
2196 }
2197 else {
2198 header[0] = LONG4;
2199 size = (Py_ssize_t) nbytes;
2200 for (i = 1; i < 5; i++) {
2201 header[i] = (unsigned char)(size & 0xff);
2202 size >>= 8;
2203 }
2204 size = 5;
2205 }
2206 if (_Pickler_Write(self, header, size) < 0 ||
2207 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2208 goto error;
2209 }
2210 else {
2211 const char long_op = LONG;
2212 const char *string;
2213
2214 /* proto < 2: write the repr and newline. This is quadratic-time (in
2215 the number of digits), in both directions. We add a trailing 'L'
2216 to the repr, for compatibility with Python 2.x. */
2217
2218 repr = PyObject_Repr(obj);
2219 if (repr == NULL)
2220 goto error;
2221
2222 string = PyUnicode_AsUTF8AndSize(repr, &size);
2223 if (string == NULL)
2224 goto error;
2225
2226 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2227 _Pickler_Write(self, string, size) < 0 ||
2228 _Pickler_Write(self, "L\n", 2) < 0)
2229 goto error;
2230 }
2231
2232 if (0) {
2233 error:
2234 status = -1;
2235 }
2236 Py_XDECREF(repr);
2237
2238 return status;
2239 }
2240
2241 static int
save_float(PicklerObject * self,PyObject * obj)2242 save_float(PicklerObject *self, PyObject *obj)
2243 {
2244 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2245
2246 if (self->bin) {
2247 char pdata[9];
2248 pdata[0] = BINFLOAT;
2249 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2250 return -1;
2251 if (_Pickler_Write(self, pdata, 9) < 0)
2252 return -1;
2253 }
2254 else {
2255 int result = -1;
2256 char *buf = NULL;
2257 char op = FLOAT;
2258
2259 if (_Pickler_Write(self, &op, 1) < 0)
2260 goto done;
2261
2262 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2263 if (!buf) {
2264 PyErr_NoMemory();
2265 goto done;
2266 }
2267
2268 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2269 goto done;
2270
2271 if (_Pickler_Write(self, "\n", 1) < 0)
2272 goto done;
2273
2274 result = 0;
2275 done:
2276 PyMem_Free(buf);
2277 return result;
2278 }
2279
2280 return 0;
2281 }
2282
2283 /* Perform direct write of the header and payload of the binary object.
2284
2285 The large contiguous data is written directly into the underlying file
2286 object, bypassing the output_buffer of the Pickler. We intentionally
2287 do not insert a protocol 4 frame opcode to make it possible to optimize
2288 file.read calls in the loader.
2289 */
2290 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2291 _Pickler_write_bytes(PicklerObject *self,
2292 const char *header, Py_ssize_t header_size,
2293 const char *data, Py_ssize_t data_size,
2294 PyObject *payload)
2295 {
2296 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2297 int framing = self->framing;
2298
2299 if (bypass_buffer) {
2300 assert(self->output_buffer != NULL);
2301 /* Commit the previous frame. */
2302 if (_Pickler_CommitFrame(self)) {
2303 return -1;
2304 }
2305 /* Disable framing temporarily */
2306 self->framing = 0;
2307 }
2308
2309 if (_Pickler_Write(self, header, header_size) < 0) {
2310 return -1;
2311 }
2312
2313 if (bypass_buffer && self->write != NULL) {
2314 /* Bypass the in-memory buffer to directly stream large data
2315 into the underlying file object. */
2316 PyObject *result, *mem = NULL;
2317 /* Dump the output buffer to the file. */
2318 if (_Pickler_FlushToFile(self) < 0) {
2319 return -1;
2320 }
2321
2322 /* Stream write the payload into the file without going through the
2323 output buffer. */
2324 if (payload == NULL) {
2325 /* TODO: It would be better to use a memoryview with a linked
2326 original string if this is possible. */
2327 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2328 if (payload == NULL) {
2329 return -1;
2330 }
2331 }
2332 result = PyObject_CallOneArg(self->write, payload);
2333 Py_XDECREF(mem);
2334 if (result == NULL) {
2335 return -1;
2336 }
2337 Py_DECREF(result);
2338
2339 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2340 if (_Pickler_ClearBuffer(self) < 0) {
2341 return -1;
2342 }
2343 }
2344 else {
2345 if (_Pickler_Write(self, data, data_size) < 0) {
2346 return -1;
2347 }
2348 }
2349
2350 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2351 self->framing = framing;
2352
2353 return 0;
2354 }
2355
2356 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2357 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2358 Py_ssize_t size)
2359 {
2360 assert(self->proto >= 3);
2361
2362 char header[9];
2363 Py_ssize_t len;
2364
2365 if (size < 0)
2366 return -1;
2367
2368 if (size <= 0xff) {
2369 header[0] = SHORT_BINBYTES;
2370 header[1] = (unsigned char)size;
2371 len = 2;
2372 }
2373 else if ((size_t)size <= 0xffffffffUL) {
2374 header[0] = BINBYTES;
2375 header[1] = (unsigned char)(size & 0xff);
2376 header[2] = (unsigned char)((size >> 8) & 0xff);
2377 header[3] = (unsigned char)((size >> 16) & 0xff);
2378 header[4] = (unsigned char)((size >> 24) & 0xff);
2379 len = 5;
2380 }
2381 else if (self->proto >= 4) {
2382 header[0] = BINBYTES8;
2383 _write_size64(header + 1, size);
2384 len = 9;
2385 }
2386 else {
2387 PyErr_SetString(PyExc_OverflowError,
2388 "serializing a bytes object larger than 4 GiB "
2389 "requires pickle protocol 4 or higher");
2390 return -1;
2391 }
2392
2393 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2394 return -1;
2395 }
2396
2397 if (memo_put(self, obj) < 0) {
2398 return -1;
2399 }
2400
2401 return 0;
2402 }
2403
2404 static int
save_bytes(PicklerObject * self,PyObject * obj)2405 save_bytes(PicklerObject *self, PyObject *obj)
2406 {
2407 if (self->proto < 3) {
2408 /* Older pickle protocols do not have an opcode for pickling bytes
2409 objects. Therefore, we need to fake the copy protocol (i.e.,
2410 the __reduce__ method) to permit bytes object unpickling.
2411
2412 Here we use a hack to be compatible with Python 2. Since in Python
2413 2 'bytes' is just an alias for 'str' (which has different
2414 parameters than the actual bytes object), we use codecs.encode
2415 to create the appropriate 'str' object when unpickled using
2416 Python 2 *and* the appropriate 'bytes' object when unpickled
2417 using Python 3. Again this is a hack and we don't need to do this
2418 with newer protocols. */
2419 PyObject *reduce_value;
2420 int status;
2421
2422 if (PyBytes_GET_SIZE(obj) == 0) {
2423 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2424 }
2425 else {
2426 PickleState *st = _Pickle_GetGlobalState();
2427 PyObject *unicode_str =
2428 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2429 PyBytes_GET_SIZE(obj),
2430 "strict");
2431 _Py_IDENTIFIER(latin1);
2432
2433 if (unicode_str == NULL)
2434 return -1;
2435 reduce_value = Py_BuildValue("(O(OO))",
2436 st->codecs_encode, unicode_str,
2437 _PyUnicode_FromId(&PyId_latin1));
2438 Py_DECREF(unicode_str);
2439 }
2440
2441 if (reduce_value == NULL)
2442 return -1;
2443
2444 /* save_reduce() will memoize the object automatically. */
2445 status = save_reduce(self, reduce_value, obj);
2446 Py_DECREF(reduce_value);
2447 return status;
2448 }
2449 else {
2450 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2451 PyBytes_GET_SIZE(obj));
2452 }
2453 }
2454
2455 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2456 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2457 Py_ssize_t size)
2458 {
2459 assert(self->proto >= 5);
2460
2461 char header[9];
2462 Py_ssize_t len;
2463
2464 if (size < 0)
2465 return -1;
2466
2467 header[0] = BYTEARRAY8;
2468 _write_size64(header + 1, size);
2469 len = 9;
2470
2471 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2472 return -1;
2473 }
2474
2475 if (memo_put(self, obj) < 0) {
2476 return -1;
2477 }
2478
2479 return 0;
2480 }
2481
2482 static int
save_bytearray(PicklerObject * self,PyObject * obj)2483 save_bytearray(PicklerObject *self, PyObject *obj)
2484 {
2485 if (self->proto < 5) {
2486 /* Older pickle protocols do not have an opcode for pickling
2487 * bytearrays. */
2488 PyObject *reduce_value = NULL;
2489 int status;
2490
2491 if (PyByteArray_GET_SIZE(obj) == 0) {
2492 reduce_value = Py_BuildValue("(O())",
2493 (PyObject *) &PyByteArray_Type);
2494 }
2495 else {
2496 PyObject *bytes_obj = PyBytes_FromObject(obj);
2497 if (bytes_obj != NULL) {
2498 reduce_value = Py_BuildValue("(O(O))",
2499 (PyObject *) &PyByteArray_Type,
2500 bytes_obj);
2501 Py_DECREF(bytes_obj);
2502 }
2503 }
2504 if (reduce_value == NULL)
2505 return -1;
2506
2507 /* save_reduce() will memoize the object automatically. */
2508 status = save_reduce(self, reduce_value, obj);
2509 Py_DECREF(reduce_value);
2510 return status;
2511 }
2512 else {
2513 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2514 PyByteArray_GET_SIZE(obj));
2515 }
2516 }
2517
2518 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2519 save_picklebuffer(PicklerObject *self, PyObject *obj)
2520 {
2521 if (self->proto < 5) {
2522 PickleState *st = _Pickle_GetGlobalState();
2523 PyErr_SetString(st->PicklingError,
2524 "PickleBuffer can only pickled with protocol >= 5");
2525 return -1;
2526 }
2527 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2528 if (view == NULL) {
2529 return -1;
2530 }
2531 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2532 PickleState *st = _Pickle_GetGlobalState();
2533 PyErr_SetString(st->PicklingError,
2534 "PickleBuffer can not be pickled when "
2535 "pointing to a non-contiguous buffer");
2536 return -1;
2537 }
2538 int in_band = 1;
2539 if (self->buffer_callback != NULL) {
2540 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2541 if (ret == NULL) {
2542 return -1;
2543 }
2544 in_band = PyObject_IsTrue(ret);
2545 Py_DECREF(ret);
2546 if (in_band == -1) {
2547 return -1;
2548 }
2549 }
2550 if (in_band) {
2551 /* Write data in-band */
2552 if (view->readonly) {
2553 return _save_bytes_data(self, obj, (const char*) view->buf,
2554 view->len);
2555 }
2556 else {
2557 return _save_bytearray_data(self, obj, (const char*) view->buf,
2558 view->len);
2559 }
2560 }
2561 else {
2562 /* Write data out-of-band */
2563 const char next_buffer_op = NEXT_BUFFER;
2564 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2565 return -1;
2566 }
2567 if (view->readonly) {
2568 const char readonly_buffer_op = READONLY_BUFFER;
2569 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2570 return -1;
2571 }
2572 }
2573 }
2574 return 0;
2575 }
2576
2577 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2578 backslash and newline characters to \uXXXX escapes. */
2579 static PyObject *
raw_unicode_escape(PyObject * obj)2580 raw_unicode_escape(PyObject *obj)
2581 {
2582 char *p;
2583 Py_ssize_t i, size;
2584 const void *data;
2585 unsigned int kind;
2586 _PyBytesWriter writer;
2587
2588 if (PyUnicode_READY(obj))
2589 return NULL;
2590
2591 _PyBytesWriter_Init(&writer);
2592
2593 size = PyUnicode_GET_LENGTH(obj);
2594 data = PyUnicode_DATA(obj);
2595 kind = PyUnicode_KIND(obj);
2596
2597 p = _PyBytesWriter_Alloc(&writer, size);
2598 if (p == NULL)
2599 goto error;
2600 writer.overallocate = 1;
2601
2602 for (i=0; i < size; i++) {
2603 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2604 /* Map 32-bit characters to '\Uxxxxxxxx' */
2605 if (ch >= 0x10000) {
2606 /* -1: subtract 1 preallocated byte */
2607 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2608 if (p == NULL)
2609 goto error;
2610
2611 *p++ = '\\';
2612 *p++ = 'U';
2613 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2615 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2616 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2617 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2618 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2619 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2620 *p++ = Py_hexdigits[ch & 15];
2621 }
2622 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2623 else if (ch >= 256 ||
2624 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2625 ch == 0x1a)
2626 {
2627 /* -1: subtract 1 preallocated byte */
2628 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2629 if (p == NULL)
2630 goto error;
2631
2632 *p++ = '\\';
2633 *p++ = 'u';
2634 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2635 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2636 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2637 *p++ = Py_hexdigits[ch & 15];
2638 }
2639 /* Copy everything else as-is */
2640 else
2641 *p++ = (char) ch;
2642 }
2643
2644 return _PyBytesWriter_Finish(&writer, p);
2645
2646 error:
2647 _PyBytesWriter_Dealloc(&writer);
2648 return NULL;
2649 }
2650
2651 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2652 write_unicode_binary(PicklerObject *self, PyObject *obj)
2653 {
2654 char header[9];
2655 Py_ssize_t len;
2656 PyObject *encoded = NULL;
2657 Py_ssize_t size;
2658 const char *data;
2659
2660 if (PyUnicode_READY(obj))
2661 return -1;
2662
2663 data = PyUnicode_AsUTF8AndSize(obj, &size);
2664 if (data == NULL) {
2665 /* Issue #8383: for strings with lone surrogates, fallback on the
2666 "surrogatepass" error handler. */
2667 PyErr_Clear();
2668 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2669 if (encoded == NULL)
2670 return -1;
2671
2672 data = PyBytes_AS_STRING(encoded);
2673 size = PyBytes_GET_SIZE(encoded);
2674 }
2675
2676 assert(size >= 0);
2677 if (size <= 0xff && self->proto >= 4) {
2678 header[0] = SHORT_BINUNICODE;
2679 header[1] = (unsigned char)(size & 0xff);
2680 len = 2;
2681 }
2682 else if ((size_t)size <= 0xffffffffUL) {
2683 header[0] = BINUNICODE;
2684 header[1] = (unsigned char)(size & 0xff);
2685 header[2] = (unsigned char)((size >> 8) & 0xff);
2686 header[3] = (unsigned char)((size >> 16) & 0xff);
2687 header[4] = (unsigned char)((size >> 24) & 0xff);
2688 len = 5;
2689 }
2690 else if (self->proto >= 4) {
2691 header[0] = BINUNICODE8;
2692 _write_size64(header + 1, size);
2693 len = 9;
2694 }
2695 else {
2696 PyErr_SetString(PyExc_OverflowError,
2697 "serializing a string larger than 4 GiB "
2698 "requires pickle protocol 4 or higher");
2699 Py_XDECREF(encoded);
2700 return -1;
2701 }
2702
2703 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2704 Py_XDECREF(encoded);
2705 return -1;
2706 }
2707 Py_XDECREF(encoded);
2708 return 0;
2709 }
2710
2711 static int
save_unicode(PicklerObject * self,PyObject * obj)2712 save_unicode(PicklerObject *self, PyObject *obj)
2713 {
2714 if (self->bin) {
2715 if (write_unicode_binary(self, obj) < 0)
2716 return -1;
2717 }
2718 else {
2719 PyObject *encoded;
2720 Py_ssize_t size;
2721 const char unicode_op = UNICODE;
2722
2723 encoded = raw_unicode_escape(obj);
2724 if (encoded == NULL)
2725 return -1;
2726
2727 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2728 Py_DECREF(encoded);
2729 return -1;
2730 }
2731
2732 size = PyBytes_GET_SIZE(encoded);
2733 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2734 Py_DECREF(encoded);
2735 return -1;
2736 }
2737 Py_DECREF(encoded);
2738
2739 if (_Pickler_Write(self, "\n", 1) < 0)
2740 return -1;
2741 }
2742 if (memo_put(self, obj) < 0)
2743 return -1;
2744
2745 return 0;
2746 }
2747
2748 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2749 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2750 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2751 {
2752 Py_ssize_t i;
2753
2754 assert(PyTuple_Size(t) == len);
2755
2756 for (i = 0; i < len; i++) {
2757 PyObject *element = PyTuple_GET_ITEM(t, i);
2758
2759 if (element == NULL)
2760 return -1;
2761 if (save(self, element, 0) < 0)
2762 return -1;
2763 }
2764
2765 return 0;
2766 }
2767
2768 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2769 * used across protocols to minimize the space needed to pickle them.
2770 * Tuples are also the only builtin immutable type that can be recursive
2771 * (a tuple can be reached from itself), and that requires some subtle
2772 * magic so that it works in all cases. IOW, this is a long routine.
2773 */
2774 static int
save_tuple(PicklerObject * self,PyObject * obj)2775 save_tuple(PicklerObject *self, PyObject *obj)
2776 {
2777 Py_ssize_t len, i;
2778
2779 const char mark_op = MARK;
2780 const char tuple_op = TUPLE;
2781 const char pop_op = POP;
2782 const char pop_mark_op = POP_MARK;
2783 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2784
2785 if ((len = PyTuple_Size(obj)) < 0)
2786 return -1;
2787
2788 if (len == 0) {
2789 char pdata[2];
2790
2791 if (self->proto) {
2792 pdata[0] = EMPTY_TUPLE;
2793 len = 1;
2794 }
2795 else {
2796 pdata[0] = MARK;
2797 pdata[1] = TUPLE;
2798 len = 2;
2799 }
2800 if (_Pickler_Write(self, pdata, len) < 0)
2801 return -1;
2802 return 0;
2803 }
2804
2805 /* The tuple isn't in the memo now. If it shows up there after
2806 * saving the tuple elements, the tuple must be recursive, in
2807 * which case we'll pop everything we put on the stack, and fetch
2808 * its value from the memo.
2809 */
2810 if (len <= 3 && self->proto >= 2) {
2811 /* Use TUPLE{1,2,3} opcodes. */
2812 if (store_tuple_elements(self, obj, len) < 0)
2813 return -1;
2814
2815 if (PyMemoTable_Get(self->memo, obj)) {
2816 /* pop the len elements */
2817 for (i = 0; i < len; i++)
2818 if (_Pickler_Write(self, &pop_op, 1) < 0)
2819 return -1;
2820 /* fetch from memo */
2821 if (memo_get(self, obj) < 0)
2822 return -1;
2823
2824 return 0;
2825 }
2826 else { /* Not recursive. */
2827 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2828 return -1;
2829 }
2830 goto memoize;
2831 }
2832
2833 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2834 * Generate MARK e1 e2 ... TUPLE
2835 */
2836 if (_Pickler_Write(self, &mark_op, 1) < 0)
2837 return -1;
2838
2839 if (store_tuple_elements(self, obj, len) < 0)
2840 return -1;
2841
2842 if (PyMemoTable_Get(self->memo, obj)) {
2843 /* pop the stack stuff we pushed */
2844 if (self->bin) {
2845 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2846 return -1;
2847 }
2848 else {
2849 /* Note that we pop one more than len, to remove
2850 * the MARK too.
2851 */
2852 for (i = 0; i <= len; i++)
2853 if (_Pickler_Write(self, &pop_op, 1) < 0)
2854 return -1;
2855 }
2856 /* fetch from memo */
2857 if (memo_get(self, obj) < 0)
2858 return -1;
2859
2860 return 0;
2861 }
2862 else { /* Not recursive. */
2863 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2864 return -1;
2865 }
2866
2867 memoize:
2868 if (memo_put(self, obj) < 0)
2869 return -1;
2870
2871 return 0;
2872 }
2873
2874 /* iter is an iterator giving items, and we batch up chunks of
2875 * MARK item item ... item APPENDS
2876 * opcode sequences. Calling code should have arranged to first create an
2877 * empty list, or list-like object, for the APPENDS to operate on.
2878 * Returns 0 on success, <0 on error.
2879 */
2880 static int
batch_list(PicklerObject * self,PyObject * iter)2881 batch_list(PicklerObject *self, PyObject *iter)
2882 {
2883 PyObject *obj = NULL;
2884 PyObject *firstitem = NULL;
2885 int i, n;
2886
2887 const char mark_op = MARK;
2888 const char append_op = APPEND;
2889 const char appends_op = APPENDS;
2890
2891 assert(iter != NULL);
2892
2893 /* XXX: I think this function could be made faster by avoiding the
2894 iterator interface and fetching objects directly from list using
2895 PyList_GET_ITEM.
2896 */
2897
2898 if (self->proto == 0) {
2899 /* APPENDS isn't available; do one at a time. */
2900 for (;;) {
2901 obj = PyIter_Next(iter);
2902 if (obj == NULL) {
2903 if (PyErr_Occurred())
2904 return -1;
2905 break;
2906 }
2907 i = save(self, obj, 0);
2908 Py_DECREF(obj);
2909 if (i < 0)
2910 return -1;
2911 if (_Pickler_Write(self, &append_op, 1) < 0)
2912 return -1;
2913 }
2914 return 0;
2915 }
2916
2917 /* proto > 0: write in batches of BATCHSIZE. */
2918 do {
2919 /* Get first item */
2920 firstitem = PyIter_Next(iter);
2921 if (firstitem == NULL) {
2922 if (PyErr_Occurred())
2923 goto error;
2924
2925 /* nothing more to add */
2926 break;
2927 }
2928
2929 /* Try to get a second item */
2930 obj = PyIter_Next(iter);
2931 if (obj == NULL) {
2932 if (PyErr_Occurred())
2933 goto error;
2934
2935 /* Only one item to write */
2936 if (save(self, firstitem, 0) < 0)
2937 goto error;
2938 if (_Pickler_Write(self, &append_op, 1) < 0)
2939 goto error;
2940 Py_CLEAR(firstitem);
2941 break;
2942 }
2943
2944 /* More than one item to write */
2945
2946 /* Pump out MARK, items, APPENDS. */
2947 if (_Pickler_Write(self, &mark_op, 1) < 0)
2948 goto error;
2949
2950 if (save(self, firstitem, 0) < 0)
2951 goto error;
2952 Py_CLEAR(firstitem);
2953 n = 1;
2954
2955 /* Fetch and save up to BATCHSIZE items */
2956 while (obj) {
2957 if (save(self, obj, 0) < 0)
2958 goto error;
2959 Py_CLEAR(obj);
2960 n += 1;
2961
2962 if (n == BATCHSIZE)
2963 break;
2964
2965 obj = PyIter_Next(iter);
2966 if (obj == NULL) {
2967 if (PyErr_Occurred())
2968 goto error;
2969 break;
2970 }
2971 }
2972
2973 if (_Pickler_Write(self, &appends_op, 1) < 0)
2974 goto error;
2975
2976 } while (n == BATCHSIZE);
2977 return 0;
2978
2979 error:
2980 Py_XDECREF(firstitem);
2981 Py_XDECREF(obj);
2982 return -1;
2983 }
2984
2985 /* This is a variant of batch_list() above, specialized for lists (with no
2986 * support for list subclasses). Like batch_list(), we batch up chunks of
2987 * MARK item item ... item APPENDS
2988 * opcode sequences. Calling code should have arranged to first create an
2989 * empty list, or list-like object, for the APPENDS to operate on.
2990 * Returns 0 on success, -1 on error.
2991 *
2992 * This version is considerably faster than batch_list(), if less general.
2993 *
2994 * Note that this only works for protocols > 0.
2995 */
2996 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2997 batch_list_exact(PicklerObject *self, PyObject *obj)
2998 {
2999 PyObject *item = NULL;
3000 Py_ssize_t this_batch, total;
3001
3002 const char append_op = APPEND;
3003 const char appends_op = APPENDS;
3004 const char mark_op = MARK;
3005
3006 assert(obj != NULL);
3007 assert(self->proto > 0);
3008 assert(PyList_CheckExact(obj));
3009
3010 if (PyList_GET_SIZE(obj) == 1) {
3011 item = PyList_GET_ITEM(obj, 0);
3012 if (save(self, item, 0) < 0)
3013 return -1;
3014 if (_Pickler_Write(self, &append_op, 1) < 0)
3015 return -1;
3016 return 0;
3017 }
3018
3019 /* Write in batches of BATCHSIZE. */
3020 total = 0;
3021 do {
3022 this_batch = 0;
3023 if (_Pickler_Write(self, &mark_op, 1) < 0)
3024 return -1;
3025 while (total < PyList_GET_SIZE(obj)) {
3026 item = PyList_GET_ITEM(obj, total);
3027 if (save(self, item, 0) < 0)
3028 return -1;
3029 total++;
3030 if (++this_batch == BATCHSIZE)
3031 break;
3032 }
3033 if (_Pickler_Write(self, &appends_op, 1) < 0)
3034 return -1;
3035
3036 } while (total < PyList_GET_SIZE(obj));
3037
3038 return 0;
3039 }
3040
3041 static int
save_list(PicklerObject * self,PyObject * obj)3042 save_list(PicklerObject *self, PyObject *obj)
3043 {
3044 char header[3];
3045 Py_ssize_t len;
3046 int status = 0;
3047
3048 if (self->fast && !fast_save_enter(self, obj))
3049 goto error;
3050
3051 /* Create an empty list. */
3052 if (self->bin) {
3053 header[0] = EMPTY_LIST;
3054 len = 1;
3055 }
3056 else {
3057 header[0] = MARK;
3058 header[1] = LIST;
3059 len = 2;
3060 }
3061
3062 if (_Pickler_Write(self, header, len) < 0)
3063 goto error;
3064
3065 /* Get list length, and bow out early if empty. */
3066 if ((len = PyList_Size(obj)) < 0)
3067 goto error;
3068
3069 if (memo_put(self, obj) < 0)
3070 goto error;
3071
3072 if (len != 0) {
3073 /* Materialize the list elements. */
3074 if (PyList_CheckExact(obj) && self->proto > 0) {
3075 if (Py_EnterRecursiveCall(" while pickling an object"))
3076 goto error;
3077 status = batch_list_exact(self, obj);
3078 Py_LeaveRecursiveCall();
3079 } else {
3080 PyObject *iter = PyObject_GetIter(obj);
3081 if (iter == NULL)
3082 goto error;
3083
3084 if (Py_EnterRecursiveCall(" while pickling an object")) {
3085 Py_DECREF(iter);
3086 goto error;
3087 }
3088 status = batch_list(self, iter);
3089 Py_LeaveRecursiveCall();
3090 Py_DECREF(iter);
3091 }
3092 }
3093 if (0) {
3094 error:
3095 status = -1;
3096 }
3097
3098 if (self->fast && !fast_save_leave(self, obj))
3099 status = -1;
3100
3101 return status;
3102 }
3103
3104 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3105 * MARK key value ... key value SETITEMS
3106 * opcode sequences. Calling code should have arranged to first create an
3107 * empty dict, or dict-like object, for the SETITEMS to operate on.
3108 * Returns 0 on success, <0 on error.
3109 *
3110 * This is very much like batch_list(). The difference between saving
3111 * elements directly, and picking apart two-tuples, is so long-winded at
3112 * the C level, though, that attempts to combine these routines were too
3113 * ugly to bear.
3114 */
3115 static int
batch_dict(PicklerObject * self,PyObject * iter)3116 batch_dict(PicklerObject *self, PyObject *iter)
3117 {
3118 PyObject *obj = NULL;
3119 PyObject *firstitem = NULL;
3120 int i, n;
3121
3122 const char mark_op = MARK;
3123 const char setitem_op = SETITEM;
3124 const char setitems_op = SETITEMS;
3125
3126 assert(iter != NULL);
3127
3128 if (self->proto == 0) {
3129 /* SETITEMS isn't available; do one at a time. */
3130 for (;;) {
3131 obj = PyIter_Next(iter);
3132 if (obj == NULL) {
3133 if (PyErr_Occurred())
3134 return -1;
3135 break;
3136 }
3137 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3138 PyErr_SetString(PyExc_TypeError, "dict items "
3139 "iterator must return 2-tuples");
3140 return -1;
3141 }
3142 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3143 if (i >= 0)
3144 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3145 Py_DECREF(obj);
3146 if (i < 0)
3147 return -1;
3148 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3149 return -1;
3150 }
3151 return 0;
3152 }
3153
3154 /* proto > 0: write in batches of BATCHSIZE. */
3155 do {
3156 /* Get first item */
3157 firstitem = PyIter_Next(iter);
3158 if (firstitem == NULL) {
3159 if (PyErr_Occurred())
3160 goto error;
3161
3162 /* nothing more to add */
3163 break;
3164 }
3165 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3166 PyErr_SetString(PyExc_TypeError, "dict items "
3167 "iterator must return 2-tuples");
3168 goto error;
3169 }
3170
3171 /* Try to get a second item */
3172 obj = PyIter_Next(iter);
3173 if (obj == NULL) {
3174 if (PyErr_Occurred())
3175 goto error;
3176
3177 /* Only one item to write */
3178 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3179 goto error;
3180 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3181 goto error;
3182 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3183 goto error;
3184 Py_CLEAR(firstitem);
3185 break;
3186 }
3187
3188 /* More than one item to write */
3189
3190 /* Pump out MARK, items, SETITEMS. */
3191 if (_Pickler_Write(self, &mark_op, 1) < 0)
3192 goto error;
3193
3194 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3195 goto error;
3196 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3197 goto error;
3198 Py_CLEAR(firstitem);
3199 n = 1;
3200
3201 /* Fetch and save up to BATCHSIZE items */
3202 while (obj) {
3203 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3204 PyErr_SetString(PyExc_TypeError, "dict items "
3205 "iterator must return 2-tuples");
3206 goto error;
3207 }
3208 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3209 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3210 goto error;
3211 Py_CLEAR(obj);
3212 n += 1;
3213
3214 if (n == BATCHSIZE)
3215 break;
3216
3217 obj = PyIter_Next(iter);
3218 if (obj == NULL) {
3219 if (PyErr_Occurred())
3220 goto error;
3221 break;
3222 }
3223 }
3224
3225 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3226 goto error;
3227
3228 } while (n == BATCHSIZE);
3229 return 0;
3230
3231 error:
3232 Py_XDECREF(firstitem);
3233 Py_XDECREF(obj);
3234 return -1;
3235 }
3236
3237 /* This is a variant of batch_dict() above that specializes for dicts, with no
3238 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3239 * MARK key value ... key value SETITEMS
3240 * opcode sequences. Calling code should have arranged to first create an
3241 * empty dict, or dict-like object, for the SETITEMS to operate on.
3242 * Returns 0 on success, -1 on error.
3243 *
3244 * Note that this currently doesn't work for protocol 0.
3245 */
3246 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3247 batch_dict_exact(PicklerObject *self, PyObject *obj)
3248 {
3249 PyObject *key = NULL, *value = NULL;
3250 int i;
3251 Py_ssize_t dict_size, ppos = 0;
3252
3253 const char mark_op = MARK;
3254 const char setitem_op = SETITEM;
3255 const char setitems_op = SETITEMS;
3256
3257 assert(obj != NULL && PyDict_CheckExact(obj));
3258 assert(self->proto > 0);
3259
3260 dict_size = PyDict_GET_SIZE(obj);
3261
3262 /* Special-case len(d) == 1 to save space. */
3263 if (dict_size == 1) {
3264 PyDict_Next(obj, &ppos, &key, &value);
3265 if (save(self, key, 0) < 0)
3266 return -1;
3267 if (save(self, value, 0) < 0)
3268 return -1;
3269 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3270 return -1;
3271 return 0;
3272 }
3273
3274 /* Write in batches of BATCHSIZE. */
3275 do {
3276 i = 0;
3277 if (_Pickler_Write(self, &mark_op, 1) < 0)
3278 return -1;
3279 while (PyDict_Next(obj, &ppos, &key, &value)) {
3280 if (save(self, key, 0) < 0)
3281 return -1;
3282 if (save(self, value, 0) < 0)
3283 return -1;
3284 if (++i == BATCHSIZE)
3285 break;
3286 }
3287 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3288 return -1;
3289 if (PyDict_GET_SIZE(obj) != dict_size) {
3290 PyErr_Format(
3291 PyExc_RuntimeError,
3292 "dictionary changed size during iteration");
3293 return -1;
3294 }
3295
3296 } while (i == BATCHSIZE);
3297 return 0;
3298 }
3299
3300 static int
save_dict(PicklerObject * self,PyObject * obj)3301 save_dict(PicklerObject *self, PyObject *obj)
3302 {
3303 PyObject *items, *iter;
3304 char header[3];
3305 Py_ssize_t len;
3306 int status = 0;
3307 assert(PyDict_Check(obj));
3308
3309 if (self->fast && !fast_save_enter(self, obj))
3310 goto error;
3311
3312 /* Create an empty dict. */
3313 if (self->bin) {
3314 header[0] = EMPTY_DICT;
3315 len = 1;
3316 }
3317 else {
3318 header[0] = MARK;
3319 header[1] = DICT;
3320 len = 2;
3321 }
3322
3323 if (_Pickler_Write(self, header, len) < 0)
3324 goto error;
3325
3326 if (memo_put(self, obj) < 0)
3327 goto error;
3328
3329 if (PyDict_GET_SIZE(obj)) {
3330 /* Save the dict items. */
3331 if (PyDict_CheckExact(obj) && self->proto > 0) {
3332 /* We can take certain shortcuts if we know this is a dict and
3333 not a dict subclass. */
3334 if (Py_EnterRecursiveCall(" while pickling an object"))
3335 goto error;
3336 status = batch_dict_exact(self, obj);
3337 Py_LeaveRecursiveCall();
3338 } else {
3339 _Py_IDENTIFIER(items);
3340
3341 items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
3342 if (items == NULL)
3343 goto error;
3344 iter = PyObject_GetIter(items);
3345 Py_DECREF(items);
3346 if (iter == NULL)
3347 goto error;
3348 if (Py_EnterRecursiveCall(" while pickling an object")) {
3349 Py_DECREF(iter);
3350 goto error;
3351 }
3352 status = batch_dict(self, iter);
3353 Py_LeaveRecursiveCall();
3354 Py_DECREF(iter);
3355 }
3356 }
3357
3358 if (0) {
3359 error:
3360 status = -1;
3361 }
3362
3363 if (self->fast && !fast_save_leave(self, obj))
3364 status = -1;
3365
3366 return status;
3367 }
3368
3369 static int
save_set(PicklerObject * self,PyObject * obj)3370 save_set(PicklerObject *self, PyObject *obj)
3371 {
3372 PyObject *item;
3373 int i;
3374 Py_ssize_t set_size, ppos = 0;
3375 Py_hash_t hash;
3376
3377 const char empty_set_op = EMPTY_SET;
3378 const char mark_op = MARK;
3379 const char additems_op = ADDITEMS;
3380
3381 if (self->proto < 4) {
3382 PyObject *items;
3383 PyObject *reduce_value;
3384 int status;
3385
3386 items = PySequence_List(obj);
3387 if (items == NULL) {
3388 return -1;
3389 }
3390 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3391 Py_DECREF(items);
3392 if (reduce_value == NULL) {
3393 return -1;
3394 }
3395 /* save_reduce() will memoize the object automatically. */
3396 status = save_reduce(self, reduce_value, obj);
3397 Py_DECREF(reduce_value);
3398 return status;
3399 }
3400
3401 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3402 return -1;
3403
3404 if (memo_put(self, obj) < 0)
3405 return -1;
3406
3407 set_size = PySet_GET_SIZE(obj);
3408 if (set_size == 0)
3409 return 0; /* nothing to do */
3410
3411 /* Write in batches of BATCHSIZE. */
3412 do {
3413 i = 0;
3414 if (_Pickler_Write(self, &mark_op, 1) < 0)
3415 return -1;
3416 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3417 if (save(self, item, 0) < 0)
3418 return -1;
3419 if (++i == BATCHSIZE)
3420 break;
3421 }
3422 if (_Pickler_Write(self, &additems_op, 1) < 0)
3423 return -1;
3424 if (PySet_GET_SIZE(obj) != set_size) {
3425 PyErr_Format(
3426 PyExc_RuntimeError,
3427 "set changed size during iteration");
3428 return -1;
3429 }
3430 } while (i == BATCHSIZE);
3431
3432 return 0;
3433 }
3434
3435 static int
save_frozenset(PicklerObject * self,PyObject * obj)3436 save_frozenset(PicklerObject *self, PyObject *obj)
3437 {
3438 PyObject *iter;
3439
3440 const char mark_op = MARK;
3441 const char frozenset_op = FROZENSET;
3442
3443 if (self->fast && !fast_save_enter(self, obj))
3444 return -1;
3445
3446 if (self->proto < 4) {
3447 PyObject *items;
3448 PyObject *reduce_value;
3449 int status;
3450
3451 items = PySequence_List(obj);
3452 if (items == NULL) {
3453 return -1;
3454 }
3455 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3456 items);
3457 Py_DECREF(items);
3458 if (reduce_value == NULL) {
3459 return -1;
3460 }
3461 /* save_reduce() will memoize the object automatically. */
3462 status = save_reduce(self, reduce_value, obj);
3463 Py_DECREF(reduce_value);
3464 return status;
3465 }
3466
3467 if (_Pickler_Write(self, &mark_op, 1) < 0)
3468 return -1;
3469
3470 iter = PyObject_GetIter(obj);
3471 if (iter == NULL) {
3472 return -1;
3473 }
3474 for (;;) {
3475 PyObject *item;
3476
3477 item = PyIter_Next(iter);
3478 if (item == NULL) {
3479 if (PyErr_Occurred()) {
3480 Py_DECREF(iter);
3481 return -1;
3482 }
3483 break;
3484 }
3485 if (save(self, item, 0) < 0) {
3486 Py_DECREF(item);
3487 Py_DECREF(iter);
3488 return -1;
3489 }
3490 Py_DECREF(item);
3491 }
3492 Py_DECREF(iter);
3493
3494 /* If the object is already in the memo, this means it is
3495 recursive. In this case, throw away everything we put on the
3496 stack, and fetch the object back from the memo. */
3497 if (PyMemoTable_Get(self->memo, obj)) {
3498 const char pop_mark_op = POP_MARK;
3499
3500 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3501 return -1;
3502 if (memo_get(self, obj) < 0)
3503 return -1;
3504 return 0;
3505 }
3506
3507 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3508 return -1;
3509 if (memo_put(self, obj) < 0)
3510 return -1;
3511
3512 return 0;
3513 }
3514
3515 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3516 fix_imports(PyObject **module_name, PyObject **global_name)
3517 {
3518 PyObject *key;
3519 PyObject *item;
3520 PickleState *st = _Pickle_GetGlobalState();
3521
3522 key = PyTuple_Pack(2, *module_name, *global_name);
3523 if (key == NULL)
3524 return -1;
3525 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3526 Py_DECREF(key);
3527 if (item) {
3528 PyObject *fixed_module_name;
3529 PyObject *fixed_global_name;
3530
3531 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3532 PyErr_Format(PyExc_RuntimeError,
3533 "_compat_pickle.REVERSE_NAME_MAPPING values "
3534 "should be 2-tuples, not %.200s",
3535 Py_TYPE(item)->tp_name);
3536 return -1;
3537 }
3538 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3539 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3540 if (!PyUnicode_Check(fixed_module_name) ||
3541 !PyUnicode_Check(fixed_global_name)) {
3542 PyErr_Format(PyExc_RuntimeError,
3543 "_compat_pickle.REVERSE_NAME_MAPPING values "
3544 "should be pairs of str, not (%.200s, %.200s)",
3545 Py_TYPE(fixed_module_name)->tp_name,
3546 Py_TYPE(fixed_global_name)->tp_name);
3547 return -1;
3548 }
3549
3550 Py_CLEAR(*module_name);
3551 Py_CLEAR(*global_name);
3552 Py_INCREF(fixed_module_name);
3553 Py_INCREF(fixed_global_name);
3554 *module_name = fixed_module_name;
3555 *global_name = fixed_global_name;
3556 return 0;
3557 }
3558 else if (PyErr_Occurred()) {
3559 return -1;
3560 }
3561
3562 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3563 if (item) {
3564 if (!PyUnicode_Check(item)) {
3565 PyErr_Format(PyExc_RuntimeError,
3566 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3567 "should be strings, not %.200s",
3568 Py_TYPE(item)->tp_name);
3569 return -1;
3570 }
3571 Py_INCREF(item);
3572 Py_XSETREF(*module_name, item);
3573 }
3574 else if (PyErr_Occurred()) {
3575 return -1;
3576 }
3577
3578 return 0;
3579 }
3580
3581 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3582 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3583 {
3584 PyObject *global_name = NULL;
3585 PyObject *module_name = NULL;
3586 PyObject *module = NULL;
3587 PyObject *parent = NULL;
3588 PyObject *dotted_path = NULL;
3589 PyObject *lastname = NULL;
3590 PyObject *cls;
3591 PickleState *st = _Pickle_GetGlobalState();
3592 int status = 0;
3593 _Py_IDENTIFIER(__name__);
3594 _Py_IDENTIFIER(__qualname__);
3595
3596 const char global_op = GLOBAL;
3597
3598 if (name) {
3599 Py_INCREF(name);
3600 global_name = name;
3601 }
3602 else {
3603 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3604 goto error;
3605 if (global_name == NULL) {
3606 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3607 if (global_name == NULL)
3608 goto error;
3609 }
3610 }
3611
3612 dotted_path = get_dotted_path(module, global_name);
3613 if (dotted_path == NULL)
3614 goto error;
3615 module_name = whichmodule(obj, dotted_path);
3616 if (module_name == NULL)
3617 goto error;
3618
3619 /* XXX: Change to use the import C API directly with level=0 to disallow
3620 relative imports.
3621
3622 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3623 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3624 custom import functions (IMHO, this would be a nice security
3625 feature). The import C API would need to be extended to support the
3626 extra parameters of __import__ to fix that. */
3627 module = PyImport_Import(module_name);
3628 if (module == NULL) {
3629 PyErr_Format(st->PicklingError,
3630 "Can't pickle %R: import of module %R failed",
3631 obj, module_name);
3632 goto error;
3633 }
3634 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3635 Py_INCREF(lastname);
3636 cls = get_deep_attribute(module, dotted_path, &parent);
3637 Py_CLEAR(dotted_path);
3638 if (cls == NULL) {
3639 PyErr_Format(st->PicklingError,
3640 "Can't pickle %R: attribute lookup %S on %S failed",
3641 obj, global_name, module_name);
3642 goto error;
3643 }
3644 if (cls != obj) {
3645 Py_DECREF(cls);
3646 PyErr_Format(st->PicklingError,
3647 "Can't pickle %R: it's not the same object as %S.%S",
3648 obj, module_name, global_name);
3649 goto error;
3650 }
3651 Py_DECREF(cls);
3652
3653 if (self->proto >= 2) {
3654 /* See whether this is in the extension registry, and if
3655 * so generate an EXT opcode.
3656 */
3657 PyObject *extension_key;
3658 PyObject *code_obj; /* extension code as Python object */
3659 long code; /* extension code as C value */
3660 char pdata[5];
3661 Py_ssize_t n;
3662
3663 extension_key = PyTuple_Pack(2, module_name, global_name);
3664 if (extension_key == NULL) {
3665 goto error;
3666 }
3667 code_obj = PyDict_GetItemWithError(st->extension_registry,
3668 extension_key);
3669 Py_DECREF(extension_key);
3670 /* The object is not registered in the extension registry.
3671 This is the most likely code path. */
3672 if (code_obj == NULL) {
3673 if (PyErr_Occurred()) {
3674 goto error;
3675 }
3676 goto gen_global;
3677 }
3678
3679 /* XXX: pickle.py doesn't check neither the type, nor the range
3680 of the value returned by the extension_registry. It should for
3681 consistency. */
3682
3683 /* Verify code_obj has the right type and value. */
3684 if (!PyLong_Check(code_obj)) {
3685 PyErr_Format(st->PicklingError,
3686 "Can't pickle %R: extension code %R isn't an integer",
3687 obj, code_obj);
3688 goto error;
3689 }
3690 code = PyLong_AS_LONG(code_obj);
3691 if (code <= 0 || code > 0x7fffffffL) {
3692 if (!PyErr_Occurred())
3693 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3694 "code %ld is out of range", obj, code);
3695 goto error;
3696 }
3697
3698 /* Generate an EXT opcode. */
3699 if (code <= 0xff) {
3700 pdata[0] = EXT1;
3701 pdata[1] = (unsigned char)code;
3702 n = 2;
3703 }
3704 else if (code <= 0xffff) {
3705 pdata[0] = EXT2;
3706 pdata[1] = (unsigned char)(code & 0xff);
3707 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3708 n = 3;
3709 }
3710 else {
3711 pdata[0] = EXT4;
3712 pdata[1] = (unsigned char)(code & 0xff);
3713 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3714 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3715 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3716 n = 5;
3717 }
3718
3719 if (_Pickler_Write(self, pdata, n) < 0)
3720 goto error;
3721 }
3722 else {
3723 gen_global:
3724 if (parent == module) {
3725 Py_INCREF(lastname);
3726 Py_DECREF(global_name);
3727 global_name = lastname;
3728 }
3729 if (self->proto >= 4) {
3730 const char stack_global_op = STACK_GLOBAL;
3731
3732 if (save(self, module_name, 0) < 0)
3733 goto error;
3734 if (save(self, global_name, 0) < 0)
3735 goto error;
3736
3737 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3738 goto error;
3739 }
3740 else if (parent != module) {
3741 PickleState *st = _Pickle_GetGlobalState();
3742 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3743 st->getattr, parent, lastname);
3744 if (reduce_value == NULL)
3745 goto error;
3746 status = save_reduce(self, reduce_value, NULL);
3747 Py_DECREF(reduce_value);
3748 if (status < 0)
3749 goto error;
3750 }
3751 else {
3752 /* Generate a normal global opcode if we are using a pickle
3753 protocol < 4, or if the object is not registered in the
3754 extension registry. */
3755 PyObject *encoded;
3756 PyObject *(*unicode_encoder)(PyObject *);
3757
3758 if (_Pickler_Write(self, &global_op, 1) < 0)
3759 goto error;
3760
3761 /* For protocol < 3 and if the user didn't request against doing
3762 so, we convert module names to the old 2.x module names. */
3763 if (self->proto < 3 && self->fix_imports) {
3764 if (fix_imports(&module_name, &global_name) < 0) {
3765 goto error;
3766 }
3767 }
3768
3769 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3770 both the module name and the global name using UTF-8. We do so
3771 only when we are using the pickle protocol newer than version
3772 3. This is to ensure compatibility with older Unpickler running
3773 on Python 2.x. */
3774 if (self->proto == 3) {
3775 unicode_encoder = PyUnicode_AsUTF8String;
3776 }
3777 else {
3778 unicode_encoder = PyUnicode_AsASCIIString;
3779 }
3780 encoded = unicode_encoder(module_name);
3781 if (encoded == NULL) {
3782 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3783 PyErr_Format(st->PicklingError,
3784 "can't pickle module identifier '%S' using "
3785 "pickle protocol %i",
3786 module_name, self->proto);
3787 goto error;
3788 }
3789 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3790 PyBytes_GET_SIZE(encoded)) < 0) {
3791 Py_DECREF(encoded);
3792 goto error;
3793 }
3794 Py_DECREF(encoded);
3795 if(_Pickler_Write(self, "\n", 1) < 0)
3796 goto error;
3797
3798 /* Save the name of the module. */
3799 encoded = unicode_encoder(global_name);
3800 if (encoded == NULL) {
3801 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3802 PyErr_Format(st->PicklingError,
3803 "can't pickle global identifier '%S' using "
3804 "pickle protocol %i",
3805 global_name, self->proto);
3806 goto error;
3807 }
3808 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3809 PyBytes_GET_SIZE(encoded)) < 0) {
3810 Py_DECREF(encoded);
3811 goto error;
3812 }
3813 Py_DECREF(encoded);
3814 if (_Pickler_Write(self, "\n", 1) < 0)
3815 goto error;
3816 }
3817 /* Memoize the object. */
3818 if (memo_put(self, obj) < 0)
3819 goto error;
3820 }
3821
3822 if (0) {
3823 error:
3824 status = -1;
3825 }
3826 Py_XDECREF(module_name);
3827 Py_XDECREF(global_name);
3828 Py_XDECREF(module);
3829 Py_XDECREF(parent);
3830 Py_XDECREF(dotted_path);
3831 Py_XDECREF(lastname);
3832
3833 return status;
3834 }
3835
3836 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3837 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3838 {
3839 PyObject *reduce_value;
3840 int status;
3841
3842 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3843 if (reduce_value == NULL) {
3844 return -1;
3845 }
3846 status = save_reduce(self, reduce_value, obj);
3847 Py_DECREF(reduce_value);
3848 return status;
3849 }
3850
3851 static int
save_type(PicklerObject * self,PyObject * obj)3852 save_type(PicklerObject *self, PyObject *obj)
3853 {
3854 if (obj == (PyObject *)&_PyNone_Type) {
3855 return save_singleton_type(self, obj, Py_None);
3856 }
3857 else if (obj == (PyObject *)&PyEllipsis_Type) {
3858 return save_singleton_type(self, obj, Py_Ellipsis);
3859 }
3860 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3861 return save_singleton_type(self, obj, Py_NotImplemented);
3862 }
3863 return save_global(self, obj, NULL);
3864 }
3865
3866 static int
save_pers(PicklerObject * self,PyObject * obj)3867 save_pers(PicklerObject *self, PyObject *obj)
3868 {
3869 PyObject *pid = NULL;
3870 int status = 0;
3871
3872 const char persid_op = PERSID;
3873 const char binpersid_op = BINPERSID;
3874
3875 pid = call_method(self->pers_func, self->pers_func_self, obj);
3876 if (pid == NULL)
3877 return -1;
3878
3879 if (pid != Py_None) {
3880 if (self->bin) {
3881 if (save(self, pid, 1) < 0 ||
3882 _Pickler_Write(self, &binpersid_op, 1) < 0)
3883 goto error;
3884 }
3885 else {
3886 PyObject *pid_str;
3887
3888 pid_str = PyObject_Str(pid);
3889 if (pid_str == NULL)
3890 goto error;
3891
3892 /* XXX: Should it check whether the pid contains embedded
3893 newlines? */
3894 if (!PyUnicode_IS_ASCII(pid_str)) {
3895 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3896 "persistent IDs in protocol 0 must be "
3897 "ASCII strings");
3898 Py_DECREF(pid_str);
3899 goto error;
3900 }
3901
3902 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3903 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3904 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3905 _Pickler_Write(self, "\n", 1) < 0) {
3906 Py_DECREF(pid_str);
3907 goto error;
3908 }
3909 Py_DECREF(pid_str);
3910 }
3911 status = 1;
3912 }
3913
3914 if (0) {
3915 error:
3916 status = -1;
3917 }
3918 Py_XDECREF(pid);
3919
3920 return status;
3921 }
3922
3923 static PyObject *
get_class(PyObject * obj)3924 get_class(PyObject *obj)
3925 {
3926 PyObject *cls;
3927 _Py_IDENTIFIER(__class__);
3928
3929 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3930 cls = (PyObject *) Py_TYPE(obj);
3931 Py_INCREF(cls);
3932 }
3933 return cls;
3934 }
3935
3936 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3937 * appropriate __reduce__ method for obj.
3938 */
3939 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3940 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3941 {
3942 PyObject *callable;
3943 PyObject *argtup;
3944 PyObject *state = NULL;
3945 PyObject *listitems = Py_None;
3946 PyObject *dictitems = Py_None;
3947 PyObject *state_setter = Py_None;
3948 PickleState *st = _Pickle_GetGlobalState();
3949 Py_ssize_t size;
3950 int use_newobj = 0, use_newobj_ex = 0;
3951
3952 const char reduce_op = REDUCE;
3953 const char build_op = BUILD;
3954 const char newobj_op = NEWOBJ;
3955 const char newobj_ex_op = NEWOBJ_EX;
3956
3957 size = PyTuple_Size(args);
3958 if (size < 2 || size > 6) {
3959 PyErr_SetString(st->PicklingError, "tuple returned by "
3960 "__reduce__ must contain 2 through 6 elements");
3961 return -1;
3962 }
3963
3964 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3965 &callable, &argtup, &state, &listitems, &dictitems,
3966 &state_setter))
3967 return -1;
3968
3969 if (!PyCallable_Check(callable)) {
3970 PyErr_SetString(st->PicklingError, "first item of the tuple "
3971 "returned by __reduce__ must be callable");
3972 return -1;
3973 }
3974 if (!PyTuple_Check(argtup)) {
3975 PyErr_SetString(st->PicklingError, "second item of the tuple "
3976 "returned by __reduce__ must be a tuple");
3977 return -1;
3978 }
3979
3980 if (state == Py_None)
3981 state = NULL;
3982
3983 if (listitems == Py_None)
3984 listitems = NULL;
3985 else if (!PyIter_Check(listitems)) {
3986 PyErr_Format(st->PicklingError, "fourth element of the tuple "
3987 "returned by __reduce__ must be an iterator, not %s",
3988 Py_TYPE(listitems)->tp_name);
3989 return -1;
3990 }
3991
3992 if (dictitems == Py_None)
3993 dictitems = NULL;
3994 else if (!PyIter_Check(dictitems)) {
3995 PyErr_Format(st->PicklingError, "fifth element of the tuple "
3996 "returned by __reduce__ must be an iterator, not %s",
3997 Py_TYPE(dictitems)->tp_name);
3998 return -1;
3999 }
4000
4001 if (state_setter == Py_None)
4002 state_setter = NULL;
4003 else if (!PyCallable_Check(state_setter)) {
4004 PyErr_Format(st->PicklingError, "sixth element of the tuple "
4005 "returned by __reduce__ must be a function, not %s",
4006 Py_TYPE(state_setter)->tp_name);
4007 return -1;
4008 }
4009
4010 if (self->proto >= 2) {
4011 PyObject *name;
4012 _Py_IDENTIFIER(__name__);
4013
4014 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4015 return -1;
4016 }
4017 if (name != NULL && PyUnicode_Check(name)) {
4018 _Py_IDENTIFIER(__newobj_ex__);
4019 use_newobj_ex = _PyUnicode_EqualToASCIIId(
4020 name, &PyId___newobj_ex__);
4021 if (!use_newobj_ex) {
4022 _Py_IDENTIFIER(__newobj__);
4023 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
4024 }
4025 }
4026 Py_XDECREF(name);
4027 }
4028
4029 if (use_newobj_ex) {
4030 PyObject *cls;
4031 PyObject *args;
4032 PyObject *kwargs;
4033
4034 if (PyTuple_GET_SIZE(argtup) != 3) {
4035 PyErr_Format(st->PicklingError,
4036 "length of the NEWOBJ_EX argument tuple must be "
4037 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4038 return -1;
4039 }
4040
4041 cls = PyTuple_GET_ITEM(argtup, 0);
4042 if (!PyType_Check(cls)) {
4043 PyErr_Format(st->PicklingError,
4044 "first item from NEWOBJ_EX argument tuple must "
4045 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4046 return -1;
4047 }
4048 args = PyTuple_GET_ITEM(argtup, 1);
4049 if (!PyTuple_Check(args)) {
4050 PyErr_Format(st->PicklingError,
4051 "second item from NEWOBJ_EX argument tuple must "
4052 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4053 return -1;
4054 }
4055 kwargs = PyTuple_GET_ITEM(argtup, 2);
4056 if (!PyDict_Check(kwargs)) {
4057 PyErr_Format(st->PicklingError,
4058 "third item from NEWOBJ_EX argument tuple must "
4059 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4060 return -1;
4061 }
4062
4063 if (self->proto >= 4) {
4064 if (save(self, cls, 0) < 0 ||
4065 save(self, args, 0) < 0 ||
4066 save(self, kwargs, 0) < 0 ||
4067 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4068 return -1;
4069 }
4070 }
4071 else {
4072 PyObject *newargs;
4073 PyObject *cls_new;
4074 Py_ssize_t i;
4075 _Py_IDENTIFIER(__new__);
4076
4077 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4078 if (newargs == NULL)
4079 return -1;
4080
4081 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4082 if (cls_new == NULL) {
4083 Py_DECREF(newargs);
4084 return -1;
4085 }
4086 PyTuple_SET_ITEM(newargs, 0, cls_new);
4087 Py_INCREF(cls);
4088 PyTuple_SET_ITEM(newargs, 1, cls);
4089 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4090 PyObject *item = PyTuple_GET_ITEM(args, i);
4091 Py_INCREF(item);
4092 PyTuple_SET_ITEM(newargs, i + 2, item);
4093 }
4094
4095 callable = PyObject_Call(st->partial, newargs, kwargs);
4096 Py_DECREF(newargs);
4097 if (callable == NULL)
4098 return -1;
4099
4100 newargs = PyTuple_New(0);
4101 if (newargs == NULL) {
4102 Py_DECREF(callable);
4103 return -1;
4104 }
4105
4106 if (save(self, callable, 0) < 0 ||
4107 save(self, newargs, 0) < 0 ||
4108 _Pickler_Write(self, &reduce_op, 1) < 0) {
4109 Py_DECREF(newargs);
4110 Py_DECREF(callable);
4111 return -1;
4112 }
4113 Py_DECREF(newargs);
4114 Py_DECREF(callable);
4115 }
4116 }
4117 else if (use_newobj) {
4118 PyObject *cls;
4119 PyObject *newargtup;
4120 PyObject *obj_class;
4121 int p;
4122
4123 /* Sanity checks. */
4124 if (PyTuple_GET_SIZE(argtup) < 1) {
4125 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4126 return -1;
4127 }
4128
4129 cls = PyTuple_GET_ITEM(argtup, 0);
4130 if (!PyType_Check(cls)) {
4131 PyErr_SetString(st->PicklingError, "args[0] from "
4132 "__newobj__ args is not a type");
4133 return -1;
4134 }
4135
4136 if (obj != NULL) {
4137 obj_class = get_class(obj);
4138 if (obj_class == NULL) {
4139 return -1;
4140 }
4141 p = obj_class != cls;
4142 Py_DECREF(obj_class);
4143 if (p) {
4144 PyErr_SetString(st->PicklingError, "args[0] from "
4145 "__newobj__ args has the wrong class");
4146 return -1;
4147 }
4148 }
4149 /* XXX: These calls save() are prone to infinite recursion. Imagine
4150 what happen if the value returned by the __reduce__() method of
4151 some extension type contains another object of the same type. Ouch!
4152
4153 Here is a quick example, that I ran into, to illustrate what I
4154 mean:
4155
4156 >>> import pickle, copyreg
4157 >>> copyreg.dispatch_table.pop(complex)
4158 >>> pickle.dumps(1+2j)
4159 Traceback (most recent call last):
4160 ...
4161 RecursionError: maximum recursion depth exceeded
4162
4163 Removing the complex class from copyreg.dispatch_table made the
4164 __reduce_ex__() method emit another complex object:
4165
4166 >>> (1+1j).__reduce_ex__(2)
4167 (<function __newobj__ at 0xb7b71c3c>,
4168 (<class 'complex'>, (1+1j)), None, None, None)
4169
4170 Thus when save() was called on newargstup (the 2nd item) recursion
4171 ensued. Of course, the bug was in the complex class which had a
4172 broken __getnewargs__() that emitted another complex object. But,
4173 the point, here, is it is quite easy to end up with a broken reduce
4174 function. */
4175
4176 /* Save the class and its __new__ arguments. */
4177 if (save(self, cls, 0) < 0)
4178 return -1;
4179
4180 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4181 if (newargtup == NULL)
4182 return -1;
4183
4184 p = save(self, newargtup, 0);
4185 Py_DECREF(newargtup);
4186 if (p < 0)
4187 return -1;
4188
4189 /* Add NEWOBJ opcode. */
4190 if (_Pickler_Write(self, &newobj_op, 1) < 0)
4191 return -1;
4192 }
4193 else { /* Not using NEWOBJ. */
4194 if (save(self, callable, 0) < 0 ||
4195 save(self, argtup, 0) < 0 ||
4196 _Pickler_Write(self, &reduce_op, 1) < 0)
4197 return -1;
4198 }
4199
4200 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4201 the caller do not want to memoize the object. Not particularly useful,
4202 but that is to mimic the behavior save_reduce() in pickle.py when
4203 obj is None. */
4204 if (obj != NULL) {
4205 /* If the object is already in the memo, this means it is
4206 recursive. In this case, throw away everything we put on the
4207 stack, and fetch the object back from the memo. */
4208 if (PyMemoTable_Get(self->memo, obj)) {
4209 const char pop_op = POP;
4210
4211 if (_Pickler_Write(self, &pop_op, 1) < 0)
4212 return -1;
4213 if (memo_get(self, obj) < 0)
4214 return -1;
4215
4216 return 0;
4217 }
4218 else if (memo_put(self, obj) < 0)
4219 return -1;
4220 }
4221
4222 if (listitems && batch_list(self, listitems) < 0)
4223 return -1;
4224
4225 if (dictitems && batch_dict(self, dictitems) < 0)
4226 return -1;
4227
4228 if (state) {
4229 if (state_setter == NULL) {
4230 if (save(self, state, 0) < 0 ||
4231 _Pickler_Write(self, &build_op, 1) < 0)
4232 return -1;
4233 }
4234 else {
4235
4236 /* If a state_setter is specified, call it instead of load_build to
4237 * update obj's with its previous state.
4238 * The first 4 save/write instructions push state_setter and its
4239 * tuple of expected arguments (obj, state) onto the stack. The
4240 * REDUCE opcode triggers the state_setter(obj, state) function
4241 * call. Finally, because state-updating routines only do in-place
4242 * modification, the whole operation has to be stack-transparent.
4243 * Thus, we finally pop the call's output from the stack.*/
4244
4245 const char tupletwo_op = TUPLE2;
4246 const char pop_op = POP;
4247 if (save(self, state_setter, 0) < 0 ||
4248 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4249 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4250 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4251 _Pickler_Write(self, &pop_op, 1) < 0)
4252 return -1;
4253 }
4254 }
4255 return 0;
4256 }
4257
4258 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4259 save(PicklerObject *self, PyObject *obj, int pers_save)
4260 {
4261 PyTypeObject *type;
4262 PyObject *reduce_func = NULL;
4263 PyObject *reduce_value = NULL;
4264 int status = 0;
4265
4266 if (_Pickler_OpcodeBoundary(self) < 0)
4267 return -1;
4268
4269 /* The extra pers_save argument is necessary to avoid calling save_pers()
4270 on its returned object. */
4271 if (!pers_save && self->pers_func) {
4272 /* save_pers() returns:
4273 -1 to signal an error;
4274 0 if it did nothing successfully;
4275 1 if a persistent id was saved.
4276 */
4277 if ((status = save_pers(self, obj)) != 0)
4278 return status;
4279 }
4280
4281 type = Py_TYPE(obj);
4282
    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This has been
       removed since benchmarks showed that this optimization was actually
       slowing things down. */
4287
4288 /* Atom types; these aren't memoized, so don't check the memo. */
4289
4290 if (obj == Py_None) {
4291 return save_none(self, obj);
4292 }
4293 else if (obj == Py_False || obj == Py_True) {
4294 return save_bool(self, obj);
4295 }
4296 else if (type == &PyLong_Type) {
4297 return save_long(self, obj);
4298 }
4299 else if (type == &PyFloat_Type) {
4300 return save_float(self, obj);
4301 }
4302
4303 /* Check the memo to see if it has the object. If so, generate
4304 a GET (or BINGET) opcode, instead of pickling the object
4305 once again. */
4306 if (PyMemoTable_Get(self->memo, obj)) {
4307 return memo_get(self, obj);
4308 }
4309
4310 if (type == &PyBytes_Type) {
4311 return save_bytes(self, obj);
4312 }
4313 else if (type == &PyUnicode_Type) {
4314 return save_unicode(self, obj);
4315 }
4316
4317 /* We're only calling Py_EnterRecursiveCall here so that atomic
4318 types above are pickled faster. */
4319 if (Py_EnterRecursiveCall(" while pickling an object")) {
4320 return -1;
4321 }
4322
4323 if (type == &PyDict_Type) {
4324 status = save_dict(self, obj);
4325 goto done;
4326 }
4327 else if (type == &PySet_Type) {
4328 status = save_set(self, obj);
4329 goto done;
4330 }
4331 else if (type == &PyFrozenSet_Type) {
4332 status = save_frozenset(self, obj);
4333 goto done;
4334 }
4335 else if (type == &PyList_Type) {
4336 status = save_list(self, obj);
4337 goto done;
4338 }
4339 else if (type == &PyTuple_Type) {
4340 status = save_tuple(self, obj);
4341 goto done;
4342 }
4343 else if (type == &PyByteArray_Type) {
4344 status = save_bytearray(self, obj);
4345 goto done;
4346 }
4347 else if (type == &PyPickleBuffer_Type) {
4348 status = save_picklebuffer(self, obj);
4349 goto done;
4350 }
4351
4352 /* Now, check reducer_override. If it returns NotImplemented,
4353 * fallback to save_type or save_global, and then perhaps to the
4354 * regular reduction mechanism.
4355 */
4356 if (self->reducer_override != NULL) {
4357 reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4358 if (reduce_value == NULL) {
4359 goto error;
4360 }
4361 if (reduce_value != Py_NotImplemented) {
4362 goto reduce;
4363 }
4364 Py_DECREF(reduce_value);
4365 reduce_value = NULL;
4366 }
4367
4368 if (type == &PyType_Type) {
4369 status = save_type(self, obj);
4370 goto done;
4371 }
4372 else if (type == &PyFunction_Type) {
4373 status = save_global(self, obj, NULL);
4374 goto done;
4375 }
4376
4377 /* XXX: This part needs some unit tests. */
4378
4379 /* Get a reduction callable, and call it. This may come from
4380 * self.dispatch_table, copyreg.dispatch_table, the object's
4381 * __reduce_ex__ method, or the object's __reduce__ method.
4382 */
4383 if (self->dispatch_table == NULL) {
4384 PickleState *st = _Pickle_GetGlobalState();
4385 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4386 (PyObject *)type);
4387 if (reduce_func == NULL) {
4388 if (PyErr_Occurred()) {
4389 goto error;
4390 }
4391 } else {
4392 /* PyDict_GetItemWithError() returns a borrowed reference.
4393 Increase the reference count to be consistent with
4394 PyObject_GetItem and _PyObject_GetAttrId used below. */
4395 Py_INCREF(reduce_func);
4396 }
4397 } else {
4398 reduce_func = PyObject_GetItem(self->dispatch_table,
4399 (PyObject *)type);
4400 if (reduce_func == NULL) {
4401 if (PyErr_ExceptionMatches(PyExc_KeyError))
4402 PyErr_Clear();
4403 else
4404 goto error;
4405 }
4406 }
4407 if (reduce_func != NULL) {
4408 Py_INCREF(obj);
4409 reduce_value = _Pickle_FastCall(reduce_func, obj);
4410 }
4411 else if (PyType_IsSubtype(type, &PyType_Type)) {
4412 status = save_global(self, obj, NULL);
4413 goto done;
4414 }
4415 else {
4416 _Py_IDENTIFIER(__reduce__);
4417 _Py_IDENTIFIER(__reduce_ex__);
4418
4419 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4420 automatically defined as __reduce__. While this is convenient, this
4421 make it impossible to know which method was actually called. Of
4422 course, this is not a big deal. But still, it would be nice to let
4423 the user know which method was called when something go
4424 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4425 don't actually have to check for a __reduce__ method. */
4426
4427 /* Check for a __reduce_ex__ method. */
4428 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4429 goto error;
4430 }
4431 if (reduce_func != NULL) {
4432 PyObject *proto;
4433 proto = PyLong_FromLong(self->proto);
4434 if (proto != NULL) {
4435 reduce_value = _Pickle_FastCall(reduce_func, proto);
4436 }
4437 }
4438 else {
4439 /* Check for a __reduce__ method. */
4440 if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4441 goto error;
4442 }
4443 if (reduce_func != NULL) {
4444 reduce_value = PyObject_CallNoArgs(reduce_func);
4445 }
4446 else {
4447 PickleState *st = _Pickle_GetGlobalState();
4448 PyErr_Format(st->PicklingError,
4449 "can't pickle '%.200s' object: %R",
4450 type->tp_name, obj);
4451 goto error;
4452 }
4453 }
4454 }
4455
4456 if (reduce_value == NULL)
4457 goto error;
4458
4459 reduce:
4460 if (PyUnicode_Check(reduce_value)) {
4461 status = save_global(self, obj, reduce_value);
4462 goto done;
4463 }
4464
4465 if (!PyTuple_Check(reduce_value)) {
4466 PickleState *st = _Pickle_GetGlobalState();
4467 PyErr_SetString(st->PicklingError,
4468 "__reduce__ must return a string or tuple");
4469 goto error;
4470 }
4471
4472 status = save_reduce(self, reduce_value, obj);
4473
4474 if (0) {
4475 error:
4476 status = -1;
4477 }
4478 done:
4479
4480 Py_LeaveRecursiveCall();
4481 Py_XDECREF(reduce_func);
4482 Py_XDECREF(reduce_value);
4483
4484 return status;
4485 }
4486
4487 static int
dump(PicklerObject * self,PyObject * obj)4488 dump(PicklerObject *self, PyObject *obj)
4489 {
4490 const char stop_op = STOP;
4491 int status = -1;
4492 PyObject *tmp;
4493 _Py_IDENTIFIER(reducer_override);
4494
4495 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4496 &tmp) < 0) {
4497 goto error;
4498 }
4499 /* Cache the reducer_override method, if it exists. */
4500 if (tmp != NULL) {
4501 Py_XSETREF(self->reducer_override, tmp);
4502 }
4503 else {
4504 Py_CLEAR(self->reducer_override);
4505 }
4506
4507 if (self->proto >= 2) {
4508 char header[2];
4509
4510 header[0] = PROTO;
4511 assert(self->proto >= 0 && self->proto < 256);
4512 header[1] = (unsigned char)self->proto;
4513 if (_Pickler_Write(self, header, 2) < 0)
4514 goto error;
4515 if (self->proto >= 4)
4516 self->framing = 1;
4517 }
4518
4519 if (save(self, obj, 0) < 0 ||
4520 _Pickler_Write(self, &stop_op, 1) < 0 ||
4521 _Pickler_CommitFrame(self) < 0)
4522 goto error;
4523
4524 // Success
4525 status = 0;
4526
4527 error:
4528 self->framing = 0;
4529
4530 /* Break the reference cycle we generated at the beginning this function
4531 * call when setting the reducer_override attribute of the Pickler instance
4532 * to a bound method of the same instance. This is important as the Pickler
4533 * instance holds a reference to each object it has pickled (through its
4534 * memo): thus, these objects wont be garbage-collected as long as the
4535 * Pickler itself is not collected. */
4536 Py_CLEAR(self->reducer_override);
4537 return status;
4538 }
4539
4540 /*[clinic input]
4541
4542 _pickle.Pickler.clear_memo
4543
4544 Clears the pickler's "memo".
4545
4546 The memo is the data structure that remembers which objects the
4547 pickler has already seen, so that shared or recursive objects are
4548 pickled by reference and not by value. This method is useful when
4549 re-using picklers.
4550 [clinic start generated code]*/
4551
4552 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4553 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4554 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4555 {
4556 if (self->memo)
4557 PyMemoTable_Clear(self->memo);
4558
4559 Py_RETURN_NONE;
4560 }
4561
4562 /*[clinic input]
4563
4564 _pickle.Pickler.dump
4565
4566 obj: object
4567 /
4568
4569 Write a pickled representation of the given object to the open file.
4570 [clinic start generated code]*/
4571
4572 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4573 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4574 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4575 {
4576 /* Check whether the Pickler was initialized correctly (issue3664).
4577 Developers often forget to call __init__() in their subclasses, which
4578 would trigger a segfault without this check. */
4579 if (self->write == NULL) {
4580 PickleState *st = _Pickle_GetGlobalState();
4581 PyErr_Format(st->PicklingError,
4582 "Pickler.__init__() was not called by %s.__init__()",
4583 Py_TYPE(self)->tp_name);
4584 return NULL;
4585 }
4586
4587 if (_Pickler_ClearBuffer(self) < 0)
4588 return NULL;
4589
4590 if (dump(self, obj) < 0)
4591 return NULL;
4592
4593 if (_Pickler_FlushToFile(self) < 0)
4594 return NULL;
4595
4596 Py_RETURN_NONE;
4597 }
4598
4599 /*[clinic input]
4600
4601 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4602
4603 Returns size in memory, in bytes.
4604 [clinic start generated code]*/
4605
4606 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4607 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4608 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4609 {
4610 Py_ssize_t res, s;
4611
4612 res = _PyObject_SIZE(Py_TYPE(self));
4613 if (self->memo != NULL) {
4614 res += sizeof(PyMemoTable);
4615 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4616 }
4617 if (self->output_buffer != NULL) {
4618 s = _PySys_GetSizeOf(self->output_buffer);
4619 if (s == -1)
4620 return -1;
4621 res += s;
4622 }
4623 return res;
4624 }
4625
/* Method table installed as tp_methods of Pickler_Type.  Each *_METHODDEF
   macro is defined outside this section; presumably each expands to one
   complete PyMethodDef entry including its trailing comma -- confirm against
   the generated clinic header. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel: terminates the table */
};
4632
/* tp_dealloc for Pickler objects: untrack the object from the garbage
   collector, release every object reference it owns, destroy the memo
   table, and finally free the object through its type's tp_free slot. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector never sees a half-torn-down object. */
    PyObject_GC_UnTrack(self);

    /* NOTE(review): pers_func_self is not released here; presumably it is
       a borrowed pointer -- confirm against init_method_ref(). */
    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    /* The memo may never have been created if __init__() was not called;
       presumably PyMemoTable_Del() accepts NULL -- confirm. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4650
4651 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4652 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4653 {
4654 Py_VISIT(self->write);
4655 Py_VISIT(self->pers_func);
4656 Py_VISIT(self->dispatch_table);
4657 Py_VISIT(self->fast_memo);
4658 Py_VISIT(self->reducer_override);
4659 Py_VISIT(self->buffer_callback);
4660 return 0;
4661 }
4662
4663 static int
Pickler_clear(PicklerObject * self)4664 Pickler_clear(PicklerObject *self)
4665 {
4666 Py_CLEAR(self->output_buffer);
4667 Py_CLEAR(self->write);
4668 Py_CLEAR(self->pers_func);
4669 Py_CLEAR(self->dispatch_table);
4670 Py_CLEAR(self->fast_memo);
4671 Py_CLEAR(self->reducer_override);
4672 Py_CLEAR(self->buffer_callback);
4673
4674 if (self->memo != NULL) {
4675 PyMemoTable *memo = self->memo;
4676 self->memo = NULL;
4677 PyMemoTable_Del(memo);
4678 }
4679 return 0;
4680 }
4681
4682
4683 /*[clinic input]
4684
4685 _pickle.Pickler.__init__
4686
4687 file: object
4688 protocol: object = None
4689 fix_imports: bool = True
4690 buffer_callback: object = None
4691
4692 This takes a binary file for writing a pickle data stream.
4693
4694 The optional *protocol* argument tells the pickler to use the given
4695 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4696 protocol is 4. It was introduced in Python 3.4, and is incompatible
4697 with previous versions.
4698
4699 Specifying a negative protocol version selects the highest protocol
4700 version supported. The higher the protocol used, the more recent the
4701 version of Python needed to read the pickle produced.
4702
4703 The *file* argument must have a write() method that accepts a single
4704 bytes argument. It can thus be a file object opened for binary
4705 writing, an io.BytesIO instance, or any other custom object that meets
4706 this interface.
4707
4708 If *fix_imports* is True and protocol is less than 3, pickle will try
4709 to map the new Python 3 names to the old module names used in Python
4710 2, so that the pickle data stream is readable with Python 2.
4711
4712 If *buffer_callback* is None (the default), buffer views are
4713 serialized into *file* as part of the pickle stream.
4714
4715 If *buffer_callback* is not None, then it can be called any number
4716 of times with a buffer view. If the callback returns a false value
4717 (such as None), the given buffer is out-of-band; otherwise the
4718 buffer is serialized in-band, i.e. inside the pickle stream.
4719
4720 It is an error if *buffer_callback* is not None and *protocol*
4721 is None or smaller than 5.
4722
4723 [clinic start generated code]*/
4724
4725 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4726 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4727 PyObject *protocol, int fix_imports,
4728 PyObject *buffer_callback)
4729 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4730 {
4731 _Py_IDENTIFIER(persistent_id);
4732 _Py_IDENTIFIER(dispatch_table);
4733
4734 /* In case of multiple __init__() calls, clear previous content. */
4735 if (self->write != NULL)
4736 (void)Pickler_clear(self);
4737
4738 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4739 return -1;
4740
4741 if (_Pickler_SetOutputStream(self, file) < 0)
4742 return -1;
4743
4744 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4745 return -1;
4746
4747 /* memo and output_buffer may have already been created in _Pickler_New */
4748 if (self->memo == NULL) {
4749 self->memo = PyMemoTable_New();
4750 if (self->memo == NULL)
4751 return -1;
4752 }
4753 self->output_len = 0;
4754 if (self->output_buffer == NULL) {
4755 self->max_output_len = WRITE_BUF_SIZE;
4756 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4757 self->max_output_len);
4758 if (self->output_buffer == NULL)
4759 return -1;
4760 }
4761
4762 self->fast = 0;
4763 self->fast_nesting = 0;
4764 self->fast_memo = NULL;
4765
4766 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4767 &self->pers_func, &self->pers_func_self) < 0)
4768 {
4769 return -1;
4770 }
4771
4772 if (_PyObject_LookupAttrId((PyObject *)self,
4773 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4774 return -1;
4775 }
4776
4777 return 0;
4778 }
4779
4780
4781 /* Define a proxy object for the Pickler's internal memo object. This is to
4782 * avoid breaking code like:
4783 * pickler.memo.clear()
4784 * and
4785 * pickler.memo = saved_memo
4786 * Is this a good idea? Not really, but we don't want to break code that uses
4787 * it. Note that we don't implement the entire mapping API here. This is
4788 * intentional, as these should be treated as black-box implementation details.
4789 */
4790
4791 /*[clinic input]
4792 _pickle.PicklerMemoProxy.clear
4793
4794 Remove all items from memo.
4795 [clinic start generated code]*/
4796
4797 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4798 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4799 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4800 {
4801 if (self->pickler->memo)
4802 PyMemoTable_Clear(self->pickler->memo);
4803 Py_RETURN_NONE;
4804 }
4805
4806 /*[clinic input]
4807 _pickle.PicklerMemoProxy.copy
4808
4809 Copy the memo to a new object.
4810 [clinic start generated code]*/
4811
4812 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4813 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4814 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4815 {
4816 PyMemoTable *memo;
4817 PyObject *new_memo = PyDict_New();
4818 if (new_memo == NULL)
4819 return NULL;
4820
4821 memo = self->pickler->memo;
4822 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4823 PyMemoEntry entry = memo->mt_table[i];
4824 if (entry.me_key != NULL) {
4825 int status;
4826 PyObject *key, *value;
4827
4828 key = PyLong_FromVoidPtr(entry.me_key);
4829 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4830
4831 if (key == NULL || value == NULL) {
4832 Py_XDECREF(key);
4833 Py_XDECREF(value);
4834 goto error;
4835 }
4836 status = PyDict_SetItem(new_memo, key, value);
4837 Py_DECREF(key);
4838 Py_DECREF(value);
4839 if (status < 0)
4840 goto error;
4841 }
4842 }
4843 return new_memo;
4844
4845 error:
4846 Py_XDECREF(new_memo);
4847 return NULL;
4848 }
4849
4850 /*[clinic input]
4851 _pickle.PicklerMemoProxy.__reduce__
4852
4853 Implement pickle support.
4854 [clinic start generated code]*/
4855
4856 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4857 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4858 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4859 {
4860 PyObject *reduce_value, *dict_args;
4861 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4862 if (contents == NULL)
4863 return NULL;
4864
4865 reduce_value = PyTuple_New(2);
4866 if (reduce_value == NULL) {
4867 Py_DECREF(contents);
4868 return NULL;
4869 }
4870 dict_args = PyTuple_New(1);
4871 if (dict_args == NULL) {
4872 Py_DECREF(contents);
4873 Py_DECREF(reduce_value);
4874 return NULL;
4875 }
4876 PyTuple_SET_ITEM(dict_args, 0, contents);
4877 Py_INCREF((PyObject *)&PyDict_Type);
4878 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4879 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4880 return reduce_value;
4881 }
4882
/* Method table installed as tp_methods of PicklerMemoProxyType.  The
   *_METHODDEF macros are defined outside this section; presumably each
   expands to one PyMethodDef entry for the clear/copy/__reduce__
   implementations above -- confirm against the generated clinic header. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel: terminates the table */
};
4889
/* tp_dealloc for the memo proxy: untrack it from the garbage collector,
   release its reference to the proxied pickler, and free the object. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4897
/* tp_traverse for the memo proxy: the only object it references is the
   proxied pickler. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4905
/* tp_clear for the memo proxy: drop the reference to the proxied pickler.
   Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4912
/* Type object for the proxy returned by the Pickler "memo" attribute.
   The initializer is positional, so every slot up to tp_methods must stay
   in exactly this order; slots after tp_methods are left zero-initialized.
   Instances are unhashable (tp_hash is PyObject_HashNotImplemented) and
   take part in cyclic garbage collection. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /* tp_name */
    sizeof(PicklerMemoProxyObject),             /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4943
4944 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4945 PicklerMemoProxy_New(PicklerObject *pickler)
4946 {
4947 PicklerMemoProxyObject *self;
4948
4949 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4950 if (self == NULL)
4951 return NULL;
4952 Py_INCREF(pickler);
4953 self->pickler = pickler;
4954 PyObject_GC_Track(self);
4955 return (PyObject *)self;
4956 }
4957
4958 /*****************************************************************************/
4959
/* Getter for the Pickler "memo" attribute: returns a new proxy object
   wrapping this pickler rather than the memo table itself.  A fresh proxy
   is created on every access. */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4965
4966 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4967 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4968 {
4969 PyMemoTable *new_memo = NULL;
4970
4971 if (obj == NULL) {
4972 PyErr_SetString(PyExc_TypeError,
4973 "attribute deletion is not supported");
4974 return -1;
4975 }
4976
4977 if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4978 PicklerObject *pickler =
4979 ((PicklerMemoProxyObject *)obj)->pickler;
4980
4981 new_memo = PyMemoTable_Copy(pickler->memo);
4982 if (new_memo == NULL)
4983 return -1;
4984 }
4985 else if (PyDict_Check(obj)) {
4986 Py_ssize_t i = 0;
4987 PyObject *key, *value;
4988
4989 new_memo = PyMemoTable_New();
4990 if (new_memo == NULL)
4991 return -1;
4992
4993 while (PyDict_Next(obj, &i, &key, &value)) {
4994 Py_ssize_t memo_id;
4995 PyObject *memo_obj;
4996
4997 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4998 PyErr_SetString(PyExc_TypeError,
4999 "'memo' values must be 2-item tuples");
5000 goto error;
5001 }
5002 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5003 if (memo_id == -1 && PyErr_Occurred())
5004 goto error;
5005 memo_obj = PyTuple_GET_ITEM(value, 1);
5006 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007 goto error;
5008 }
5009 }
5010 else {
5011 PyErr_Format(PyExc_TypeError,
5012 "'memo' attribute must be a PicklerMemoProxy object "
5013 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5014 return -1;
5015 }
5016
5017 PyMemoTable_Del(self->memo);
5018 self->memo = new_memo;
5019
5020 return 0;
5021
5022 error:
5023 if (new_memo)
5024 PyMemoTable_Del(new_memo);
5025 return -1;
5026 }
5027
5028 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5029 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5030 {
5031 if (self->pers_func == NULL) {
5032 PyErr_SetString(PyExc_AttributeError, "persistent_id");
5033 return NULL;
5034 }
5035 return reconstruct_method(self->pers_func, self->pers_func_self);
5036 }
5037
5038 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5039 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5040 {
5041 if (value == NULL) {
5042 PyErr_SetString(PyExc_TypeError,
5043 "attribute deletion is not supported");
5044 return -1;
5045 }
5046 if (!PyCallable_Check(value)) {
5047 PyErr_SetString(PyExc_TypeError,
5048 "persistent_id must be a callable taking one argument");
5049 return -1;
5050 }
5051
5052 self->pers_func_self = NULL;
5053 Py_INCREF(value);
5054 Py_XSETREF(self->pers_func, value);
5055
5056 return 0;
5057 }
5058
/* Struct members of PicklerObject exposed directly as Python attributes.
   "bin" and "fast" are C ints; "dispatch_table" is an object slot declared
   T_OBJECT_EX.  No flags are given, so the entries use the default
   (zero) flags. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}  /* sentinel */
};
5065
/* Computed attributes of Pickler: "memo" is served through a proxy object
   and "persistent_id" through the stored function/self pair.  Both setters
   reject attribute deletion. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel */
};
5073
/* Type object for _pickle.Pickler.  The initializer is positional, so the
   slots must stay in exactly this order.  The type is subclassable and
   garbage-collected; instances are created by the generic allocator, so
   every field starts out zeroed until __init__() (tp_init) runs. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_vectorcall_offset*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_as_async*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5116
5117 /* Temporary helper for calling self.find_class().
5118
5119 XXX: It would be nice to able to avoid Python function call overhead, by
5120 using directly the C version of find_class(), when find_class() is not
5121 overridden by a subclass. Although, this could become rather hackish. A
5122 simpler optimization would be to call the C function when self is not a
5123 subclass instance. */
5124 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5125 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5126 {
5127 _Py_IDENTIFIER(find_class);
5128
5129 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5130 module_name, global_name, NULL);
5131 }
5132
5133 static Py_ssize_t
marker(UnpicklerObject * self)5134 marker(UnpicklerObject *self)
5135 {
5136 Py_ssize_t mark;
5137
5138 if (self->num_marks < 1) {
5139 PickleState *st = _Pickle_GetGlobalState();
5140 PyErr_SetString(st->UnpicklingError, "could not find MARK");
5141 return -1;
5142 }
5143
5144 mark = self->marks[--self->num_marks];
5145 self->stack->mark_set = self->num_marks != 0;
5146 self->stack->fence = self->num_marks ?
5147 self->marks[self->num_marks - 1] : 0;
5148 return mark;
5149 }
5150
/* Handler that puts None on the unpickler's data stack.  Returns 0 on
   success.  PDATA_APPEND is a macro defined outside this section;
   presumably it takes its own reference to the object and returns its
   third argument (-1) from this function on failure -- confirm. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5157
5158 static int
load_int(UnpicklerObject * self)5159 load_int(UnpicklerObject *self)
5160 {
5161 PyObject *value;
5162 char *endptr, *s;
5163 Py_ssize_t len;
5164 long x;
5165
5166 if ((len = _Unpickler_Readline(self, &s)) < 0)
5167 return -1;
5168 if (len < 2)
5169 return bad_readline();
5170
5171 errno = 0;
5172 /* XXX: Should the base argument of strtol() be explicitly set to 10?
5173 XXX(avassalotti): Should this uses PyOS_strtol()? */
5174 x = strtol(s, &endptr, 0);
5175
5176 if (errno || (*endptr != '\n' && *endptr != '\0')) {
5177 /* Hm, maybe we've got something long. Let's try reading
5178 * it as a Python int object. */
5179 errno = 0;
5180 /* XXX: Same thing about the base here. */
5181 value = PyLong_FromString(s, NULL, 0);
5182 if (value == NULL) {
5183 PyErr_SetString(PyExc_ValueError,
5184 "could not convert string to int");
5185 return -1;
5186 }
5187 }
5188 else {
5189 if (len == 3 && (x == 0 || x == 1)) {
5190 if ((value = PyBool_FromLong(x)) == NULL)
5191 return -1;
5192 }
5193 else {
5194 if ((value = PyLong_FromLong(x)) == NULL)
5195 return -1;
5196 }
5197 }
5198
5199 PDATA_PUSH(self->stack, value, -1);
5200 return 0;
5201 }
5202
/* Handler that puts the given boolean singleton on the data stack.
   `boolean` must be Py_True or Py_False (checked by assert in debug
   builds only).  Returns 0 on success.  PDATA_APPEND is a macro defined
   outside this section; presumably it returns its third argument (-1)
   from this function on failure -- confirm. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5210
5211 /* s contains x bytes of an unsigned little-endian integer. Return its value
5212 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5213 */
5214 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5215 calc_binsize(char *bytes, int nbytes)
5216 {
5217 unsigned char *s = (unsigned char *)bytes;
5218 int i;
5219 size_t x = 0;
5220
5221 if (nbytes > (int)sizeof(size_t)) {
5222 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5223 * have 64-bit size that can't be represented on 32-bit platform.
5224 */
5225 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5226 if (s[i])
5227 return -1;
5228 }
5229 nbytes = (int)sizeof(size_t);
5230 }
5231 for (i = 0; i < nbytes; i++) {
5232 x |= (size_t) s[i] << (8 * i);
5233 }
5234
5235 if (x > PY_SSIZE_T_MAX)
5236 return -1;
5237 else
5238 return (Py_ssize_t) x;
5239 }
5240
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2 the value is
 * unsigned, but when nbytes is 4 it is a signed 32-bit value.  This is a
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long, so no shift can overflow
 * a signed type even where long is only 32 bits wide.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;

    for (int i = 0; i < nbytes; i++) {
        acc |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed.  When bit 31 is set the value is acc - 2**32; compute that
     * from the low 31 bits so that every intermediate result fits in a
     * long of any width.
     */
    if (nbytes == 4 && (acc & 0x80000000UL) != 0) {
        return (long)(acc & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1L;
    }

    return (long)acc;
}
5267
5268 static int
load_binintx(UnpicklerObject * self,char * s,int size)5269 load_binintx(UnpicklerObject *self, char *s, int size)
5270 {
5271 PyObject *value;
5272 long x;
5273
5274 x = calc_binint(s, size);
5275
5276 if ((value = PyLong_FromLong(x)) == NULL)
5277 return -1;
5278
5279 PDATA_PUSH(self->stack, value, -1);
5280 return 0;
5281 }
5282
5283 static int
load_binint(UnpicklerObject * self)5284 load_binint(UnpicklerObject *self)
5285 {
5286 char *s;
5287
5288 if (_Unpickler_Read(self, &s, 4) < 0)
5289 return -1;
5290
5291 return load_binintx(self, s, 4);
5292 }
5293
5294 static int
load_binint1(UnpicklerObject * self)5295 load_binint1(UnpicklerObject *self)
5296 {
5297 char *s;
5298
5299 if (_Unpickler_Read(self, &s, 1) < 0)
5300 return -1;
5301
5302 return load_binintx(self, s, 1);
5303 }
5304
5305 static int
load_binint2(UnpicklerObject * self)5306 load_binint2(UnpicklerObject *self)
5307 {
5308 char *s;
5309
5310 if (_Unpickler_Read(self, &s, 2) < 0)
5311 return -1;
5312
5313 return load_binintx(self, s, 2);
5314 }
5315
5316 static int
load_long(UnpicklerObject * self)5317 load_long(UnpicklerObject *self)
5318 {
5319 PyObject *value;
5320 char *s = NULL;
5321 Py_ssize_t len;
5322
5323 if ((len = _Unpickler_Readline(self, &s)) < 0)
5324 return -1;
5325 if (len < 2)
5326 return bad_readline();
5327
5328 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5329 the 'L' before calling PyLong_FromString. In order to maintain
5330 compatibility with Python 3.0.0, we don't actually *require*
5331 the 'L' to be present. */
5332 if (s[len-2] == 'L')
5333 s[len-2] = '\0';
5334 /* XXX: Should the base argument explicitly set to 10? */
5335 value = PyLong_FromString(s, NULL, 0);
5336 if (value == NULL)
5337 return -1;
5338
5339 PDATA_PUSH(self->stack, value, -1);
5340 return 0;
5341 }
5342
5343 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5344 * data following.
5345 */
5346 static int
load_counted_long(UnpicklerObject * self,int size)5347 load_counted_long(UnpicklerObject *self, int size)
5348 {
5349 PyObject *value;
5350 char *nbytes;
5351 char *pdata;
5352
5353 assert(size == 1 || size == 4);
5354 if (_Unpickler_Read(self, &nbytes, size) < 0)
5355 return -1;
5356
5357 size = calc_binint(nbytes, size);
5358 if (size < 0) {
5359 PickleState *st = _Pickle_GetGlobalState();
5360 /* Corrupt or hostile pickle -- we never write one like this */
5361 PyErr_SetString(st->UnpicklingError,
5362 "LONG pickle has negative byte count");
5363 return -1;
5364 }
5365
5366 if (size == 0)
5367 value = PyLong_FromLong(0L);
5368 else {
5369 /* Read the raw little-endian bytes and convert. */
5370 if (_Unpickler_Read(self, &pdata, size) < 0)
5371 return -1;
5372 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5373 1 /* little endian */ , 1 /* signed */ );
5374 }
5375 if (value == NULL)
5376 return -1;
5377 PDATA_PUSH(self->stack, value, -1);
5378 return 0;
5379 }
5380
5381 static int
load_float(UnpicklerObject * self)5382 load_float(UnpicklerObject *self)
5383 {
5384 PyObject *value;
5385 char *endptr, *s;
5386 Py_ssize_t len;
5387 double d;
5388
5389 if ((len = _Unpickler_Readline(self, &s)) < 0)
5390 return -1;
5391 if (len < 2)
5392 return bad_readline();
5393
5394 errno = 0;
5395 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5396 if (d == -1.0 && PyErr_Occurred())
5397 return -1;
5398 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5399 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5400 return -1;
5401 }
5402 value = PyFloat_FromDouble(d);
5403 if (value == NULL)
5404 return -1;
5405
5406 PDATA_PUSH(self->stack, value, -1);
5407 return 0;
5408 }
5409
5410 static int
load_binfloat(UnpicklerObject * self)5411 load_binfloat(UnpicklerObject *self)
5412 {
5413 PyObject *value;
5414 double x;
5415 char *s;
5416
5417 if (_Unpickler_Read(self, &s, 8) < 0)
5418 return -1;
5419
5420 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5421 if (x == -1.0 && PyErr_Occurred())
5422 return -1;
5423
5424 if ((value = PyFloat_FromDouble(x)) == NULL)
5425 return -1;
5426
5427 PDATA_PUSH(self->stack, value, -1);
5428 return 0;
5429 }
5430
5431 static int
load_string(UnpicklerObject * self)5432 load_string(UnpicklerObject *self)
5433 {
5434 PyObject *bytes;
5435 PyObject *obj;
5436 Py_ssize_t len;
5437 char *s, *p;
5438
5439 if ((len = _Unpickler_Readline(self, &s)) < 0)
5440 return -1;
5441 /* Strip the newline */
5442 len--;
5443 /* Strip outermost quotes */
5444 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5445 p = s + 1;
5446 len -= 2;
5447 }
5448 else {
5449 PickleState *st = _Pickle_GetGlobalState();
5450 PyErr_SetString(st->UnpicklingError,
5451 "the STRING opcode argument must be quoted");
5452 return -1;
5453 }
5454 assert(len >= 0);
5455
5456 /* Use the PyBytes API to decode the string, since that is what is used
5457 to encode, and then coerce the result to Unicode. */
5458 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5459 if (bytes == NULL)
5460 return -1;
5461
5462 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5463 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5464 if (strcmp(self->encoding, "bytes") == 0) {
5465 obj = bytes;
5466 }
5467 else {
5468 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5469 Py_DECREF(bytes);
5470 if (obj == NULL) {
5471 return -1;
5472 }
5473 }
5474
5475 PDATA_PUSH(self->stack, obj, -1);
5476 return 0;
5477 }
5478
5479 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5480 load_counted_binstring(UnpicklerObject *self, int nbytes)
5481 {
5482 PyObject *obj;
5483 Py_ssize_t size;
5484 char *s;
5485
5486 if (_Unpickler_Read(self, &s, nbytes) < 0)
5487 return -1;
5488
5489 size = calc_binsize(s, nbytes);
5490 if (size < 0) {
5491 PickleState *st = _Pickle_GetGlobalState();
5492 PyErr_Format(st->UnpicklingError,
5493 "BINSTRING exceeds system's maximum size of %zd bytes",
5494 PY_SSIZE_T_MAX);
5495 return -1;
5496 }
5497
5498 if (_Unpickler_Read(self, &s, size) < 0)
5499 return -1;
5500
5501 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5502 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5503 if (strcmp(self->encoding, "bytes") == 0) {
5504 obj = PyBytes_FromStringAndSize(s, size);
5505 }
5506 else {
5507 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5508 }
5509 if (obj == NULL) {
5510 return -1;
5511 }
5512
5513 PDATA_PUSH(self->stack, obj, -1);
5514 return 0;
5515 }
5516
5517 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5518 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5519 {
5520 PyObject *bytes;
5521 Py_ssize_t size;
5522 char *s;
5523
5524 if (_Unpickler_Read(self, &s, nbytes) < 0)
5525 return -1;
5526
5527 size = calc_binsize(s, nbytes);
5528 if (size < 0) {
5529 PyErr_Format(PyExc_OverflowError,
5530 "BINBYTES exceeds system's maximum size of %zd bytes",
5531 PY_SSIZE_T_MAX);
5532 return -1;
5533 }
5534
5535 bytes = PyBytes_FromStringAndSize(NULL, size);
5536 if (bytes == NULL)
5537 return -1;
5538 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5539 Py_DECREF(bytes);
5540 return -1;
5541 }
5542
5543 PDATA_PUSH(self->stack, bytes, -1);
5544 return 0;
5545 }
5546
5547 static int
load_counted_bytearray(UnpicklerObject * self)5548 load_counted_bytearray(UnpicklerObject *self)
5549 {
5550 PyObject *bytearray;
5551 Py_ssize_t size;
5552 char *s;
5553
5554 if (_Unpickler_Read(self, &s, 8) < 0) {
5555 return -1;
5556 }
5557
5558 size = calc_binsize(s, 8);
5559 if (size < 0) {
5560 PyErr_Format(PyExc_OverflowError,
5561 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5562 PY_SSIZE_T_MAX);
5563 return -1;
5564 }
5565
5566 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5567 if (bytearray == NULL) {
5568 return -1;
5569 }
5570 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5571 Py_DECREF(bytearray);
5572 return -1;
5573 }
5574
5575 PDATA_PUSH(self->stack, bytearray, -1);
5576 return 0;
5577 }
5578
5579 static int
load_next_buffer(UnpicklerObject * self)5580 load_next_buffer(UnpicklerObject *self)
5581 {
5582 if (self->buffers == NULL) {
5583 PickleState *st = _Pickle_GetGlobalState();
5584 PyErr_SetString(st->UnpicklingError,
5585 "pickle stream refers to out-of-band data "
5586 "but no *buffers* argument was given");
5587 return -1;
5588 }
5589 PyObject *buf = PyIter_Next(self->buffers);
5590 if (buf == NULL) {
5591 if (!PyErr_Occurred()) {
5592 PickleState *st = _Pickle_GetGlobalState();
5593 PyErr_SetString(st->UnpicklingError,
5594 "not enough out-of-band buffers");
5595 }
5596 return -1;
5597 }
5598
5599 PDATA_PUSH(self->stack, buf, -1);
5600 return 0;
5601 }
5602
5603 static int
load_readonly_buffer(UnpicklerObject * self)5604 load_readonly_buffer(UnpicklerObject *self)
5605 {
5606 Py_ssize_t len = Py_SIZE(self->stack);
5607 if (len <= self->stack->fence) {
5608 return Pdata_stack_underflow(self->stack);
5609 }
5610
5611 PyObject *obj = self->stack->data[len - 1];
5612 PyObject *view = PyMemoryView_FromObject(obj);
5613 if (view == NULL) {
5614 return -1;
5615 }
5616 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5617 /* Original object is writable */
5618 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5619 self->stack->data[len - 1] = view;
5620 Py_DECREF(obj);
5621 }
5622 else {
5623 /* Original object is read-only, no need to replace it */
5624 Py_DECREF(view);
5625 }
5626 return 0;
5627 }
5628
5629 static int
load_unicode(UnpicklerObject * self)5630 load_unicode(UnpicklerObject *self)
5631 {
5632 PyObject *str;
5633 Py_ssize_t len;
5634 char *s = NULL;
5635
5636 if ((len = _Unpickler_Readline(self, &s)) < 0)
5637 return -1;
5638 if (len < 1)
5639 return bad_readline();
5640
5641 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5642 if (str == NULL)
5643 return -1;
5644
5645 PDATA_PUSH(self->stack, str, -1);
5646 return 0;
5647 }
5648
5649 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5650 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5651 {
5652 PyObject *str;
5653 Py_ssize_t size;
5654 char *s;
5655
5656 if (_Unpickler_Read(self, &s, nbytes) < 0)
5657 return -1;
5658
5659 size = calc_binsize(s, nbytes);
5660 if (size < 0) {
5661 PyErr_Format(PyExc_OverflowError,
5662 "BINUNICODE exceeds system's maximum size of %zd bytes",
5663 PY_SSIZE_T_MAX);
5664 return -1;
5665 }
5666
5667 if (_Unpickler_Read(self, &s, size) < 0)
5668 return -1;
5669
5670 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5671 if (str == NULL)
5672 return -1;
5673
5674 PDATA_PUSH(self->stack, str, -1);
5675 return 0;
5676 }
5677
5678 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5679 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5680 {
5681 PyObject *tuple;
5682
5683 if (Py_SIZE(self->stack) < len)
5684 return Pdata_stack_underflow(self->stack);
5685
5686 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5687 if (tuple == NULL)
5688 return -1;
5689 PDATA_PUSH(self->stack, tuple, -1);
5690 return 0;
5691 }
5692
5693 static int
load_tuple(UnpicklerObject * self)5694 load_tuple(UnpicklerObject *self)
5695 {
5696 Py_ssize_t i;
5697
5698 if ((i = marker(self)) < 0)
5699 return -1;
5700
5701 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5702 }
5703
5704 static int
load_empty_list(UnpicklerObject * self)5705 load_empty_list(UnpicklerObject *self)
5706 {
5707 PyObject *list;
5708
5709 if ((list = PyList_New(0)) == NULL)
5710 return -1;
5711 PDATA_PUSH(self->stack, list, -1);
5712 return 0;
5713 }
5714
5715 static int
load_empty_dict(UnpicklerObject * self)5716 load_empty_dict(UnpicklerObject *self)
5717 {
5718 PyObject *dict;
5719
5720 if ((dict = PyDict_New()) == NULL)
5721 return -1;
5722 PDATA_PUSH(self->stack, dict, -1);
5723 return 0;
5724 }
5725
5726 static int
load_empty_set(UnpicklerObject * self)5727 load_empty_set(UnpicklerObject *self)
5728 {
5729 PyObject *set;
5730
5731 if ((set = PySet_New(NULL)) == NULL)
5732 return -1;
5733 PDATA_PUSH(self->stack, set, -1);
5734 return 0;
5735 }
5736
5737 static int
load_list(UnpicklerObject * self)5738 load_list(UnpicklerObject *self)
5739 {
5740 PyObject *list;
5741 Py_ssize_t i;
5742
5743 if ((i = marker(self)) < 0)
5744 return -1;
5745
5746 list = Pdata_poplist(self->stack, i);
5747 if (list == NULL)
5748 return -1;
5749 PDATA_PUSH(self->stack, list, -1);
5750 return 0;
5751 }
5752
5753 static int
load_dict(UnpicklerObject * self)5754 load_dict(UnpicklerObject *self)
5755 {
5756 PyObject *dict, *key, *value;
5757 Py_ssize_t i, j, k;
5758
5759 if ((i = marker(self)) < 0)
5760 return -1;
5761 j = Py_SIZE(self->stack);
5762
5763 if ((dict = PyDict_New()) == NULL)
5764 return -1;
5765
5766 if ((j - i) % 2 != 0) {
5767 PickleState *st = _Pickle_GetGlobalState();
5768 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5769 Py_DECREF(dict);
5770 return -1;
5771 }
5772
5773 for (k = i + 1; k < j; k += 2) {
5774 key = self->stack->data[k - 1];
5775 value = self->stack->data[k];
5776 if (PyDict_SetItem(dict, key, value) < 0) {
5777 Py_DECREF(dict);
5778 return -1;
5779 }
5780 }
5781 Pdata_clear(self->stack, i);
5782 PDATA_PUSH(self->stack, dict, -1);
5783 return 0;
5784 }
5785
5786 static int
load_frozenset(UnpicklerObject * self)5787 load_frozenset(UnpicklerObject *self)
5788 {
5789 PyObject *items;
5790 PyObject *frozenset;
5791 Py_ssize_t i;
5792
5793 if ((i = marker(self)) < 0)
5794 return -1;
5795
5796 items = Pdata_poptuple(self->stack, i);
5797 if (items == NULL)
5798 return -1;
5799
5800 frozenset = PyFrozenSet_New(items);
5801 Py_DECREF(items);
5802 if (frozenset == NULL)
5803 return -1;
5804
5805 PDATA_PUSH(self->stack, frozenset, -1);
5806 return 0;
5807 }
5808
5809 static PyObject *
instantiate(PyObject * cls,PyObject * args)5810 instantiate(PyObject *cls, PyObject *args)
5811 {
5812 /* Caller must assure args are a tuple. Normally, args come from
5813 Pdata_poptuple which packs objects from the top of the stack
5814 into a newly created tuple. */
5815 assert(PyTuple_Check(args));
5816 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5817 _Py_IDENTIFIER(__getinitargs__);
5818 _Py_IDENTIFIER(__new__);
5819 PyObject *func;
5820 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5821 return NULL;
5822 }
5823 if (func == NULL) {
5824 return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
5825 }
5826 Py_DECREF(func);
5827 }
5828 return PyObject_CallObject(cls, args);
5829 }
5830
5831 static int
load_obj(UnpicklerObject * self)5832 load_obj(UnpicklerObject *self)
5833 {
5834 PyObject *cls, *args, *obj = NULL;
5835 Py_ssize_t i;
5836
5837 if ((i = marker(self)) < 0)
5838 return -1;
5839
5840 if (Py_SIZE(self->stack) - i < 1)
5841 return Pdata_stack_underflow(self->stack);
5842
5843 args = Pdata_poptuple(self->stack, i + 1);
5844 if (args == NULL)
5845 return -1;
5846
5847 PDATA_POP(self->stack, cls);
5848 if (cls) {
5849 obj = instantiate(cls, args);
5850 Py_DECREF(cls);
5851 }
5852 Py_DECREF(args);
5853 if (obj == NULL)
5854 return -1;
5855
5856 PDATA_PUSH(self->stack, obj, -1);
5857 return 0;
5858 }
5859
5860 static int
load_inst(UnpicklerObject * self)5861 load_inst(UnpicklerObject *self)
5862 {
5863 PyObject *cls = NULL;
5864 PyObject *args = NULL;
5865 PyObject *obj = NULL;
5866 PyObject *module_name;
5867 PyObject *class_name;
5868 Py_ssize_t len;
5869 Py_ssize_t i;
5870 char *s;
5871
5872 if ((i = marker(self)) < 0)
5873 return -1;
5874 if ((len = _Unpickler_Readline(self, &s)) < 0)
5875 return -1;
5876 if (len < 2)
5877 return bad_readline();
5878
5879 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5880 identifiers are permitted in Python 3.0, since the INST opcode is only
5881 supported by older protocols on Python 2.x. */
5882 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5883 if (module_name == NULL)
5884 return -1;
5885
5886 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5887 if (len < 2) {
5888 Py_DECREF(module_name);
5889 return bad_readline();
5890 }
5891 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5892 if (class_name != NULL) {
5893 cls = find_class(self, module_name, class_name);
5894 Py_DECREF(class_name);
5895 }
5896 }
5897 Py_DECREF(module_name);
5898
5899 if (cls == NULL)
5900 return -1;
5901
5902 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5903 obj = instantiate(cls, args);
5904 Py_DECREF(args);
5905 }
5906 Py_DECREF(cls);
5907
5908 if (obj == NULL)
5909 return -1;
5910
5911 PDATA_PUSH(self->stack, obj, -1);
5912 return 0;
5913 }
5914
5915 static int
load_newobj(UnpicklerObject * self)5916 load_newobj(UnpicklerObject *self)
5917 {
5918 PyObject *args = NULL;
5919 PyObject *clsraw = NULL;
5920 PyTypeObject *cls; /* clsraw cast to its true type */
5921 PyObject *obj;
5922 PickleState *st = _Pickle_GetGlobalState();
5923
5924 /* Stack is ... cls argtuple, and we want to call
5925 * cls.__new__(cls, *argtuple).
5926 */
5927 PDATA_POP(self->stack, args);
5928 if (args == NULL)
5929 goto error;
5930 if (!PyTuple_Check(args)) {
5931 PyErr_SetString(st->UnpicklingError,
5932 "NEWOBJ expected an arg " "tuple.");
5933 goto error;
5934 }
5935
5936 PDATA_POP(self->stack, clsraw);
5937 cls = (PyTypeObject *)clsraw;
5938 if (cls == NULL)
5939 goto error;
5940 if (!PyType_Check(cls)) {
5941 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5942 "isn't a type object");
5943 goto error;
5944 }
5945 if (cls->tp_new == NULL) {
5946 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5947 "has NULL tp_new");
5948 goto error;
5949 }
5950
5951 /* Call __new__. */
5952 obj = cls->tp_new(cls, args, NULL);
5953 if (obj == NULL)
5954 goto error;
5955
5956 Py_DECREF(args);
5957 Py_DECREF(clsraw);
5958 PDATA_PUSH(self->stack, obj, -1);
5959 return 0;
5960
5961 error:
5962 Py_XDECREF(args);
5963 Py_XDECREF(clsraw);
5964 return -1;
5965 }
5966
5967 static int
load_newobj_ex(UnpicklerObject * self)5968 load_newobj_ex(UnpicklerObject *self)
5969 {
5970 PyObject *cls, *args, *kwargs;
5971 PyObject *obj;
5972 PickleState *st = _Pickle_GetGlobalState();
5973
5974 PDATA_POP(self->stack, kwargs);
5975 if (kwargs == NULL) {
5976 return -1;
5977 }
5978 PDATA_POP(self->stack, args);
5979 if (args == NULL) {
5980 Py_DECREF(kwargs);
5981 return -1;
5982 }
5983 PDATA_POP(self->stack, cls);
5984 if (cls == NULL) {
5985 Py_DECREF(kwargs);
5986 Py_DECREF(args);
5987 return -1;
5988 }
5989
5990 if (!PyType_Check(cls)) {
5991 PyErr_Format(st->UnpicklingError,
5992 "NEWOBJ_EX class argument must be a type, not %.200s",
5993 Py_TYPE(cls)->tp_name);
5994 goto error;
5995 }
5996
5997 if (((PyTypeObject *)cls)->tp_new == NULL) {
5998 PyErr_SetString(st->UnpicklingError,
5999 "NEWOBJ_EX class argument doesn't have __new__");
6000 goto error;
6001 }
6002 if (!PyTuple_Check(args)) {
6003 PyErr_Format(st->UnpicklingError,
6004 "NEWOBJ_EX args argument must be a tuple, not %.200s",
6005 Py_TYPE(args)->tp_name);
6006 goto error;
6007 }
6008 if (!PyDict_Check(kwargs)) {
6009 PyErr_Format(st->UnpicklingError,
6010 "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
6011 Py_TYPE(kwargs)->tp_name);
6012 goto error;
6013 }
6014
6015 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
6016 Py_DECREF(kwargs);
6017 Py_DECREF(args);
6018 Py_DECREF(cls);
6019 if (obj == NULL) {
6020 return -1;
6021 }
6022 PDATA_PUSH(self->stack, obj, -1);
6023 return 0;
6024
6025 error:
6026 Py_DECREF(kwargs);
6027 Py_DECREF(args);
6028 Py_DECREF(cls);
6029 return -1;
6030 }
6031
6032 static int
load_global(UnpicklerObject * self)6033 load_global(UnpicklerObject *self)
6034 {
6035 PyObject *global = NULL;
6036 PyObject *module_name;
6037 PyObject *global_name;
6038 Py_ssize_t len;
6039 char *s;
6040
6041 if ((len = _Unpickler_Readline(self, &s)) < 0)
6042 return -1;
6043 if (len < 2)
6044 return bad_readline();
6045 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6046 if (!module_name)
6047 return -1;
6048
6049 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6050 if (len < 2) {
6051 Py_DECREF(module_name);
6052 return bad_readline();
6053 }
6054 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6055 if (global_name) {
6056 global = find_class(self, module_name, global_name);
6057 Py_DECREF(global_name);
6058 }
6059 }
6060 Py_DECREF(module_name);
6061
6062 if (global == NULL)
6063 return -1;
6064 PDATA_PUSH(self->stack, global, -1);
6065 return 0;
6066 }
6067
6068 static int
load_stack_global(UnpicklerObject * self)6069 load_stack_global(UnpicklerObject *self)
6070 {
6071 PyObject *global;
6072 PyObject *module_name;
6073 PyObject *global_name;
6074
6075 PDATA_POP(self->stack, global_name);
6076 PDATA_POP(self->stack, module_name);
6077 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6078 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6079 PickleState *st = _Pickle_GetGlobalState();
6080 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6081 Py_XDECREF(global_name);
6082 Py_XDECREF(module_name);
6083 return -1;
6084 }
6085 global = find_class(self, module_name, global_name);
6086 Py_DECREF(global_name);
6087 Py_DECREF(module_name);
6088 if (global == NULL)
6089 return -1;
6090 PDATA_PUSH(self->stack, global, -1);
6091 return 0;
6092 }
6093
6094 static int
load_persid(UnpicklerObject * self)6095 load_persid(UnpicklerObject *self)
6096 {
6097 PyObject *pid, *obj;
6098 Py_ssize_t len;
6099 char *s;
6100
6101 if (self->pers_func) {
6102 if ((len = _Unpickler_Readline(self, &s)) < 0)
6103 return -1;
6104 if (len < 1)
6105 return bad_readline();
6106
6107 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6108 if (pid == NULL) {
6109 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6110 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6111 "persistent IDs in protocol 0 must be "
6112 "ASCII strings");
6113 }
6114 return -1;
6115 }
6116
6117 obj = call_method(self->pers_func, self->pers_func_self, pid);
6118 Py_DECREF(pid);
6119 if (obj == NULL)
6120 return -1;
6121
6122 PDATA_PUSH(self->stack, obj, -1);
6123 return 0;
6124 }
6125 else {
6126 PickleState *st = _Pickle_GetGlobalState();
6127 PyErr_SetString(st->UnpicklingError,
6128 "A load persistent id instruction was encountered,\n"
6129 "but no persistent_load function was specified.");
6130 return -1;
6131 }
6132 }
6133
6134 static int
load_binpersid(UnpicklerObject * self)6135 load_binpersid(UnpicklerObject *self)
6136 {
6137 PyObject *pid, *obj;
6138
6139 if (self->pers_func) {
6140 PDATA_POP(self->stack, pid);
6141 if (pid == NULL)
6142 return -1;
6143
6144 obj = call_method(self->pers_func, self->pers_func_self, pid);
6145 Py_DECREF(pid);
6146 if (obj == NULL)
6147 return -1;
6148
6149 PDATA_PUSH(self->stack, obj, -1);
6150 return 0;
6151 }
6152 else {
6153 PickleState *st = _Pickle_GetGlobalState();
6154 PyErr_SetString(st->UnpicklingError,
6155 "A load persistent id instruction was encountered,\n"
6156 "but no persistent_load function was specified.");
6157 return -1;
6158 }
6159 }
6160
6161 static int
load_pop(UnpicklerObject * self)6162 load_pop(UnpicklerObject *self)
6163 {
6164 Py_ssize_t len = Py_SIZE(self->stack);
6165
6166 /* Note that we split the (pickle.py) stack into two stacks,
6167 * an object stack and a mark stack. We have to be clever and
6168 * pop the right one. We do this by looking at the top of the
6169 * mark stack first, and only signalling a stack underflow if
6170 * the object stack is empty and the mark stack doesn't match
6171 * our expectations.
6172 */
6173 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6174 self->num_marks--;
6175 self->stack->mark_set = self->num_marks != 0;
6176 self->stack->fence = self->num_marks ?
6177 self->marks[self->num_marks - 1] : 0;
6178 } else if (len <= self->stack->fence)
6179 return Pdata_stack_underflow(self->stack);
6180 else {
6181 len--;
6182 Py_DECREF(self->stack->data[len]);
6183 Py_SET_SIZE(self->stack, len);
6184 }
6185 return 0;
6186 }
6187
6188 static int
load_pop_mark(UnpicklerObject * self)6189 load_pop_mark(UnpicklerObject *self)
6190 {
6191 Py_ssize_t i;
6192
6193 if ((i = marker(self)) < 0)
6194 return -1;
6195
6196 Pdata_clear(self->stack, i);
6197
6198 return 0;
6199 }
6200
6201 static int
load_dup(UnpicklerObject * self)6202 load_dup(UnpicklerObject *self)
6203 {
6204 PyObject *last;
6205 Py_ssize_t len = Py_SIZE(self->stack);
6206
6207 if (len <= self->stack->fence)
6208 return Pdata_stack_underflow(self->stack);
6209 last = self->stack->data[len - 1];
6210 PDATA_APPEND(self->stack, last, -1);
6211 return 0;
6212 }
6213
6214 static int
load_get(UnpicklerObject * self)6215 load_get(UnpicklerObject *self)
6216 {
6217 PyObject *key, *value;
6218 Py_ssize_t idx;
6219 Py_ssize_t len;
6220 char *s;
6221
6222 if ((len = _Unpickler_Readline(self, &s)) < 0)
6223 return -1;
6224 if (len < 2)
6225 return bad_readline();
6226
6227 key = PyLong_FromString(s, NULL, 10);
6228 if (key == NULL)
6229 return -1;
6230 idx = PyLong_AsSsize_t(key);
6231 if (idx == -1 && PyErr_Occurred()) {
6232 Py_DECREF(key);
6233 return -1;
6234 }
6235
6236 value = _Unpickler_MemoGet(self, idx);
6237 if (value == NULL) {
6238 if (!PyErr_Occurred()) {
6239 PickleState *st = _Pickle_GetGlobalState();
6240 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6241 }
6242 Py_DECREF(key);
6243 return -1;
6244 }
6245 Py_DECREF(key);
6246
6247 PDATA_APPEND(self->stack, value, -1);
6248 return 0;
6249 }
6250
6251 static int
load_binget(UnpicklerObject * self)6252 load_binget(UnpicklerObject *self)
6253 {
6254 PyObject *value;
6255 Py_ssize_t idx;
6256 char *s;
6257
6258 if (_Unpickler_Read(self, &s, 1) < 0)
6259 return -1;
6260
6261 idx = Py_CHARMASK(s[0]);
6262
6263 value = _Unpickler_MemoGet(self, idx);
6264 if (value == NULL) {
6265 PyObject *key = PyLong_FromSsize_t(idx);
6266 if (key != NULL) {
6267 PickleState *st = _Pickle_GetGlobalState();
6268 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6269 Py_DECREF(key);
6270 }
6271 return -1;
6272 }
6273
6274 PDATA_APPEND(self->stack, value, -1);
6275 return 0;
6276 }
6277
6278 static int
load_long_binget(UnpicklerObject * self)6279 load_long_binget(UnpicklerObject *self)
6280 {
6281 PyObject *value;
6282 Py_ssize_t idx;
6283 char *s;
6284
6285 if (_Unpickler_Read(self, &s, 4) < 0)
6286 return -1;
6287
6288 idx = calc_binsize(s, 4);
6289
6290 value = _Unpickler_MemoGet(self, idx);
6291 if (value == NULL) {
6292 PyObject *key = PyLong_FromSsize_t(idx);
6293 if (key != NULL) {
6294 PickleState *st = _Pickle_GetGlobalState();
6295 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6296 Py_DECREF(key);
6297 }
6298 return -1;
6299 }
6300
6301 PDATA_APPEND(self->stack, value, -1);
6302 return 0;
6303 }
6304
6305 /* Push an object from the extension registry (EXT[124]). nbytes is
6306 * the number of bytes following the opcode, holding the index (code) value.
6307 */
6308 static int
load_extension(UnpicklerObject * self,int nbytes)6309 load_extension(UnpicklerObject *self, int nbytes)
6310 {
6311 char *codebytes; /* the nbytes bytes after the opcode */
6312 long code; /* calc_binint returns long */
6313 PyObject *py_code; /* code as a Python int */
6314 PyObject *obj; /* the object to push */
6315 PyObject *pair; /* (module_name, class_name) */
6316 PyObject *module_name, *class_name;
6317 PickleState *st = _Pickle_GetGlobalState();
6318
6319 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6320 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6321 return -1;
6322 code = calc_binint(codebytes, nbytes);
6323 if (code <= 0) { /* note that 0 is forbidden */
6324 /* Corrupt or hostile pickle. */
6325 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6326 return -1;
6327 }
6328
6329 /* Look for the code in the cache. */
6330 py_code = PyLong_FromLong(code);
6331 if (py_code == NULL)
6332 return -1;
6333 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6334 if (obj != NULL) {
6335 /* Bingo. */
6336 Py_DECREF(py_code);
6337 PDATA_APPEND(self->stack, obj, -1);
6338 return 0;
6339 }
6340 if (PyErr_Occurred()) {
6341 Py_DECREF(py_code);
6342 return -1;
6343 }
6344
6345 /* Look up the (module_name, class_name) pair. */
6346 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6347 if (pair == NULL) {
6348 Py_DECREF(py_code);
6349 if (!PyErr_Occurred()) {
6350 PyErr_Format(PyExc_ValueError, "unregistered extension "
6351 "code %ld", code);
6352 }
6353 return -1;
6354 }
6355 /* Since the extension registry is manipulable via Python code,
6356 * confirm that pair is really a 2-tuple of strings.
6357 */
6358 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6359 goto error;
6360 }
6361
6362 module_name = PyTuple_GET_ITEM(pair, 0);
6363 if (!PyUnicode_Check(module_name)) {
6364 goto error;
6365 }
6366
6367 class_name = PyTuple_GET_ITEM(pair, 1);
6368 if (!PyUnicode_Check(class_name)) {
6369 goto error;
6370 }
6371
6372 /* Load the object. */
6373 obj = find_class(self, module_name, class_name);
6374 if (obj == NULL) {
6375 Py_DECREF(py_code);
6376 return -1;
6377 }
6378 /* Cache code -> obj. */
6379 code = PyDict_SetItem(st->extension_cache, py_code, obj);
6380 Py_DECREF(py_code);
6381 if (code < 0) {
6382 Py_DECREF(obj);
6383 return -1;
6384 }
6385 PDATA_PUSH(self->stack, obj, -1);
6386 return 0;
6387
6388 error:
6389 Py_DECREF(py_code);
6390 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6391 "isn't a 2-tuple of strings", code);
6392 return -1;
6393 }
6394
6395 static int
load_put(UnpicklerObject * self)6396 load_put(UnpicklerObject *self)
6397 {
6398 PyObject *key, *value;
6399 Py_ssize_t idx;
6400 Py_ssize_t len;
6401 char *s = NULL;
6402
6403 if ((len = _Unpickler_Readline(self, &s)) < 0)
6404 return -1;
6405 if (len < 2)
6406 return bad_readline();
6407 if (Py_SIZE(self->stack) <= self->stack->fence)
6408 return Pdata_stack_underflow(self->stack);
6409 value = self->stack->data[Py_SIZE(self->stack) - 1];
6410
6411 key = PyLong_FromString(s, NULL, 10);
6412 if (key == NULL)
6413 return -1;
6414 idx = PyLong_AsSsize_t(key);
6415 Py_DECREF(key);
6416 if (idx < 0) {
6417 if (!PyErr_Occurred())
6418 PyErr_SetString(PyExc_ValueError,
6419 "negative PUT argument");
6420 return -1;
6421 }
6422
6423 return _Unpickler_MemoPut(self, idx, value);
6424 }
6425
6426 static int
load_binput(UnpicklerObject * self)6427 load_binput(UnpicklerObject *self)
6428 {
6429 PyObject *value;
6430 Py_ssize_t idx;
6431 char *s;
6432
6433 if (_Unpickler_Read(self, &s, 1) < 0)
6434 return -1;
6435
6436 if (Py_SIZE(self->stack) <= self->stack->fence)
6437 return Pdata_stack_underflow(self->stack);
6438 value = self->stack->data[Py_SIZE(self->stack) - 1];
6439
6440 idx = Py_CHARMASK(s[0]);
6441
6442 return _Unpickler_MemoPut(self, idx, value);
6443 }
6444
6445 static int
load_long_binput(UnpicklerObject * self)6446 load_long_binput(UnpicklerObject *self)
6447 {
6448 PyObject *value;
6449 Py_ssize_t idx;
6450 char *s;
6451
6452 if (_Unpickler_Read(self, &s, 4) < 0)
6453 return -1;
6454
6455 if (Py_SIZE(self->stack) <= self->stack->fence)
6456 return Pdata_stack_underflow(self->stack);
6457 value = self->stack->data[Py_SIZE(self->stack) - 1];
6458
6459 idx = calc_binsize(s, 4);
6460 if (idx < 0) {
6461 PyErr_SetString(PyExc_ValueError,
6462 "negative LONG_BINPUT argument");
6463 return -1;
6464 }
6465
6466 return _Unpickler_MemoPut(self, idx, value);
6467 }
6468
6469 static int
load_memoize(UnpicklerObject * self)6470 load_memoize(UnpicklerObject *self)
6471 {
6472 PyObject *value;
6473
6474 if (Py_SIZE(self->stack) <= self->stack->fence)
6475 return Pdata_stack_underflow(self->stack);
6476 value = self->stack->data[Py_SIZE(self->stack) - 1];
6477
6478 return _Unpickler_MemoPut(self, self->memo_len, value);
6479 }
6480
6481 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6482 do_append(UnpicklerObject *self, Py_ssize_t x)
6483 {
6484 PyObject *value;
6485 PyObject *slice;
6486 PyObject *list;
6487 PyObject *result;
6488 Py_ssize_t len, i;
6489
6490 len = Py_SIZE(self->stack);
6491 if (x > len || x <= self->stack->fence)
6492 return Pdata_stack_underflow(self->stack);
6493 if (len == x) /* nothing to do */
6494 return 0;
6495
6496 list = self->stack->data[x - 1];
6497
6498 if (PyList_CheckExact(list)) {
6499 Py_ssize_t list_len;
6500 int ret;
6501
6502 slice = Pdata_poplist(self->stack, x);
6503 if (!slice)
6504 return -1;
6505 list_len = PyList_GET_SIZE(list);
6506 ret = PyList_SetSlice(list, list_len, list_len, slice);
6507 Py_DECREF(slice);
6508 return ret;
6509 }
6510 else {
6511 PyObject *extend_func;
6512 _Py_IDENTIFIER(extend);
6513
6514 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6515 return -1;
6516 }
6517 if (extend_func != NULL) {
6518 slice = Pdata_poplist(self->stack, x);
6519 if (!slice) {
6520 Py_DECREF(extend_func);
6521 return -1;
6522 }
6523 result = _Pickle_FastCall(extend_func, slice);
6524 Py_DECREF(extend_func);
6525 if (result == NULL)
6526 return -1;
6527 Py_DECREF(result);
6528 }
6529 else {
6530 PyObject *append_func;
6531 _Py_IDENTIFIER(append);
6532
6533 /* Even if the PEP 307 requires extend() and append() methods,
6534 fall back on append() if the object has no extend() method
6535 for backward compatibility. */
6536 append_func = _PyObject_GetAttrId(list, &PyId_append);
6537 if (append_func == NULL)
6538 return -1;
6539 for (i = x; i < len; i++) {
6540 value = self->stack->data[i];
6541 result = _Pickle_FastCall(append_func, value);
6542 if (result == NULL) {
6543 Pdata_clear(self->stack, i + 1);
6544 Py_SET_SIZE(self->stack, x);
6545 Py_DECREF(append_func);
6546 return -1;
6547 }
6548 Py_DECREF(result);
6549 }
6550 Py_SET_SIZE(self->stack, x);
6551 Py_DECREF(append_func);
6552 }
6553 }
6554
6555 return 0;
6556 }
6557
6558 static int
load_append(UnpicklerObject * self)6559 load_append(UnpicklerObject *self)
6560 {
6561 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6562 return Pdata_stack_underflow(self->stack);
6563 return do_append(self, Py_SIZE(self->stack) - 1);
6564 }
6565
6566 static int
load_appends(UnpicklerObject * self)6567 load_appends(UnpicklerObject *self)
6568 {
6569 Py_ssize_t i = marker(self);
6570 if (i < 0)
6571 return -1;
6572 return do_append(self, i);
6573 }
6574
6575 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6576 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6577 {
6578 PyObject *value, *key;
6579 PyObject *dict;
6580 Py_ssize_t len, i;
6581 int status = 0;
6582
6583 len = Py_SIZE(self->stack);
6584 if (x > len || x <= self->stack->fence)
6585 return Pdata_stack_underflow(self->stack);
6586 if (len == x) /* nothing to do */
6587 return 0;
6588 if ((len - x) % 2 != 0) {
6589 PickleState *st = _Pickle_GetGlobalState();
6590 /* Currupt or hostile pickle -- we never write one like this. */
6591 PyErr_SetString(st->UnpicklingError,
6592 "odd number of items for SETITEMS");
6593 return -1;
6594 }
6595
6596 /* Here, dict does not actually need to be a PyDict; it could be anything
6597 that supports the __setitem__ attribute. */
6598 dict = self->stack->data[x - 1];
6599
6600 for (i = x + 1; i < len; i += 2) {
6601 key = self->stack->data[i - 1];
6602 value = self->stack->data[i];
6603 if (PyObject_SetItem(dict, key, value) < 0) {
6604 status = -1;
6605 break;
6606 }
6607 }
6608
6609 Pdata_clear(self->stack, x);
6610 return status;
6611 }
6612
6613 static int
load_setitem(UnpicklerObject * self)6614 load_setitem(UnpicklerObject *self)
6615 {
6616 return do_setitems(self, Py_SIZE(self->stack) - 2);
6617 }
6618
6619 static int
load_setitems(UnpicklerObject * self)6620 load_setitems(UnpicklerObject *self)
6621 {
6622 Py_ssize_t i = marker(self);
6623 if (i < 0)
6624 return -1;
6625 return do_setitems(self, i);
6626 }
6627
6628 static int
load_additems(UnpicklerObject * self)6629 load_additems(UnpicklerObject *self)
6630 {
6631 PyObject *set;
6632 Py_ssize_t mark, len, i;
6633
6634 mark = marker(self);
6635 if (mark < 0)
6636 return -1;
6637 len = Py_SIZE(self->stack);
6638 if (mark > len || mark <= self->stack->fence)
6639 return Pdata_stack_underflow(self->stack);
6640 if (len == mark) /* nothing to do */
6641 return 0;
6642
6643 set = self->stack->data[mark - 1];
6644
6645 if (PySet_Check(set)) {
6646 PyObject *items;
6647 int status;
6648
6649 items = Pdata_poptuple(self->stack, mark);
6650 if (items == NULL)
6651 return -1;
6652
6653 status = _PySet_Update(set, items);
6654 Py_DECREF(items);
6655 return status;
6656 }
6657 else {
6658 PyObject *add_func;
6659 _Py_IDENTIFIER(add);
6660
6661 add_func = _PyObject_GetAttrId(set, &PyId_add);
6662 if (add_func == NULL)
6663 return -1;
6664 for (i = mark; i < len; i++) {
6665 PyObject *result;
6666 PyObject *item;
6667
6668 item = self->stack->data[i];
6669 result = _Pickle_FastCall(add_func, item);
6670 if (result == NULL) {
6671 Pdata_clear(self->stack, i + 1);
6672 Py_SET_SIZE(self->stack, mark);
6673 return -1;
6674 }
6675 Py_DECREF(result);
6676 }
6677 Py_SET_SIZE(self->stack, mark);
6678 }
6679
6680 return 0;
6681 }
6682
6683 static int
load_build(UnpicklerObject * self)6684 load_build(UnpicklerObject *self)
6685 {
6686 PyObject *state, *inst, *slotstate;
6687 PyObject *setstate;
6688 int status = 0;
6689 _Py_IDENTIFIER(__setstate__);
6690
6691 /* Stack is ... instance, state. We want to leave instance at
6692 * the stack top, possibly mutated via instance.__setstate__(state).
6693 */
6694 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6695 return Pdata_stack_underflow(self->stack);
6696
6697 PDATA_POP(self->stack, state);
6698 if (state == NULL)
6699 return -1;
6700
6701 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6702
6703 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6704 Py_DECREF(state);
6705 return -1;
6706 }
6707 if (setstate != NULL) {
6708 PyObject *result;
6709
6710 /* The explicit __setstate__ is responsible for everything. */
6711 result = _Pickle_FastCall(setstate, state);
6712 Py_DECREF(setstate);
6713 if (result == NULL)
6714 return -1;
6715 Py_DECREF(result);
6716 return 0;
6717 }
6718
6719 /* A default __setstate__. First see whether state embeds a
6720 * slot state dict too (a proto 2 addition).
6721 */
6722 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6723 PyObject *tmp = state;
6724
6725 state = PyTuple_GET_ITEM(tmp, 0);
6726 slotstate = PyTuple_GET_ITEM(tmp, 1);
6727 Py_INCREF(state);
6728 Py_INCREF(slotstate);
6729 Py_DECREF(tmp);
6730 }
6731 else
6732 slotstate = NULL;
6733
6734 /* Set inst.__dict__ from the state dict (if any). */
6735 if (state != Py_None) {
6736 PyObject *dict;
6737 PyObject *d_key, *d_value;
6738 Py_ssize_t i;
6739 _Py_IDENTIFIER(__dict__);
6740
6741 if (!PyDict_Check(state)) {
6742 PickleState *st = _Pickle_GetGlobalState();
6743 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6744 goto error;
6745 }
6746 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6747 if (dict == NULL)
6748 goto error;
6749
6750 i = 0;
6751 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6752 /* normally the keys for instance attributes are
6753 interned. we should try to do that here. */
6754 Py_INCREF(d_key);
6755 if (PyUnicode_CheckExact(d_key))
6756 PyUnicode_InternInPlace(&d_key);
6757 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6758 Py_DECREF(d_key);
6759 goto error;
6760 }
6761 Py_DECREF(d_key);
6762 }
6763 Py_DECREF(dict);
6764 }
6765
6766 /* Also set instance attributes from the slotstate dict (if any). */
6767 if (slotstate != NULL) {
6768 PyObject *d_key, *d_value;
6769 Py_ssize_t i;
6770
6771 if (!PyDict_Check(slotstate)) {
6772 PickleState *st = _Pickle_GetGlobalState();
6773 PyErr_SetString(st->UnpicklingError,
6774 "slot state is not a dictionary");
6775 goto error;
6776 }
6777 i = 0;
6778 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6779 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6780 goto error;
6781 }
6782 }
6783
6784 if (0) {
6785 error:
6786 status = -1;
6787 }
6788
6789 Py_DECREF(state);
6790 Py_XDECREF(slotstate);
6791 return status;
6792 }
6793
6794 static int
load_mark(UnpicklerObject * self)6795 load_mark(UnpicklerObject *self)
6796 {
6797
6798 /* Note that we split the (pickle.py) stack into two stacks, an
6799 * object stack and a mark stack. Here we push a mark onto the
6800 * mark stack.
6801 */
6802
6803 if (self->num_marks >= self->marks_size) {
6804 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6805 Py_ssize_t *marks_new = self->marks;
6806 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6807 if (marks_new == NULL) {
6808 PyErr_NoMemory();
6809 return -1;
6810 }
6811 self->marks = marks_new;
6812 self->marks_size = (Py_ssize_t)alloc;
6813 }
6814
6815 self->stack->mark_set = 1;
6816 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6817
6818 return 0;
6819 }
6820
6821 static int
load_reduce(UnpicklerObject * self)6822 load_reduce(UnpicklerObject *self)
6823 {
6824 PyObject *callable = NULL;
6825 PyObject *argtup = NULL;
6826 PyObject *obj = NULL;
6827
6828 PDATA_POP(self->stack, argtup);
6829 if (argtup == NULL)
6830 return -1;
6831 PDATA_POP(self->stack, callable);
6832 if (callable) {
6833 obj = PyObject_CallObject(callable, argtup);
6834 Py_DECREF(callable);
6835 }
6836 Py_DECREF(argtup);
6837
6838 if (obj == NULL)
6839 return -1;
6840
6841 PDATA_PUSH(self->stack, obj, -1);
6842 return 0;
6843 }
6844
6845 /* Just raises an error if we don't know the protocol specified. PROTO
6846 * is the first opcode for protocols >= 2.
6847 */
6848 static int
load_proto(UnpicklerObject * self)6849 load_proto(UnpicklerObject *self)
6850 {
6851 char *s;
6852 int i;
6853
6854 if (_Unpickler_Read(self, &s, 1) < 0)
6855 return -1;
6856
6857 i = (unsigned char)s[0];
6858 if (i <= HIGHEST_PROTOCOL) {
6859 self->proto = i;
6860 return 0;
6861 }
6862
6863 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6864 return -1;
6865 }
6866
6867 static int
load_frame(UnpicklerObject * self)6868 load_frame(UnpicklerObject *self)
6869 {
6870 char *s;
6871 Py_ssize_t frame_len;
6872
6873 if (_Unpickler_Read(self, &s, 8) < 0)
6874 return -1;
6875
6876 frame_len = calc_binsize(s, 8);
6877 if (frame_len < 0) {
6878 PyErr_Format(PyExc_OverflowError,
6879 "FRAME length exceeds system's maximum of %zd bytes",
6880 PY_SSIZE_T_MAX);
6881 return -1;
6882 }
6883
6884 if (_Unpickler_Read(self, &s, frame_len) < 0)
6885 return -1;
6886
6887 /* Rewind to start of frame */
6888 self->next_read_idx -= frame_len;
6889 return 0;
6890 }
6891
/* Main unpickling loop.
 *
 * Resets the per-load state (mark count, stack fence, protocol number and
 * any leftover stack contents), then reads one opcode byte at a time and
 * dispatches to the matching load_*() handler until STOP is seen.
 *
 * Returns the object popped from the top of the stack after STOP, or NULL
 * with an exception set.  If the opcode read fails with UnpicklingError,
 * the exception is replaced by EOFError("Ran out of input").
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below. */
    /* In both macros a handler failure (< 0) does `break`, which leaves the
       switch and reaches the loop's trailing `break`; success does
       `continue`, which goes on to read the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0) {
            PickleState *st = _Pickle_GetGlobalState();
            /* Only an UnpicklingError from the read is rewritten as
               EOFError; any other exception propagates unchanged. */
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(NEWOBJ_EX, load_newobj_ex)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            {
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                /* Printable ASCII other than quote and backslash is shown
                   as a character; everything else as a \xNN escape. */
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        break;                  /* and we are done! */
    }

    /* Reached after STOP or after a handler reported failure; a pending
       exception is what tells the two apart. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    PDATA_POP(self->stack, value);
    return value;
}
7022
7023 /*[clinic input]
7024
7025 _pickle.Unpickler.load
7026
7027 Load a pickle.
7028
7029 Read a pickled object representation from the open file object given
7030 in the constructor, and return the reconstituted object hierarchy
7031 specified therein.
7032 [clinic start generated code]*/
7033
7034 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)7035 _pickle_Unpickler_load_impl(UnpicklerObject *self)
7036 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
7037 {
7038 UnpicklerObject *unpickler = (UnpicklerObject*)self;
7039
7040 /* Check whether the Unpickler was initialized correctly. This prevents
7041 segfaulting if a subclass overridden __init__ with a function that does
7042 not call Unpickler.__init__(). Here, we simply ensure that self->read
7043 is not NULL. */
7044 if (unpickler->read == NULL) {
7045 PickleState *st = _Pickle_GetGlobalState();
7046 PyErr_Format(st->UnpicklingError,
7047 "Unpickler.__init__() was not called by %s.__init__()",
7048 Py_TYPE(unpickler)->tp_name);
7049 return NULL;
7050 }
7051
7052 return load(unpickler);
7053 }
7054
7055 /* The name of find_class() is misleading. In newer pickle protocols, this
7056 function is used for loading any global (i.e., functions), not just
7057 classes. The name is kept only for backward compatibility. */
7058
7059 /*[clinic input]
7060
7061 _pickle.Unpickler.find_class
7062
7063 module_name: object
7064 global_name: object
7065 /
7066
7067 Return an object from a specified module.
7068
7069 If necessary, the module will be imported. Subclasses may override
7070 this method (e.g. to restrict unpickling of arbitrary classes and
7071 functions).
7072
7073 This method is called whenever a class or a function object is
7074 needed. Both arguments passed are str objects.
7075 [clinic start generated code]*/
7076
7077 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7078 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7079 PyObject *module_name,
7080 PyObject *global_name)
7081 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7082 {
7083 PyObject *global;
7084 PyObject *module;
7085
7086 if (PySys_Audit("pickle.find_class", "OO",
7087 module_name, global_name) < 0) {
7088 return NULL;
7089 }
7090
7091 /* Try to map the old names used in Python 2.x to the new ones used in
7092 Python 3.x. We do this only with old pickle protocols and when the
7093 user has not disabled the feature. */
7094 if (self->proto < 3 && self->fix_imports) {
7095 PyObject *key;
7096 PyObject *item;
7097 PickleState *st = _Pickle_GetGlobalState();
7098
7099 /* Check if the global (i.e., a function or a class) was renamed
7100 or moved to another module. */
7101 key = PyTuple_Pack(2, module_name, global_name);
7102 if (key == NULL)
7103 return NULL;
7104 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7105 Py_DECREF(key);
7106 if (item) {
7107 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7108 PyErr_Format(PyExc_RuntimeError,
7109 "_compat_pickle.NAME_MAPPING values should be "
7110 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7111 return NULL;
7112 }
7113 module_name = PyTuple_GET_ITEM(item, 0);
7114 global_name = PyTuple_GET_ITEM(item, 1);
7115 if (!PyUnicode_Check(module_name) ||
7116 !PyUnicode_Check(global_name)) {
7117 PyErr_Format(PyExc_RuntimeError,
7118 "_compat_pickle.NAME_MAPPING values should be "
7119 "pairs of str, not (%.200s, %.200s)",
7120 Py_TYPE(module_name)->tp_name,
7121 Py_TYPE(global_name)->tp_name);
7122 return NULL;
7123 }
7124 }
7125 else if (PyErr_Occurred()) {
7126 return NULL;
7127 }
7128 else {
7129 /* Check if the module was renamed. */
7130 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7131 if (item) {
7132 if (!PyUnicode_Check(item)) {
7133 PyErr_Format(PyExc_RuntimeError,
7134 "_compat_pickle.IMPORT_MAPPING values should be "
7135 "strings, not %.200s", Py_TYPE(item)->tp_name);
7136 return NULL;
7137 }
7138 module_name = item;
7139 }
7140 else if (PyErr_Occurred()) {
7141 return NULL;
7142 }
7143 }
7144 }
7145
7146 /*
7147 * we don't use PyImport_GetModule here, because it can return partially-
7148 * initialised modules, which then cause the getattribute to fail.
7149 */
7150 module = PyImport_Import(module_name);
7151 if (module == NULL) {
7152 return NULL;
7153 }
7154 global = getattribute(module, global_name, self->proto >= 4);
7155 Py_DECREF(module);
7156 return global;
7157 }
7158
7159 /*[clinic input]
7160
7161 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7162
7163 Returns size in memory, in bytes.
7164 [clinic start generated code]*/
7165
7166 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7167 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7168 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7169 {
7170 Py_ssize_t res;
7171
7172 res = _PyObject_SIZE(Py_TYPE(self));
7173 if (self->memo != NULL)
7174 res += self->memo_size * sizeof(PyObject *);
7175 if (self->marks != NULL)
7176 res += self->marks_size * sizeof(Py_ssize_t);
7177 if (self->input_line != NULL)
7178 res += strlen(self->input_line) + 1;
7179 if (self->encoding != NULL)
7180 res += strlen(self->encoding) + 1;
7181 if (self->errors != NULL)
7182 res += strlen(self->errors) + 1;
7183 return res;
7184 }
7185
/* Method table for _pickle.Unpickler.  Each *_METHODDEF macro names the
 * entry for the corresponding method (load, find_class, __sizeof__);
 * NOTE(review): the macros are defined outside this view, presumably in
 * the Argument Clinic generated header. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7192
7193 static void
Unpickler_dealloc(UnpicklerObject * self)7194 Unpickler_dealloc(UnpicklerObject *self)
7195 {
7196 PyObject_GC_UnTrack((PyObject *)self);
7197 Py_XDECREF(self->readline);
7198 Py_XDECREF(self->readinto);
7199 Py_XDECREF(self->read);
7200 Py_XDECREF(self->peek);
7201 Py_XDECREF(self->stack);
7202 Py_XDECREF(self->pers_func);
7203 Py_XDECREF(self->buffers);
7204 if (self->buffer.buf != NULL) {
7205 PyBuffer_Release(&self->buffer);
7206 self->buffer.buf = NULL;
7207 }
7208
7209 _Unpickler_MemoCleanup(self);
7210 PyMem_Free(self->marks);
7211 PyMem_Free(self->input_line);
7212 PyMem_Free(self->encoding);
7213 PyMem_Free(self->errors);
7214
7215 Py_TYPE(self)->tp_free((PyObject *)self);
7216 }
7217
/* GC traversal hook for Unpickler objects (installed as tp_traverse in
 * Unpickler_Type): passes each object reference listed below to the
 * collector's `visit` callback.  The parameter names `visit` and `arg`
 * are required by the Py_VISIT macro. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->readinto);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->buffers);
    return 0;
}
7230
7231 static int
Unpickler_clear(UnpicklerObject * self)7232 Unpickler_clear(UnpicklerObject *self)
7233 {
7234 Py_CLEAR(self->readline);
7235 Py_CLEAR(self->readinto);
7236 Py_CLEAR(self->read);
7237 Py_CLEAR(self->peek);
7238 Py_CLEAR(self->stack);
7239 Py_CLEAR(self->pers_func);
7240 Py_CLEAR(self->buffers);
7241 if (self->buffer.buf != NULL) {
7242 PyBuffer_Release(&self->buffer);
7243 self->buffer.buf = NULL;
7244 }
7245
7246 _Unpickler_MemoCleanup(self);
7247 PyMem_Free(self->marks);
7248 self->marks = NULL;
7249 PyMem_Free(self->input_line);
7250 self->input_line = NULL;
7251 PyMem_Free(self->encoding);
7252 self->encoding = NULL;
7253 PyMem_Free(self->errors);
7254 self->errors = NULL;
7255
7256 return 0;
7257 }
7258
7259 /*[clinic input]
7260
7261 _pickle.Unpickler.__init__
7262
7263 file: object
7264 *
7265 fix_imports: bool = True
7266 encoding: str = 'ASCII'
7267 errors: str = 'strict'
7268 buffers: object(c_default="NULL") = ()
7269
7270 This takes a binary file for reading a pickle data stream.
7271
7272 The protocol version of the pickle is detected automatically, so no
7273 protocol argument is needed. Bytes past the pickled object's
7274 representation are ignored.
7275
7276 The argument *file* must have two methods, a read() method that takes
7277 an integer argument, and a readline() method that requires no
7278 arguments. Both methods should return bytes. Thus *file* can be a
7279 binary file object opened for reading, an io.BytesIO object, or any
7280 other custom object that meets this interface.
7281
7282 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7283 which are used to control compatibility support for pickle stream
7284 generated by Python 2. If *fix_imports* is True, pickle will try to
7285 map the old Python 2 names to the new names used in Python 3. The
7286 *encoding* and *errors* tell pickle how to decode 8-bit string
7287 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7288 respectively. The *encoding* can be 'bytes' to read these 8-bit
7289 string instances as bytes objects.
7290 [clinic start generated code]*/
7291
7292 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7293 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7294 int fix_imports, const char *encoding,
7295 const char *errors, PyObject *buffers)
7296 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7297 {
7298 _Py_IDENTIFIER(persistent_load);
7299
7300 /* In case of multiple __init__() calls, clear previous content. */
7301 if (self->read != NULL)
7302 (void)Unpickler_clear(self);
7303
7304 if (_Unpickler_SetInputStream(self, file) < 0)
7305 return -1;
7306
7307 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7308 return -1;
7309
7310 if (_Unpickler_SetBuffers(self, buffers) < 0)
7311 return -1;
7312
7313 self->fix_imports = fix_imports;
7314
7315 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7316 &self->pers_func, &self->pers_func_self) < 0)
7317 {
7318 return -1;
7319 }
7320
7321 self->stack = (Pdata *)Pdata_New();
7322 if (self->stack == NULL)
7323 return -1;
7324
7325 self->memo_size = 32;
7326 self->memo = _Unpickler_NewMemo(self->memo_size);
7327 if (self->memo == NULL)
7328 return -1;
7329
7330 self->proto = 0;
7331
7332 return 0;
7333 }
7334
7335
7336 /* Define a proxy object for the Unpickler's internal memo object. This is to
7337 * avoid breaking code like:
7338 * unpickler.memo.clear()
7339 * and
7340 * unpickler.memo = saved_memo
7341 * Is this a good idea? Not really, but we don't want to break code that uses
7342 * it. Note that we don't implement the entire mapping API here. This is
7343 * intentional, as these should be treated as black-box implementation details.
7344 *
7345 * We do, however, have to implement pickling/unpickling support because of
7346 * real-world code like cvs2svn.
7347 */
7348
7349 /*[clinic input]
7350 _pickle.UnpicklerMemoProxy.clear
7351
7352 Remove all items from memo.
7353 [clinic start generated code]*/
7354
7355 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7356 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7357 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7358 {
7359 _Unpickler_MemoCleanup(self->unpickler);
7360 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7361 if (self->unpickler->memo == NULL)
7362 return NULL;
7363 Py_RETURN_NONE;
7364 }
7365
7366 /*[clinic input]
7367 _pickle.UnpicklerMemoProxy.copy
7368
7369 Copy the memo to a new object.
7370 [clinic start generated code]*/
7371
7372 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7373 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7374 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7375 {
7376 size_t i;
7377 PyObject *new_memo = PyDict_New();
7378 if (new_memo == NULL)
7379 return NULL;
7380
7381 for (i = 0; i < self->unpickler->memo_size; i++) {
7382 int status;
7383 PyObject *key, *value;
7384
7385 value = self->unpickler->memo[i];
7386 if (value == NULL)
7387 continue;
7388
7389 key = PyLong_FromSsize_t(i);
7390 if (key == NULL)
7391 goto error;
7392 status = PyDict_SetItem(new_memo, key, value);
7393 Py_DECREF(key);
7394 if (status < 0)
7395 goto error;
7396 }
7397 return new_memo;
7398
7399 error:
7400 Py_DECREF(new_memo);
7401 return NULL;
7402 }
7403
7404 /*[clinic input]
7405 _pickle.UnpicklerMemoProxy.__reduce__
7406
7407 Implement pickling support.
7408 [clinic start generated code]*/
7409
7410 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7411 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7412 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7413 {
7414 PyObject *reduce_value;
7415 PyObject *constructor_args;
7416 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7417 if (contents == NULL)
7418 return NULL;
7419
7420 reduce_value = PyTuple_New(2);
7421 if (reduce_value == NULL) {
7422 Py_DECREF(contents);
7423 return NULL;
7424 }
7425 constructor_args = PyTuple_New(1);
7426 if (constructor_args == NULL) {
7427 Py_DECREF(contents);
7428 Py_DECREF(reduce_value);
7429 return NULL;
7430 }
7431 PyTuple_SET_ITEM(constructor_args, 0, contents);
7432 Py_INCREF((PyObject *)&PyDict_Type);
7433 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7434 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7435 return reduce_value;
7436 }
7437
/* Method table for _pickle.UnpicklerMemoProxy.  Each *_METHODDEF macro
 * names the entry for the corresponding method (clear, copy, __reduce__);
 * NOTE(review): the macros are defined outside this view, presumably in
 * the Argument Clinic generated header. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
7444
7445 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7446 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7447 {
7448 PyObject_GC_UnTrack(self);
7449 Py_XDECREF(self->unpickler);
7450 PyObject_GC_Del((PyObject *)self);
7451 }
7452
/* GC traversal hook for memo proxy objects (installed as tp_traverse in
 * UnpicklerMemoProxyType): the proxied unpickler is the only reference
 * reported.  The parameter names `visit` and `arg` are required by the
 * Py_VISIT macro. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
7460
/* tp_clear hook for memo proxy objects: drops the reference to the proxied
 * unpickler and leaves the field NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
7467
7468 static PyTypeObject UnpicklerMemoProxyType = {
7469 PyVarObject_HEAD_INIT(NULL, 0)
7470 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7471 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7472 0,
7473 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
7474 0, /* tp_vectorcall_offset */
7475 0, /* tp_getattr */
7476 0, /* tp_setattr */
7477 0, /* tp_as_async */
7478 0, /* tp_repr */
7479 0, /* tp_as_number */
7480 0, /* tp_as_sequence */
7481 0, /* tp_as_mapping */
7482 PyObject_HashNotImplemented, /* tp_hash */
7483 0, /* tp_call */
7484 0, /* tp_str */
7485 PyObject_GenericGetAttr, /* tp_getattro */
7486 PyObject_GenericSetAttr, /* tp_setattro */
7487 0, /* tp_as_buffer */
7488 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7489 0, /* tp_doc */
7490 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7491 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7492 0, /* tp_richcompare */
7493 0, /* tp_weaklistoffset */
7494 0, /* tp_iter */
7495 0, /* tp_iternext */
7496 unpicklerproxy_methods, /* tp_methods */
7497 };
7498
7499 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7500 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7501 {
7502 UnpicklerMemoProxyObject *self;
7503
7504 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7505 &UnpicklerMemoProxyType);
7506 if (self == NULL)
7507 return NULL;
7508 Py_INCREF(unpickler);
7509 self->unpickler = unpickler;
7510 PyObject_GC_Track(self);
7511 return (PyObject *)self;
7512 }
7513
7514 /*****************************************************************************/
7515
7516
7517 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7518 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7519 {
7520 return UnpicklerMemoProxy_New(self);
7521 }
7522
7523 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7524 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7525 {
7526 PyObject **new_memo;
7527 size_t new_memo_size = 0;
7528
7529 if (obj == NULL) {
7530 PyErr_SetString(PyExc_TypeError,
7531 "attribute deletion is not supported");
7532 return -1;
7533 }
7534
7535 if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7536 UnpicklerObject *unpickler =
7537 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7538
7539 new_memo_size = unpickler->memo_size;
7540 new_memo = _Unpickler_NewMemo(new_memo_size);
7541 if (new_memo == NULL)
7542 return -1;
7543
7544 for (size_t i = 0; i < new_memo_size; i++) {
7545 Py_XINCREF(unpickler->memo[i]);
7546 new_memo[i] = unpickler->memo[i];
7547 }
7548 }
7549 else if (PyDict_Check(obj)) {
7550 Py_ssize_t i = 0;
7551 PyObject *key, *value;
7552
7553 new_memo_size = PyDict_GET_SIZE(obj);
7554 new_memo = _Unpickler_NewMemo(new_memo_size);
7555 if (new_memo == NULL)
7556 return -1;
7557
7558 while (PyDict_Next(obj, &i, &key, &value)) {
7559 Py_ssize_t idx;
7560 if (!PyLong_Check(key)) {
7561 PyErr_SetString(PyExc_TypeError,
7562 "memo key must be integers");
7563 goto error;
7564 }
7565 idx = PyLong_AsSsize_t(key);
7566 if (idx == -1 && PyErr_Occurred())
7567 goto error;
7568 if (idx < 0) {
7569 PyErr_SetString(PyExc_ValueError,
7570 "memo key must be positive integers.");
7571 goto error;
7572 }
7573 if (_Unpickler_MemoPut(self, idx, value) < 0)
7574 goto error;
7575 }
7576 }
7577 else {
7578 PyErr_Format(PyExc_TypeError,
7579 "'memo' attribute must be an UnpicklerMemoProxy object "
7580 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7581 return -1;
7582 }
7583
7584 _Unpickler_MemoCleanup(self);
7585 self->memo_size = new_memo_size;
7586 self->memo = new_memo;
7587
7588 return 0;
7589
7590 error:
7591 if (new_memo_size) {
7592 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7593 Py_XDECREF(new_memo[i]);
7594 }
7595 PyMem_FREE(new_memo);
7596 }
7597 return -1;
7598 }
7599
7600 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7601 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7602 {
7603 if (self->pers_func == NULL) {
7604 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7605 return NULL;
7606 }
7607 return reconstruct_method(self->pers_func, self->pers_func_self);
7608 }
7609
7610 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7611 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7612 {
7613 if (value == NULL) {
7614 PyErr_SetString(PyExc_TypeError,
7615 "attribute deletion is not supported");
7616 return -1;
7617 }
7618 if (!PyCallable_Check(value)) {
7619 PyErr_SetString(PyExc_TypeError,
7620 "persistent_load must be a callable taking "
7621 "one argument");
7622 return -1;
7623 }
7624
7625 self->pers_func_self = NULL;
7626 Py_INCREF(value);
7627 Py_XSETREF(self->pers_func, value);
7628
7629 return 0;
7630 }
7631
7632 static PyGetSetDef Unpickler_getsets[] = {
7633 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7634 {"persistent_load", (getter)Unpickler_get_persload,
7635 (setter)Unpickler_set_persload},
7636 {NULL}
7637 };
7638
7639 static PyTypeObject Unpickler_Type = {
7640 PyVarObject_HEAD_INIT(NULL, 0)
7641 "_pickle.Unpickler", /*tp_name*/
7642 sizeof(UnpicklerObject), /*tp_basicsize*/
7643 0, /*tp_itemsize*/
7644 (destructor)Unpickler_dealloc, /*tp_dealloc*/
7645 0, /*tp_vectorcall_offset*/
7646 0, /*tp_getattr*/
7647 0, /*tp_setattr*/
7648 0, /*tp_as_async*/
7649 0, /*tp_repr*/
7650 0, /*tp_as_number*/
7651 0, /*tp_as_sequence*/
7652 0, /*tp_as_mapping*/
7653 0, /*tp_hash*/
7654 0, /*tp_call*/
7655 0, /*tp_str*/
7656 0, /*tp_getattro*/
7657 0, /*tp_setattro*/
7658 0, /*tp_as_buffer*/
7659 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7660 _pickle_Unpickler___init____doc__, /*tp_doc*/
7661 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7662 (inquiry)Unpickler_clear, /*tp_clear*/
7663 0, /*tp_richcompare*/
7664 0, /*tp_weaklistoffset*/
7665 0, /*tp_iter*/
7666 0, /*tp_iternext*/
7667 Unpickler_methods, /*tp_methods*/
7668 0, /*tp_members*/
7669 Unpickler_getsets, /*tp_getset*/
7670 0, /*tp_base*/
7671 0, /*tp_dict*/
7672 0, /*tp_descr_get*/
7673 0, /*tp_descr_set*/
7674 0, /*tp_dictoffset*/
7675 _pickle_Unpickler___init__, /*tp_init*/
7676 PyType_GenericAlloc, /*tp_alloc*/
7677 PyType_GenericNew, /*tp_new*/
7678 PyObject_GC_Del, /*tp_free*/
7679 0, /*tp_is_gc*/
7680 };
7681
7682 /*[clinic input]
7683
7684 _pickle.dump
7685
7686 obj: object
7687 file: object
7688 protocol: object = None
7689 *
7690 fix_imports: bool = True
7691 buffer_callback: object = None
7692
7693 Write a pickled representation of obj to the open file object file.
7694
7695 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7696 be more efficient.
7697
7698 The optional *protocol* argument tells the pickler to use the given
7699 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7700 protocol is 4. It was introduced in Python 3.4, and is incompatible
7701 with previous versions.
7702
7703 Specifying a negative protocol version selects the highest protocol
7704 version supported. The higher the protocol used, the more recent the
7705 version of Python needed to read the pickle produced.
7706
7707 The *file* argument must have a write() method that accepts a single
7708 bytes argument. It can thus be a file object opened for binary
7709 writing, an io.BytesIO instance, or any other custom object that meets
7710 this interface.
7711
7712 If *fix_imports* is True and protocol is less than 3, pickle will try
7713 to map the new Python 3 names to the old module names used in Python
7714 2, so that the pickle data stream is readable with Python 2.
7715
7716 If *buffer_callback* is None (the default), buffer views are serialized
7717 into *file* as part of the pickle stream. It is an error if
7718 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7719
7720 [clinic start generated code]*/
7721
7722 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7723 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7724 PyObject *protocol, int fix_imports,
7725 PyObject *buffer_callback)
7726 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7727 {
7728 PicklerObject *pickler = _Pickler_New();
7729
7730 if (pickler == NULL)
7731 return NULL;
7732
7733 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7734 goto error;
7735
7736 if (_Pickler_SetOutputStream(pickler, file) < 0)
7737 goto error;
7738
7739 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7740 goto error;
7741
7742 if (dump(pickler, obj) < 0)
7743 goto error;
7744
7745 if (_Pickler_FlushToFile(pickler) < 0)
7746 goto error;
7747
7748 Py_DECREF(pickler);
7749 Py_RETURN_NONE;
7750
7751 error:
7752 Py_XDECREF(pickler);
7753 return NULL;
7754 }
7755
7756 /*[clinic input]
7757
7758 _pickle.dumps
7759
7760 obj: object
7761 protocol: object = None
7762 *
7763 fix_imports: bool = True
7764 buffer_callback: object = None
7765
7766 Return the pickled representation of the object as a bytes object.
7767
7768 The optional *protocol* argument tells the pickler to use the given
7769 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7770 protocol is 4. It was introduced in Python 3.4, and is incompatible
7771 with previous versions.
7772
7773 Specifying a negative protocol version selects the highest protocol
7774 version supported. The higher the protocol used, the more recent the
7775 version of Python needed to read the pickle produced.
7776
7777 If *fix_imports* is True and *protocol* is less than 3, pickle will
7778 try to map the new Python 3 names to the old module names used in
7779 Python 2, so that the pickle data stream is readable with Python 2.
7780
7781 If *buffer_callback* is None (the default), buffer views are serialized
7782 into *file* as part of the pickle stream. It is an error if
7783 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7784
7785 [clinic start generated code]*/
7786
7787 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7788 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7789 int fix_imports, PyObject *buffer_callback)
7790 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7791 {
7792 PyObject *result;
7793 PicklerObject *pickler = _Pickler_New();
7794
7795 if (pickler == NULL)
7796 return NULL;
7797
7798 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7799 goto error;
7800
7801 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7802 goto error;
7803
7804 if (dump(pickler, obj) < 0)
7805 goto error;
7806
7807 result = _Pickler_GetString(pickler);
7808 Py_DECREF(pickler);
7809 return result;
7810
7811 error:
7812 Py_XDECREF(pickler);
7813 return NULL;
7814 }
7815
7816 /*[clinic input]
7817
7818 _pickle.load
7819
7820 file: object
7821 *
7822 fix_imports: bool = True
7823 encoding: str = 'ASCII'
7824 errors: str = 'strict'
7825 buffers: object(c_default="NULL") = ()
7826
7827 Read and return an object from the pickle data stored in a file.
7828
7829 This is equivalent to ``Unpickler(file).load()``, but may be more
7830 efficient.
7831
7832 The protocol version of the pickle is detected automatically, so no
7833 protocol argument is needed. Bytes past the pickled object's
7834 representation are ignored.
7835
7836 The argument *file* must have two methods, a read() method that takes
7837 an integer argument, and a readline() method that requires no
7838 arguments. Both methods should return bytes. Thus *file* can be a
7839 binary file object opened for reading, an io.BytesIO object, or any
7840 other custom object that meets this interface.
7841
7842 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7843 which are used to control compatibility support for pickle stream
7844 generated by Python 2. If *fix_imports* is True, pickle will try to
7845 map the old Python 2 names to the new names used in Python 3. The
7846 *encoding* and *errors* tell pickle how to decode 8-bit string
7847 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7848 respectively. The *encoding* can be 'bytes' to read these 8-bit
7849 string instances as bytes objects.
7850 [clinic start generated code]*/
7851
7852 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7853 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7854 const char *encoding, const char *errors,
7855 PyObject *buffers)
7856 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7857 {
7858 PyObject *result;
7859 UnpicklerObject *unpickler = _Unpickler_New();
7860
7861 if (unpickler == NULL)
7862 return NULL;
7863
7864 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7865 goto error;
7866
7867 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7868 goto error;
7869
7870 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7871 goto error;
7872
7873 unpickler->fix_imports = fix_imports;
7874
7875 result = load(unpickler);
7876 Py_DECREF(unpickler);
7877 return result;
7878
7879 error:
7880 Py_XDECREF(unpickler);
7881 return NULL;
7882 }
7883
7884 /*[clinic input]
7885
7886 _pickle.loads
7887
7888 data: object
7889 /
7890 *
7891 fix_imports: bool = True
7892 encoding: str = 'ASCII'
7893 errors: str = 'strict'
7894 buffers: object(c_default="NULL") = ()
7895
7896 Read and return an object from the given pickle data.
7897
7898 The protocol version of the pickle is detected automatically, so no
7899 protocol argument is needed. Bytes past the pickled object's
7900 representation are ignored.
7901
7902 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7903 which are used to control compatibility support for pickle stream
7904 generated by Python 2. If *fix_imports* is True, pickle will try to
7905 map the old Python 2 names to the new names used in Python 3. The
7906 *encoding* and *errors* tell pickle how to decode 8-bit string
7907 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7908 respectively. The *encoding* can be 'bytes' to read these 8-bit
7909 string instances as bytes objects.
7910 [clinic start generated code]*/
7911
7912 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7913 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7914 const char *encoding, const char *errors,
7915 PyObject *buffers)
7916 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7917 {
7918 PyObject *result;
7919 UnpicklerObject *unpickler = _Unpickler_New();
7920
7921 if (unpickler == NULL)
7922 return NULL;
7923
7924 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7925 goto error;
7926
7927 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7928 goto error;
7929
7930 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7931 goto error;
7932
7933 unpickler->fix_imports = fix_imports;
7934
7935 result = load(unpickler);
7936 Py_DECREF(unpickler);
7937 return result;
7938
7939 error:
7940 Py_XDECREF(unpickler);
7941 return NULL;
7942 }
7943
7944 static struct PyMethodDef pickle_methods[] = {
7945 _PICKLE_DUMP_METHODDEF
7946 _PICKLE_DUMPS_METHODDEF
7947 _PICKLE_LOAD_METHODDEF
7948 _PICKLE_LOADS_METHODDEF
7949 {NULL, NULL} /* sentinel */
7950 };
7951
7952 static int
pickle_clear(PyObject * m)7953 pickle_clear(PyObject *m)
7954 {
7955 _Pickle_ClearState(_Pickle_GetState(m));
7956 return 0;
7957 }
7958
7959 static void
pickle_free(PyObject * m)7960 pickle_free(PyObject *m)
7961 {
7962 _Pickle_ClearState(_Pickle_GetState(m));
7963 }
7964
7965 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7966 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7967 {
7968 PickleState *st = _Pickle_GetState(m);
7969 Py_VISIT(st->PickleError);
7970 Py_VISIT(st->PicklingError);
7971 Py_VISIT(st->UnpicklingError);
7972 Py_VISIT(st->dispatch_table);
7973 Py_VISIT(st->extension_registry);
7974 Py_VISIT(st->extension_cache);
7975 Py_VISIT(st->inverted_registry);
7976 Py_VISIT(st->name_mapping_2to3);
7977 Py_VISIT(st->import_mapping_2to3);
7978 Py_VISIT(st->name_mapping_3to2);
7979 Py_VISIT(st->import_mapping_3to2);
7980 Py_VISIT(st->codecs_encode);
7981 Py_VISIT(st->getattr);
7982 Py_VISIT(st->partial);
7983 return 0;
7984 }
7985
7986 static struct PyModuleDef _picklemodule = {
7987 PyModuleDef_HEAD_INIT,
7988 "_pickle", /* m_name */
7989 pickle_module_doc, /* m_doc */
7990 sizeof(PickleState), /* m_size */
7991 pickle_methods, /* m_methods */
7992 NULL, /* m_reload */
7993 pickle_traverse, /* m_traverse */
7994 pickle_clear, /* m_clear */
7995 (freefunc)pickle_free /* m_free */
7996 };
7997
7998 PyMODINIT_FUNC
PyInit__pickle(void)7999 PyInit__pickle(void)
8000 {
8001 PyObject *m;
8002 PickleState *st;
8003
8004 m = PyState_FindModule(&_picklemodule);
8005 if (m) {
8006 Py_INCREF(m);
8007 return m;
8008 }
8009
8010 if (PyType_Ready(&Pdata_Type) < 0)
8011 return NULL;
8012 if (PyType_Ready(&PicklerMemoProxyType) < 0)
8013 return NULL;
8014 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
8015 return NULL;
8016
8017 /* Create the module and add the functions. */
8018 m = PyModule_Create(&_picklemodule);
8019 if (m == NULL)
8020 return NULL;
8021
8022 /* Add types */
8023 if (PyModule_AddType(m, &Pickler_Type) < 0) {
8024 return NULL;
8025 }
8026 if (PyModule_AddType(m, &Unpickler_Type) < 0) {
8027 return NULL;
8028 }
8029 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
8030 return NULL;
8031 }
8032
8033 st = _Pickle_GetState(m);
8034
8035 /* Initialize the exceptions. */
8036 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
8037 if (st->PickleError == NULL)
8038 return NULL;
8039 st->PicklingError = \
8040 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
8041 if (st->PicklingError == NULL)
8042 return NULL;
8043 st->UnpicklingError = \
8044 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
8045 if (st->UnpicklingError == NULL)
8046 return NULL;
8047
8048 Py_INCREF(st->PickleError);
8049 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
8050 return NULL;
8051 Py_INCREF(st->PicklingError);
8052 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
8053 return NULL;
8054 Py_INCREF(st->UnpicklingError);
8055 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
8056 return NULL;
8057
8058 if (_Pickle_InitState(st) < 0)
8059 return NULL;
8060
8061 return m;
8062 }
8063