1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "../Modules/hashtable.h"
16
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21
22 #include "clinic/marshal.c.h"
23
24 /* High water mark to determine when the marshalled object is dangerously deep
25 * and risks coring the interpreter. When the object stack gets this deep,
26 * raise an exception instead of continuing.
27 * On Windows debug builds, reduce this value.
28 *
29 * BUG: https://bugs.python.org/issue33720
30 * On Windows PGO builds, the r_object function overallocates its stack and
31 * can cause a stack overflow. We reduce the maximum depth for all Windows
32 * releases to protect against this.
33 * #if defined(MS_WINDOWS) && defined(_DEBUG)
34 */
/* Upper bound for the `depth` counter kept while writing (w_object) and
   reading (r_object).  Exceeding it yields WFERR_NESTEDTOODEEP on the
   write side and ValueError("recursion limit exceeded") on the read side. */
35 #if defined(MS_WINDOWS)
36 #define MAX_MARSHAL_STACK_DEPTH 1000
37 #else
38 #define MAX_MARSHAL_STACK_DEPTH 2000
39 #endif
40
/* One-byte type codes that introduce every marshalled object.
   The writer emits them through w_byte()/W_TYPE(); the reader splits the
   byte it gets into `code & FLAG_REF` and `code & ~FLAG_REF`. */
41 #define TYPE_NULL '0'
42 #define TYPE_NONE 'N'
43 #define TYPE_FALSE 'F'
44 #define TYPE_TRUE 'T'
45 #define TYPE_STOPITER 'S'
46 #define TYPE_ELLIPSIS '.'
47 #define TYPE_INT 'i'
48 /* TYPE_INT64 is not generated anymore.
49 Supported for backward compatibility only. */
50 #define TYPE_INT64 'I'
51 #define TYPE_FLOAT 'f'
52 #define TYPE_BINARY_FLOAT 'g'
53 #define TYPE_COMPLEX 'x'
54 #define TYPE_BINARY_COMPLEX 'y'
55 #define TYPE_LONG 'l'
56 #define TYPE_STRING 's'
57 #define TYPE_INTERNED 't'
58 #define TYPE_REF 'r'
59 #define TYPE_TUPLE '('
60 #define TYPE_LIST '['
61 #define TYPE_DICT '{'
62 #define TYPE_CODE 'c'
63 #define TYPE_UNICODE 'u'
64 #define TYPE_UNKNOWN '?'
65 #define TYPE_SET '<'
66 #define TYPE_FROZENSET '>'
/* High bit OR'ed into a type code by W_TYPE() once w_ref() has registered
   the object, so that later occurrences can be written as TYPE_REF. */
67 #define FLAG_REF '\x80' /* with a type, add obj to index */
68
/* Codes written only when the format version is >= 4 (see the version
   checks in w_complex_object). */
69 #define TYPE_ASCII 'a'
70 #define TYPE_ASCII_INTERNED 'A'
71 #define TYPE_SMALL_TUPLE ')'
72 #define TYPE_SHORT_ASCII 'z'
73 #define TYPE_SHORT_ASCII_INTERNED 'Z'
74
/* Values stored in WFILE.error by the writer. */
75 #define WFERR_OK 0
76 #define WFERR_UNMARSHALLABLE 1
77 #define WFERR_NESTEDTOODEEP 2
78 #define WFERR_NOMEMORY 3
79
/* Writer state shared by all w_* helpers.  Output goes either to a C
   stream (`fp` != NULL, with `buf` as a fixed staging buffer) or into a
   growable bytes object (`fp` == NULL, `str` != NULL). */
80 typedef struct {
81 FILE *fp; /* destination stream; NULL selects the bytes-object mode */
82 int error; /* see WFERR_* values */
83 int depth; /* nesting level, bumped by w_object() and checked against MAX_MARSHAL_STACK_DEPTH */
84 PyObject *str; /* bytes object being filled when fp is NULL (resized by w_reserve) */
85 char *ptr; /* next byte to write; NULL once a resize has failed */
86 char *end; /* one past the last usable byte of buf */
87 char *buf; /* start of the current output buffer */
88 _Py_hashtable_t *hashtable; /* object -> reference index, created by w_init_refs() for version >= 3 */
89 int version; /* marshal format version requested by the caller */
90 } WFILE;
91
/* w_byte(c, p): append the single byte `c` to writer `p`.
   When the buffer is full (ptr == end) w_reserve() is asked for one more
   byte; if that fails the byte is silently dropped.  `p` is evaluated
   several times, so it must be free of side effects. */
92 #define w_byte(c, p) do { \
93 if ((p)->ptr != (p)->end || w_reserve((p), 1)) \
94 *(p)->ptr++ = (c); \
95 } while(0)
96
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100 assert(p->fp != NULL);
101 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102 p->ptr = p->buf;
103 }
104
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108 Py_ssize_t pos, size, delta;
109 if (p->ptr == NULL)
110 return 0; /* An error already occurred */
111 if (p->fp != NULL) {
112 w_flush(p);
113 return needed <= p->end - p->ptr;
114 }
115 assert(p->str != NULL);
116 pos = p->ptr - p->buf;
117 size = PyBytes_Size(p->str);
118 if (size > 16*1024*1024)
119 delta = (size >> 3); /* 12.5% overallocation */
120 else
121 delta = size + 1024;
122 delta = Py_MAX(delta, needed);
123 if (delta > PY_SSIZE_T_MAX - size) {
124 p->error = WFERR_NOMEMORY;
125 return 0;
126 }
127 size += delta;
128 if (_PyBytes_Resize(&p->str, size) != 0) {
129 p->ptr = p->buf = p->end = NULL;
130 return 0;
131 }
132 else {
133 p->buf = PyBytes_AS_STRING(p->str);
134 p->ptr = p->buf + pos;
135 p->end = p->buf + size;
136 return 1;
137 }
138 }
139
140 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)141 w_string(const char *s, Py_ssize_t n, WFILE *p)
142 {
143 Py_ssize_t m;
144 if (!n || p->ptr == NULL)
145 return;
146 m = p->end - p->ptr;
147 if (p->fp != NULL) {
148 if (n <= m) {
149 memcpy(p->ptr, s, n);
150 p->ptr += n;
151 }
152 else {
153 w_flush(p);
154 fwrite(s, 1, n, p->fp);
155 }
156 }
157 else {
158 if (n <= m || w_reserve(p, n - m)) {
159 memcpy(p->ptr, s, n);
160 p->ptr += n;
161 }
162 }
163 }
164
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168 w_byte((char)( x & 0xff), p);
169 w_byte((char)((x>> 8) & 0xff), p);
170 }
171
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175 w_byte((char)( x & 0xff), p);
176 w_byte((char)((x>> 8) & 0xff), p);
177 w_byte((char)((x>>16) & 0xff), p);
178 w_byte((char)((x>>24) & 0xff), p);
179 }
180
/* Largest size or count the 4-byte length fields of the format can hold. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` as a 4-byte integer.

   Where size_t is wider than 32 bits, a size above SIZE32_MAX cannot be
   represented: the writer is flagged WFERR_UNMARSHALLABLE and the macro
   RETURNS FROM THE ENCLOSING FUNCTION (which must therefore return void).

   p->depth is deliberately left alone here: every function that expands
   this macro runs underneath w_object(), which always undoes its own
   depth increment on the way out.  Decrementing here as well would leave
   the counter one too low after each failure and weaken the
   MAX_MARSHAL_STACK_DEPTH guard. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195
196 static void
w_pstring(const char * s,Py_ssize_t n,WFILE * p)197 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
198 {
199 W_SIZE(n, p);
200 w_string(s, n, p);
201 }
202
203 static void
w_short_pstring(const char * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
205 {
206 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207 w_string(s, n, p);
208 }
209
210 /* We assume that Python ints are stored internally in base some power of
211 2**15; for the sake of portability we'll always read and write them in base
212 exactly 2**15. */
213
/* Geometry of the on-disk integer representation: 15-bit digits, each
   stored in a 16-bit short by w_PyLong() and read back by r_PyLong(). */
214 #define PyLong_MARSHAL_SHIFT 15
215 #define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
216 #define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
/* The conversion loops split each in-memory digit into a whole number of
   marshal digits, hence the divisibility requirement. */
217 #if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
218 #error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
219 #endif
/* Number of marshal digits per in-memory digit. */
220 #define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
221
/* W_TYPE(t, p): write type code `t` combined with the reference flag.
   Note that `flag` is NOT a macro parameter: it is picked up from a
   variable of that name in the expanding function's scope. */
222 #define W_TYPE(t, p) do { \
223 w_byte((t) | flag, (p)); \
224 } while(0)
225
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229 Py_ssize_t i, j, n, l;
230 digit d;
231
232 W_TYPE(TYPE_LONG, p);
233 if (Py_SIZE(ob) == 0) {
234 w_long((long)0, p);
235 return;
236 }
237
238 /* set l to number of base PyLong_MARSHAL_BASE digits */
239 n = Py_ABS(Py_SIZE(ob));
240 l = (n-1) * PyLong_MARSHAL_RATIO;
241 d = ob->ob_digit[n-1];
242 assert(d != 0); /* a PyLong is always normalized */
243 do {
244 d >>= PyLong_MARSHAL_SHIFT;
245 l++;
246 } while (d != 0);
247 if (l > SIZE32_MAX) {
248 p->depth--;
249 p->error = WFERR_UNMARSHALLABLE;
250 return;
251 }
252 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253
254 for (i=0; i < n-1; i++) {
255 d = ob->ob_digit[i];
256 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257 w_short(d & PyLong_MARSHAL_MASK, p);
258 d >>= PyLong_MARSHAL_SHIFT;
259 }
260 assert (d == 0);
261 }
262 d = ob->ob_digit[n-1];
263 do {
264 w_short(d & PyLong_MARSHAL_MASK, p);
265 d >>= PyLong_MARSHAL_SHIFT;
266 } while (d != 0);
267 }
268
269 static int
w_ref(PyObject * v,char * flag,WFILE * p)270 w_ref(PyObject *v, char *flag, WFILE *p)
271 {
272 _Py_hashtable_entry_t *entry;
273 int w;
274
275 if (p->version < 3 || p->hashtable == NULL)
276 return 0; /* not writing object references */
277
278 /* if it has only one reference, it definitely isn't shared */
279 if (Py_REFCNT(v) == 1)
280 return 0;
281
282 entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
283 if (entry != NULL) {
284 /* write the reference index to the stream */
285 _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
286 /* we don't store "long" indices in the dict */
287 assert(0 <= w && w <= 0x7fffffff);
288 w_byte(TYPE_REF, p);
289 w_long(w, p);
290 return 1;
291 } else {
292 size_t s = p->hashtable->entries;
293 /* we don't support long indices */
294 if (s >= 0x7fffffff) {
295 PyErr_SetString(PyExc_ValueError, "too many objects");
296 goto err;
297 }
298 w = (int)s;
299 Py_INCREF(v);
300 if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
301 Py_DECREF(v);
302 goto err;
303 }
304 *flag |= FLAG_REF;
305 return 0;
306 }
307 err:
308 p->error = WFERR_UNMARSHALLABLE;
309 return 1;
310 }
311
312 static void
313 w_complex_object(PyObject *v, char flag, WFILE *p);
314
315 static void
w_object(PyObject * v,WFILE * p)316 w_object(PyObject *v, WFILE *p)
317 {
318 char flag = '\0';
319
320 p->depth++;
321
322 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
323 p->error = WFERR_NESTEDTOODEEP;
324 }
325 else if (v == NULL) {
326 w_byte(TYPE_NULL, p);
327 }
328 else if (v == Py_None) {
329 w_byte(TYPE_NONE, p);
330 }
331 else if (v == PyExc_StopIteration) {
332 w_byte(TYPE_STOPITER, p);
333 }
334 else if (v == Py_Ellipsis) {
335 w_byte(TYPE_ELLIPSIS, p);
336 }
337 else if (v == Py_False) {
338 w_byte(TYPE_FALSE, p);
339 }
340 else if (v == Py_True) {
341 w_byte(TYPE_TRUE, p);
342 }
343 else if (!w_ref(v, &flag, p))
344 w_complex_object(v, flag, p);
345
346 p->depth--;
347 }
348
349 static void
w_complex_object(PyObject * v,char flag,WFILE * p)350 w_complex_object(PyObject *v, char flag, WFILE *p)
351 {
352 Py_ssize_t i, n;
353
354 if (PyLong_CheckExact(v)) {
355 long x = PyLong_AsLong(v);
356 if ((x == -1) && PyErr_Occurred()) {
357 PyLongObject *ob = (PyLongObject *)v;
358 PyErr_Clear();
359 w_PyLong(ob, flag, p);
360 }
361 else {
362 #if SIZEOF_LONG > 4
363 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
364 if (y && y != -1) {
365 /* Too large for TYPE_INT */
366 w_PyLong((PyLongObject*)v, flag, p);
367 }
368 else
369 #endif
370 {
371 W_TYPE(TYPE_INT, p);
372 w_long(x, p);
373 }
374 }
375 }
376 else if (PyFloat_CheckExact(v)) {
377 if (p->version > 1) {
378 unsigned char buf[8];
379 if (_PyFloat_Pack8(PyFloat_AsDouble(v),
380 buf, 1) < 0) {
381 p->error = WFERR_UNMARSHALLABLE;
382 return;
383 }
384 W_TYPE(TYPE_BINARY_FLOAT, p);
385 w_string((char*)buf, 8, p);
386 }
387 else {
388 char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
389 'g', 17, 0, NULL);
390 if (!buf) {
391 p->error = WFERR_NOMEMORY;
392 return;
393 }
394 n = strlen(buf);
395 W_TYPE(TYPE_FLOAT, p);
396 w_byte((int)n, p);
397 w_string(buf, n, p);
398 PyMem_Free(buf);
399 }
400 }
401 else if (PyComplex_CheckExact(v)) {
402 if (p->version > 1) {
403 unsigned char buf[8];
404 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
405 buf, 1) < 0) {
406 p->error = WFERR_UNMARSHALLABLE;
407 return;
408 }
409 W_TYPE(TYPE_BINARY_COMPLEX, p);
410 w_string((char*)buf, 8, p);
411 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
412 buf, 1) < 0) {
413 p->error = WFERR_UNMARSHALLABLE;
414 return;
415 }
416 w_string((char*)buf, 8, p);
417 }
418 else {
419 char *buf;
420 W_TYPE(TYPE_COMPLEX, p);
421 buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
422 'g', 17, 0, NULL);
423 if (!buf) {
424 p->error = WFERR_NOMEMORY;
425 return;
426 }
427 n = strlen(buf);
428 w_byte((int)n, p);
429 w_string(buf, n, p);
430 PyMem_Free(buf);
431 buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
432 'g', 17, 0, NULL);
433 if (!buf) {
434 p->error = WFERR_NOMEMORY;
435 return;
436 }
437 n = strlen(buf);
438 w_byte((int)n, p);
439 w_string(buf, n, p);
440 PyMem_Free(buf);
441 }
442 }
443 else if (PyBytes_CheckExact(v)) {
444 W_TYPE(TYPE_STRING, p);
445 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
446 }
447 else if (PyUnicode_CheckExact(v)) {
448 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
449 int is_short = PyUnicode_GET_LENGTH(v) < 256;
450 if (is_short) {
451 if (PyUnicode_CHECK_INTERNED(v))
452 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
453 else
454 W_TYPE(TYPE_SHORT_ASCII, p);
455 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
456 PyUnicode_GET_LENGTH(v), p);
457 }
458 else {
459 if (PyUnicode_CHECK_INTERNED(v))
460 W_TYPE(TYPE_ASCII_INTERNED, p);
461 else
462 W_TYPE(TYPE_ASCII, p);
463 w_pstring((char *) PyUnicode_1BYTE_DATA(v),
464 PyUnicode_GET_LENGTH(v), p);
465 }
466 }
467 else {
468 PyObject *utf8;
469 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
470 if (utf8 == NULL) {
471 p->depth--;
472 p->error = WFERR_UNMARSHALLABLE;
473 return;
474 }
475 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
476 W_TYPE(TYPE_INTERNED, p);
477 else
478 W_TYPE(TYPE_UNICODE, p);
479 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
480 Py_DECREF(utf8);
481 }
482 }
483 else if (PyTuple_CheckExact(v)) {
484 n = PyTuple_Size(v);
485 if (p->version >= 4 && n < 256) {
486 W_TYPE(TYPE_SMALL_TUPLE, p);
487 w_byte((unsigned char)n, p);
488 }
489 else {
490 W_TYPE(TYPE_TUPLE, p);
491 W_SIZE(n, p);
492 }
493 for (i = 0; i < n; i++) {
494 w_object(PyTuple_GET_ITEM(v, i), p);
495 }
496 }
497 else if (PyList_CheckExact(v)) {
498 W_TYPE(TYPE_LIST, p);
499 n = PyList_GET_SIZE(v);
500 W_SIZE(n, p);
501 for (i = 0; i < n; i++) {
502 w_object(PyList_GET_ITEM(v, i), p);
503 }
504 }
505 else if (PyDict_CheckExact(v)) {
506 Py_ssize_t pos;
507 PyObject *key, *value;
508 W_TYPE(TYPE_DICT, p);
509 /* This one is NULL object terminated! */
510 pos = 0;
511 while (PyDict_Next(v, &pos, &key, &value)) {
512 w_object(key, p);
513 w_object(value, p);
514 }
515 w_object((PyObject *)NULL, p);
516 }
517 else if (PyAnySet_CheckExact(v)) {
518 PyObject *value, *it;
519
520 if (PyObject_TypeCheck(v, &PySet_Type))
521 W_TYPE(TYPE_SET, p);
522 else
523 W_TYPE(TYPE_FROZENSET, p);
524 n = PyObject_Size(v);
525 if (n == -1) {
526 p->depth--;
527 p->error = WFERR_UNMARSHALLABLE;
528 return;
529 }
530 W_SIZE(n, p);
531 it = PyObject_GetIter(v);
532 if (it == NULL) {
533 p->depth--;
534 p->error = WFERR_UNMARSHALLABLE;
535 return;
536 }
537 while ((value = PyIter_Next(it)) != NULL) {
538 w_object(value, p);
539 Py_DECREF(value);
540 }
541 Py_DECREF(it);
542 if (PyErr_Occurred()) {
543 p->depth--;
544 p->error = WFERR_UNMARSHALLABLE;
545 return;
546 }
547 }
548 else if (PyCode_Check(v)) {
549 PyCodeObject *co = (PyCodeObject *)v;
550 W_TYPE(TYPE_CODE, p);
551 w_long(co->co_argcount, p);
552 w_long(co->co_kwonlyargcount, p);
553 w_long(co->co_nlocals, p);
554 w_long(co->co_stacksize, p);
555 w_long(co->co_flags, p);
556 w_object(co->co_code, p);
557 w_object(co->co_consts, p);
558 w_object(co->co_names, p);
559 w_object(co->co_varnames, p);
560 w_object(co->co_freevars, p);
561 w_object(co->co_cellvars, p);
562 w_object(co->co_filename, p);
563 w_object(co->co_name, p);
564 w_long(co->co_firstlineno, p);
565 w_object(co->co_lnotab, p);
566 }
567 else if (PyObject_CheckBuffer(v)) {
568 /* Write unknown bytes-like objects as a bytes object */
569 Py_buffer view;
570 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
571 w_byte(TYPE_UNKNOWN, p);
572 p->depth--;
573 p->error = WFERR_UNMARSHALLABLE;
574 return;
575 }
576 W_TYPE(TYPE_STRING, p);
577 w_pstring(view.buf, view.len, p);
578 PyBuffer_Release(&view);
579 }
580 else {
581 W_TYPE(TYPE_UNKNOWN, p);
582 p->error = WFERR_UNMARSHALLABLE;
583 }
584 }
585
586 static int
w_init_refs(WFILE * wf,int version)587 w_init_refs(WFILE *wf, int version)
588 {
589 if (version >= 3) {
590 wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
591 _Py_hashtable_hash_ptr,
592 _Py_hashtable_compare_direct);
593 if (wf->hashtable == NULL) {
594 PyErr_NoMemory();
595 return -1;
596 }
597 }
598 return 0;
599 }
600
601 static int
w_decref_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry,void * Py_UNUSED (data))602 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
603 void *Py_UNUSED(data))
604 {
605 PyObject *entry_key;
606
607 _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
608 Py_XDECREF(entry_key);
609 return 0;
610 }
611
612 static void
w_clear_refs(WFILE * wf)613 w_clear_refs(WFILE *wf)
614 {
615 if (wf->hashtable != NULL) {
616 _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
617 _Py_hashtable_destroy(wf->hashtable);
618 }
619 }
620
621 /* version currently has no effect for writing ints. */
622 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)623 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
624 {
625 char buf[4];
626 WFILE wf;
627 memset(&wf, 0, sizeof(wf));
628 wf.fp = fp;
629 wf.ptr = wf.buf = buf;
630 wf.end = wf.ptr + sizeof(buf);
631 wf.error = WFERR_OK;
632 wf.version = version;
633 w_long(x, &wf);
634 w_flush(&wf);
635 }
636
637 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)638 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
639 {
640 char buf[BUFSIZ];
641 WFILE wf;
642 memset(&wf, 0, sizeof(wf));
643 wf.fp = fp;
644 wf.ptr = wf.buf = buf;
645 wf.end = wf.ptr + sizeof(buf);
646 wf.error = WFERR_OK;
647 wf.version = version;
648 if (w_init_refs(&wf, version))
649 return; /* caller mush check PyErr_Occurred() */
650 w_object(x, &wf);
651 w_clear_refs(&wf);
652 w_flush(&wf);
653 }
654
/* Reader state shared by all r_* helpers.  r_string() picks the data
   source in this order: an in-memory range (`ptr` != NULL), a Python
   object with readinto() (`readable` != NULL), or the C stream `fp`. */
655 typedef struct {
656 FILE *fp; /* C stream, used when both ptr and readable are NULL */
657 int depth; /* nesting level, bumped by r_object() and checked against MAX_MARSHAL_STACK_DEPTH */
658 PyObject *readable; /* Stream-like object being read from (through its readinto() method) */
659 char *ptr; /* current position for in-memory reads; NULL selects one of the other sources */
660 char *end; /* one past the last byte available for in-memory reads */
661 char *buf; /* scratch buffer allocated by r_string() for stream reads */
662 Py_ssize_t buf_size; /* current capacity of buf */
663 PyObject *refs; /* a list: objects (or None placeholders) registered for TYPE_REF lookups */
664 } RFILE;
665
666 static const char *
r_string(Py_ssize_t n,RFILE * p)667 r_string(Py_ssize_t n, RFILE *p)
668 {
669 Py_ssize_t read = -1;
670
671 if (p->ptr != NULL) {
672 /* Fast path for loads() */
673 char *res = p->ptr;
674 Py_ssize_t left = p->end - p->ptr;
675 if (left < n) {
676 PyErr_SetString(PyExc_EOFError,
677 "marshal data too short");
678 return NULL;
679 }
680 p->ptr += n;
681 return res;
682 }
683 if (p->buf == NULL) {
684 p->buf = PyMem_MALLOC(n);
685 if (p->buf == NULL) {
686 PyErr_NoMemory();
687 return NULL;
688 }
689 p->buf_size = n;
690 }
691 else if (p->buf_size < n) {
692 char *tmp = PyMem_REALLOC(p->buf, n);
693 if (tmp == NULL) {
694 PyErr_NoMemory();
695 return NULL;
696 }
697 p->buf = tmp;
698 p->buf_size = n;
699 }
700
701 if (!p->readable) {
702 assert(p->fp != NULL);
703 read = fread(p->buf, 1, n, p->fp);
704 }
705 else {
706 _Py_IDENTIFIER(readinto);
707 PyObject *res, *mview;
708 Py_buffer buf;
709
710 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
711 return NULL;
712 mview = PyMemoryView_FromBuffer(&buf);
713 if (mview == NULL)
714 return NULL;
715
716 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
717 if (res != NULL) {
718 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
719 Py_DECREF(res);
720 }
721 }
722 if (read != n) {
723 if (!PyErr_Occurred()) {
724 if (read > n)
725 PyErr_Format(PyExc_ValueError,
726 "read() returned too much data: "
727 "%zd bytes requested, %zd returned",
728 n, read);
729 else
730 PyErr_SetString(PyExc_EOFError,
731 "EOF read where not expected");
732 }
733 return NULL;
734 }
735 return p->buf;
736 }
737
738 static int
r_byte(RFILE * p)739 r_byte(RFILE *p)
740 {
741 int c = EOF;
742
743 if (p->ptr != NULL) {
744 if (p->ptr < p->end)
745 c = (unsigned char) *p->ptr++;
746 return c;
747 }
748 if (!p->readable) {
749 assert(p->fp);
750 c = getc(p->fp);
751 }
752 else {
753 const char *ptr = r_string(1, p);
754 if (ptr != NULL)
755 c = *(unsigned char *) ptr;
756 }
757 return c;
758 }
759
760 static int
r_short(RFILE * p)761 r_short(RFILE *p)
762 {
763 short x = -1;
764 const unsigned char *buffer;
765
766 buffer = (const unsigned char *) r_string(2, p);
767 if (buffer != NULL) {
768 x = buffer[0];
769 x |= buffer[1] << 8;
770 /* Sign-extension, in case short greater than 16 bits */
771 x |= -(x & 0x8000);
772 }
773 return x;
774 }
775
776 static long
r_long(RFILE * p)777 r_long(RFILE *p)
778 {
779 long x = -1;
780 const unsigned char *buffer;
781
782 buffer = (const unsigned char *) r_string(4, p);
783 if (buffer != NULL) {
784 x = buffer[0];
785 x |= (long)buffer[1] << 8;
786 x |= (long)buffer[2] << 16;
787 x |= (long)buffer[3] << 24;
788 #if SIZEOF_LONG > 4
789 /* Sign extension for 64-bit machines */
790 x |= -(x & 0x80000000L);
791 #endif
792 }
793 return x;
794 }
795
796 /* r_long64 deals with the TYPE_INT64 code. */
797 static PyObject *
r_long64(RFILE * p)798 r_long64(RFILE *p)
799 {
800 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
801 if (buffer == NULL) {
802 return NULL;
803 }
804 return _PyLong_FromByteArray(buffer, 8,
805 1 /* little endian */,
806 1 /* signed */);
807 }
808
809 static PyObject *
r_PyLong(RFILE * p)810 r_PyLong(RFILE *p)
811 {
812 PyLongObject *ob;
813 long n, size, i;
814 int j, md, shorts_in_top_digit;
815 digit d;
816
817 n = r_long(p);
818 if (PyErr_Occurred())
819 return NULL;
820 if (n == 0)
821 return (PyObject *)_PyLong_New(0);
822 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
823 PyErr_SetString(PyExc_ValueError,
824 "bad marshal data (long size out of range)");
825 return NULL;
826 }
827
828 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
829 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
830 ob = _PyLong_New(size);
831 if (ob == NULL)
832 return NULL;
833
834 Py_SIZE(ob) = n > 0 ? size : -size;
835
836 for (i = 0; i < size-1; i++) {
837 d = 0;
838 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
839 md = r_short(p);
840 if (PyErr_Occurred()) {
841 Py_DECREF(ob);
842 return NULL;
843 }
844 if (md < 0 || md > PyLong_MARSHAL_BASE)
845 goto bad_digit;
846 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
847 }
848 ob->ob_digit[i] = d;
849 }
850
851 d = 0;
852 for (j=0; j < shorts_in_top_digit; j++) {
853 md = r_short(p);
854 if (PyErr_Occurred()) {
855 Py_DECREF(ob);
856 return NULL;
857 }
858 if (md < 0 || md > PyLong_MARSHAL_BASE)
859 goto bad_digit;
860 /* topmost marshal digit should be nonzero */
861 if (md == 0 && j == shorts_in_top_digit - 1) {
862 Py_DECREF(ob);
863 PyErr_SetString(PyExc_ValueError,
864 "bad marshal data (unnormalized long data)");
865 return NULL;
866 }
867 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
868 }
869 if (PyErr_Occurred()) {
870 Py_DECREF(ob);
871 return NULL;
872 }
873 /* top digit should be nonzero, else the resulting PyLong won't be
874 normalized */
875 ob->ob_digit[size-1] = d;
876 return (PyObject *)ob;
877 bad_digit:
878 Py_DECREF(ob);
879 PyErr_SetString(PyExc_ValueError,
880 "bad marshal data (digit out of range in long)");
881 return NULL;
882 }
883
884 /* allocate the reflist index for a new object. Return -1 on failure */
885 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)886 r_ref_reserve(int flag, RFILE *p)
887 {
888 if (flag) { /* currently only FLAG_REF is defined */
889 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
890 if (idx >= 0x7ffffffe) {
891 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
892 return -1;
893 }
894 if (PyList_Append(p->refs, Py_None) < 0)
895 return -1;
896 return idx;
897 } else
898 return 0;
899 }
900
901 /* insert the new object 'o' to the reflist at previously
902 * allocated index 'idx'.
903 * 'o' can be NULL, in which case nothing is done.
904 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
905 * if 'o' was non-NULL, and the function fails, 'o' is released and
906 * NULL returned. This simplifies error checking at the call site since
907 * a single test for NULL for the function result is enough.
908 */
909 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)910 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
911 {
912 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
913 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
914 Py_INCREF(o);
915 PyList_SET_ITEM(p->refs, idx, o);
916 Py_DECREF(tmp);
917 }
918 return o;
919 }
920
921 /* combination of both above, used when an object can be
922 * created whenever it is seen in the file, as opposed to
923 * after having loaded its sub-objects.
924 */
925 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)926 r_ref(PyObject *o, int flag, RFILE *p)
927 {
928 assert(flag & FLAG_REF);
929 if (o == NULL)
930 return NULL;
931 if (PyList_Append(p->refs, o) < 0) {
932 Py_DECREF(o); /* release the new object */
933 return NULL;
934 }
935 return o;
936 }
937
938 static PyObject *
r_object(RFILE * p)939 r_object(RFILE *p)
940 {
941 /* NULL is a valid return value, it does not necessarily means that
942 an exception is set. */
943 PyObject *v, *v2;
944 Py_ssize_t idx = 0;
945 long i, n;
946 int type, code = r_byte(p);
947 int flag, is_interned = 0;
948 PyObject *retval = NULL;
949
950 if (code == EOF) {
951 PyErr_SetString(PyExc_EOFError,
952 "EOF read where object expected");
953 return NULL;
954 }
955
956 p->depth++;
957
958 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
959 p->depth--;
960 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
961 return NULL;
962 }
963
964 flag = code & FLAG_REF;
965 type = code & ~FLAG_REF;
966
967 #define R_REF(O) do{\
968 if (flag) \
969 O = r_ref(O, flag, p);\
970 } while (0)
971
972 switch (type) {
973
974 case TYPE_NULL:
975 break;
976
977 case TYPE_NONE:
978 Py_INCREF(Py_None);
979 retval = Py_None;
980 break;
981
982 case TYPE_STOPITER:
983 Py_INCREF(PyExc_StopIteration);
984 retval = PyExc_StopIteration;
985 break;
986
987 case TYPE_ELLIPSIS:
988 Py_INCREF(Py_Ellipsis);
989 retval = Py_Ellipsis;
990 break;
991
992 case TYPE_FALSE:
993 Py_INCREF(Py_False);
994 retval = Py_False;
995 break;
996
997 case TYPE_TRUE:
998 Py_INCREF(Py_True);
999 retval = Py_True;
1000 break;
1001
1002 case TYPE_INT:
1003 n = r_long(p);
1004 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1005 R_REF(retval);
1006 break;
1007
1008 case TYPE_INT64:
1009 retval = r_long64(p);
1010 R_REF(retval);
1011 break;
1012
1013 case TYPE_LONG:
1014 retval = r_PyLong(p);
1015 R_REF(retval);
1016 break;
1017
1018 case TYPE_FLOAT:
1019 {
1020 char buf[256];
1021 const char *ptr;
1022 double dx;
1023 n = r_byte(p);
1024 if (n == EOF) {
1025 PyErr_SetString(PyExc_EOFError,
1026 "EOF read where object expected");
1027 break;
1028 }
1029 ptr = r_string(n, p);
1030 if (ptr == NULL)
1031 break;
1032 memcpy(buf, ptr, n);
1033 buf[n] = '\0';
1034 dx = PyOS_string_to_double(buf, NULL, NULL);
1035 if (dx == -1.0 && PyErr_Occurred())
1036 break;
1037 retval = PyFloat_FromDouble(dx);
1038 R_REF(retval);
1039 break;
1040 }
1041
1042 case TYPE_BINARY_FLOAT:
1043 {
1044 const unsigned char *buf;
1045 double x;
1046 buf = (const unsigned char *) r_string(8, p);
1047 if (buf == NULL)
1048 break;
1049 x = _PyFloat_Unpack8(buf, 1);
1050 if (x == -1.0 && PyErr_Occurred())
1051 break;
1052 retval = PyFloat_FromDouble(x);
1053 R_REF(retval);
1054 break;
1055 }
1056
1057 case TYPE_COMPLEX:
1058 {
1059 char buf[256];
1060 const char *ptr;
1061 Py_complex c;
1062 n = r_byte(p);
1063 if (n == EOF) {
1064 PyErr_SetString(PyExc_EOFError,
1065 "EOF read where object expected");
1066 break;
1067 }
1068 ptr = r_string(n, p);
1069 if (ptr == NULL)
1070 break;
1071 memcpy(buf, ptr, n);
1072 buf[n] = '\0';
1073 c.real = PyOS_string_to_double(buf, NULL, NULL);
1074 if (c.real == -1.0 && PyErr_Occurred())
1075 break;
1076 n = r_byte(p);
1077 if (n == EOF) {
1078 PyErr_SetString(PyExc_EOFError,
1079 "EOF read where object expected");
1080 break;
1081 }
1082 ptr = r_string(n, p);
1083 if (ptr == NULL)
1084 break;
1085 memcpy(buf, ptr, n);
1086 buf[n] = '\0';
1087 c.imag = PyOS_string_to_double(buf, NULL, NULL);
1088 if (c.imag == -1.0 && PyErr_Occurred())
1089 break;
1090 retval = PyComplex_FromCComplex(c);
1091 R_REF(retval);
1092 break;
1093 }
1094
1095 case TYPE_BINARY_COMPLEX:
1096 {
1097 const unsigned char *buf;
1098 Py_complex c;
1099 buf = (const unsigned char *) r_string(8, p);
1100 if (buf == NULL)
1101 break;
1102 c.real = _PyFloat_Unpack8(buf, 1);
1103 if (c.real == -1.0 && PyErr_Occurred())
1104 break;
1105 buf = (const unsigned char *) r_string(8, p);
1106 if (buf == NULL)
1107 break;
1108 c.imag = _PyFloat_Unpack8(buf, 1);
1109 if (c.imag == -1.0 && PyErr_Occurred())
1110 break;
1111 retval = PyComplex_FromCComplex(c);
1112 R_REF(retval);
1113 break;
1114 }
1115
1116 case TYPE_STRING:
1117 {
1118 const char *ptr;
1119 n = r_long(p);
1120 if (PyErr_Occurred())
1121 break;
1122 if (n < 0 || n > SIZE32_MAX) {
1123 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1124 break;
1125 }
1126 v = PyBytes_FromStringAndSize((char *)NULL, n);
1127 if (v == NULL)
1128 break;
1129 ptr = r_string(n, p);
1130 if (ptr == NULL) {
1131 Py_DECREF(v);
1132 break;
1133 }
1134 memcpy(PyBytes_AS_STRING(v), ptr, n);
1135 retval = v;
1136 R_REF(retval);
1137 break;
1138 }
1139
1140 case TYPE_ASCII_INTERNED:
1141 is_interned = 1;
1142 /* fall through */
1143 case TYPE_ASCII:
1144 n = r_long(p);
1145 if (PyErr_Occurred())
1146 break;
1147 if (n < 0 || n > SIZE32_MAX) {
1148 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1149 break;
1150 }
1151 goto _read_ascii;
1152
1153 case TYPE_SHORT_ASCII_INTERNED:
1154 is_interned = 1;
1155 /* fall through */
1156 case TYPE_SHORT_ASCII:
1157 n = r_byte(p);
1158 if (n == EOF) {
1159 PyErr_SetString(PyExc_EOFError,
1160 "EOF read where object expected");
1161 break;
1162 }
1163 _read_ascii:
1164 {
1165 const char *ptr;
1166 ptr = r_string(n, p);
1167 if (ptr == NULL)
1168 break;
1169 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1170 if (v == NULL)
1171 break;
1172 if (is_interned)
1173 PyUnicode_InternInPlace(&v);
1174 retval = v;
1175 R_REF(retval);
1176 break;
1177 }
1178
1179 case TYPE_INTERNED:
1180 is_interned = 1;
1181 /* fall through */
1182 case TYPE_UNICODE:
1183 {
1184 const char *buffer;
1185
1186 n = r_long(p);
1187 if (PyErr_Occurred())
1188 break;
1189 if (n < 0 || n > SIZE32_MAX) {
1190 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1191 break;
1192 }
1193 if (n != 0) {
1194 buffer = r_string(n, p);
1195 if (buffer == NULL)
1196 break;
1197 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1198 }
1199 else {
1200 v = PyUnicode_New(0, 0);
1201 }
1202 if (v == NULL)
1203 break;
1204 if (is_interned)
1205 PyUnicode_InternInPlace(&v);
1206 retval = v;
1207 R_REF(retval);
1208 break;
1209 }
1210
1211 case TYPE_SMALL_TUPLE:
1212 n = (unsigned char) r_byte(p);
1213 if (PyErr_Occurred())
1214 break;
1215 goto _read_tuple;
1216 case TYPE_TUPLE:
1217 n = r_long(p);
1218 if (PyErr_Occurred())
1219 break;
1220 if (n < 0 || n > SIZE32_MAX) {
1221 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1222 break;
1223 }
1224 _read_tuple:
1225 v = PyTuple_New(n);
1226 R_REF(v);
1227 if (v == NULL)
1228 break;
1229
1230 for (i = 0; i < n; i++) {
1231 v2 = r_object(p);
1232 if ( v2 == NULL ) {
1233 if (!PyErr_Occurred())
1234 PyErr_SetString(PyExc_TypeError,
1235 "NULL object in marshal data for tuple");
1236 Py_DECREF(v);
1237 v = NULL;
1238 break;
1239 }
1240 PyTuple_SET_ITEM(v, i, v2);
1241 }
1242 retval = v;
1243 break;
1244
1245 case TYPE_LIST:
1246 n = r_long(p);
1247 if (PyErr_Occurred())
1248 break;
1249 if (n < 0 || n > SIZE32_MAX) {
1250 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1251 break;
1252 }
1253 v = PyList_New(n);
1254 R_REF(v);
1255 if (v == NULL)
1256 break;
1257 for (i = 0; i < n; i++) {
1258 v2 = r_object(p);
1259 if ( v2 == NULL ) {
1260 if (!PyErr_Occurred())
1261 PyErr_SetString(PyExc_TypeError,
1262 "NULL object in marshal data for list");
1263 Py_DECREF(v);
1264 v = NULL;
1265 break;
1266 }
1267 PyList_SET_ITEM(v, i, v2);
1268 }
1269 retval = v;
1270 break;
1271
1272 case TYPE_DICT:
1273 v = PyDict_New();
1274 R_REF(v);
1275 if (v == NULL)
1276 break;
1277 for (;;) {
1278 PyObject *key, *val;
1279 key = r_object(p);
1280 if (key == NULL)
1281 break;
1282 val = r_object(p);
1283 if (val == NULL) {
1284 Py_DECREF(key);
1285 break;
1286 }
1287 if (PyDict_SetItem(v, key, val) < 0) {
1288 Py_DECREF(key);
1289 Py_DECREF(val);
1290 break;
1291 }
1292 Py_DECREF(key);
1293 Py_DECREF(val);
1294 }
1295 if (PyErr_Occurred()) {
1296 Py_DECREF(v);
1297 v = NULL;
1298 }
1299 retval = v;
1300 break;
1301
1302 case TYPE_SET:
1303 case TYPE_FROZENSET:
1304 n = r_long(p);
1305 if (PyErr_Occurred())
1306 break;
1307 if (n < 0 || n > SIZE32_MAX) {
1308 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1309 break;
1310 }
1311
1312 if (n == 0 && type == TYPE_FROZENSET) {
1313 /* call frozenset() to get the empty frozenset singleton */
1314 v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1315 if (v == NULL)
1316 break;
1317 R_REF(v);
1318 retval = v;
1319 }
1320 else {
1321 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1322 if (type == TYPE_SET) {
1323 R_REF(v);
1324 } else {
1325 /* must use delayed registration of frozensets because they must
1326 * be init with a refcount of 1
1327 */
1328 idx = r_ref_reserve(flag, p);
1329 if (idx < 0)
1330 Py_CLEAR(v); /* signal error */
1331 }
1332 if (v == NULL)
1333 break;
1334
1335 for (i = 0; i < n; i++) {
1336 v2 = r_object(p);
1337 if ( v2 == NULL ) {
1338 if (!PyErr_Occurred())
1339 PyErr_SetString(PyExc_TypeError,
1340 "NULL object in marshal data for set");
1341 Py_DECREF(v);
1342 v = NULL;
1343 break;
1344 }
1345 if (PySet_Add(v, v2) == -1) {
1346 Py_DECREF(v);
1347 Py_DECREF(v2);
1348 v = NULL;
1349 break;
1350 }
1351 Py_DECREF(v2);
1352 }
1353 if (type != TYPE_SET)
1354 v = r_ref_insert(v, idx, flag, p);
1355 retval = v;
1356 }
1357 break;
1358
1359 case TYPE_CODE:
1360 {
1361 int argcount;
1362 int kwonlyargcount;
1363 int nlocals;
1364 int stacksize;
1365 int flags;
1366 PyObject *code = NULL;
1367 PyObject *consts = NULL;
1368 PyObject *names = NULL;
1369 PyObject *varnames = NULL;
1370 PyObject *freevars = NULL;
1371 PyObject *cellvars = NULL;
1372 PyObject *filename = NULL;
1373 PyObject *name = NULL;
1374 int firstlineno;
1375 PyObject *lnotab = NULL;
1376
1377 idx = r_ref_reserve(flag, p);
1378 if (idx < 0)
1379 break;
1380
1381 v = NULL;
1382
1383 /* XXX ignore long->int overflows for now */
1384 argcount = (int)r_long(p);
1385 if (PyErr_Occurred())
1386 goto code_error;
1387 kwonlyargcount = (int)r_long(p);
1388 if (PyErr_Occurred())
1389 goto code_error;
1390 nlocals = (int)r_long(p);
1391 if (PyErr_Occurred())
1392 goto code_error;
1393 stacksize = (int)r_long(p);
1394 if (PyErr_Occurred())
1395 goto code_error;
1396 flags = (int)r_long(p);
1397 if (PyErr_Occurred())
1398 goto code_error;
1399 code = r_object(p);
1400 if (code == NULL)
1401 goto code_error;
1402 consts = r_object(p);
1403 if (consts == NULL)
1404 goto code_error;
1405 names = r_object(p);
1406 if (names == NULL)
1407 goto code_error;
1408 varnames = r_object(p);
1409 if (varnames == NULL)
1410 goto code_error;
1411 freevars = r_object(p);
1412 if (freevars == NULL)
1413 goto code_error;
1414 cellvars = r_object(p);
1415 if (cellvars == NULL)
1416 goto code_error;
1417 filename = r_object(p);
1418 if (filename == NULL)
1419 goto code_error;
1420 name = r_object(p);
1421 if (name == NULL)
1422 goto code_error;
1423 firstlineno = (int)r_long(p);
1424 if (firstlineno == -1 && PyErr_Occurred())
1425 break;
1426 lnotab = r_object(p);
1427 if (lnotab == NULL)
1428 goto code_error;
1429
1430 v = (PyObject *) PyCode_New(
1431 argcount, kwonlyargcount,
1432 nlocals, stacksize, flags,
1433 code, consts, names, varnames,
1434 freevars, cellvars, filename, name,
1435 firstlineno, lnotab);
1436 v = r_ref_insert(v, idx, flag, p);
1437
1438 code_error:
1439 Py_XDECREF(code);
1440 Py_XDECREF(consts);
1441 Py_XDECREF(names);
1442 Py_XDECREF(varnames);
1443 Py_XDECREF(freevars);
1444 Py_XDECREF(cellvars);
1445 Py_XDECREF(filename);
1446 Py_XDECREF(name);
1447 Py_XDECREF(lnotab);
1448 }
1449 retval = v;
1450 break;
1451
1452 case TYPE_REF:
1453 n = r_long(p);
1454 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1455 if (n == -1 && PyErr_Occurred())
1456 break;
1457 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1458 break;
1459 }
1460 v = PyList_GET_ITEM(p->refs, n);
1461 if (v == Py_None) {
1462 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1463 break;
1464 }
1465 Py_INCREF(v);
1466 retval = v;
1467 break;
1468
1469 default:
1470 /* Bogus data got written, which isn't ideal.
1471 This will let you keep working and recover. */
1472 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1473 break;
1474
1475 }
1476 p->depth--;
1477 return retval;
1478 }
1479
1480 static PyObject *
read_object(RFILE * p)1481 read_object(RFILE *p)
1482 {
1483 PyObject *v;
1484 if (PyErr_Occurred()) {
1485 fprintf(stderr, "XXX readobject called with exception set\n");
1486 return NULL;
1487 }
1488 v = r_object(p);
1489 if (v == NULL && !PyErr_Occurred())
1490 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1491 return v;
1492 }
1493
1494 int
PyMarshal_ReadShortFromFile(FILE * fp)1495 PyMarshal_ReadShortFromFile(FILE *fp)
1496 {
1497 RFILE rf;
1498 int res;
1499 assert(fp);
1500 rf.readable = NULL;
1501 rf.fp = fp;
1502 rf.end = rf.ptr = NULL;
1503 rf.buf = NULL;
1504 res = r_short(&rf);
1505 if (rf.buf != NULL)
1506 PyMem_FREE(rf.buf);
1507 return res;
1508 }
1509
1510 long
PyMarshal_ReadLongFromFile(FILE * fp)1511 PyMarshal_ReadLongFromFile(FILE *fp)
1512 {
1513 RFILE rf;
1514 long res;
1515 rf.fp = fp;
1516 rf.readable = NULL;
1517 rf.ptr = rf.end = NULL;
1518 rf.buf = NULL;
1519 res = r_long(&rf);
1520 if (rf.buf != NULL)
1521 PyMem_FREE(rf.buf);
1522 return res;
1523 }
1524
1525 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1526 static off_t
getfilesize(FILE * fp)1527 getfilesize(FILE *fp)
1528 {
1529 struct _Py_stat_struct st;
1530 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1531 return -1;
1532 #if SIZEOF_OFF_T == 4
1533 else if (st.st_size >= INT_MAX)
1534 return (off_t)INT_MAX;
1535 #endif
1536 else
1537 return (off_t)st.st_size;
1538 }
1539
1540 /* If we can get the size of the file up-front, and it's reasonably small,
1541 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1542 * than reading a byte at a time from file; speeds .pyc imports.
1543 * CAUTION: since this may read the entire remainder of the file, don't
1544 * call it unless you know you're done with the file.
1545 */
1546 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1547 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1548 {
1549 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1550 #define REASONABLE_FILE_LIMIT (1L << 18)
1551 off_t filesize;
1552 filesize = getfilesize(fp);
1553 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1554 char* pBuf = (char *)PyMem_MALLOC(filesize);
1555 if (pBuf != NULL) {
1556 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1557 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1558 PyMem_FREE(pBuf);
1559 return v;
1560 }
1561
1562 }
1563 /* We don't have fstat, or we do but the file is larger than
1564 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1565 */
1566 return PyMarshal_ReadObjectFromFile(fp);
1567
1568 #undef REASONABLE_FILE_LIMIT
1569 }
1570
1571 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1572 PyMarshal_ReadObjectFromFile(FILE *fp)
1573 {
1574 RFILE rf;
1575 PyObject *result;
1576 rf.fp = fp;
1577 rf.readable = NULL;
1578 rf.depth = 0;
1579 rf.ptr = rf.end = NULL;
1580 rf.buf = NULL;
1581 rf.refs = PyList_New(0);
1582 if (rf.refs == NULL)
1583 return NULL;
1584 result = r_object(&rf);
1585 Py_DECREF(rf.refs);
1586 if (rf.buf != NULL)
1587 PyMem_FREE(rf.buf);
1588 return result;
1589 }
1590
1591 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1592 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1593 {
1594 RFILE rf;
1595 PyObject *result;
1596 rf.fp = NULL;
1597 rf.readable = NULL;
1598 rf.ptr = (char *)str;
1599 rf.end = (char *)str + len;
1600 rf.buf = NULL;
1601 rf.depth = 0;
1602 rf.refs = PyList_New(0);
1603 if (rf.refs == NULL)
1604 return NULL;
1605 result = r_object(&rf);
1606 Py_DECREF(rf.refs);
1607 if (rf.buf != NULL)
1608 PyMem_FREE(rf.buf);
1609 return result;
1610 }
1611
1612 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1613 PyMarshal_WriteObjectToString(PyObject *x, int version)
1614 {
1615 WFILE wf;
1616
1617 memset(&wf, 0, sizeof(wf));
1618 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1619 if (wf.str == NULL)
1620 return NULL;
1621 wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1622 wf.end = wf.ptr + PyBytes_Size(wf.str);
1623 wf.error = WFERR_OK;
1624 wf.version = version;
1625 if (w_init_refs(&wf, version)) {
1626 Py_DECREF(wf.str);
1627 return NULL;
1628 }
1629 w_object(x, &wf);
1630 w_clear_refs(&wf);
1631 if (wf.str != NULL) {
1632 char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1633 if (wf.ptr - base > PY_SSIZE_T_MAX) {
1634 Py_DECREF(wf.str);
1635 PyErr_SetString(PyExc_OverflowError,
1636 "too much marshal data for a bytes object");
1637 return NULL;
1638 }
1639 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1640 return NULL;
1641 }
1642 if (wf.error != WFERR_OK) {
1643 Py_XDECREF(wf.str);
1644 if (wf.error == WFERR_NOMEMORY)
1645 PyErr_NoMemory();
1646 else
1647 PyErr_SetString(PyExc_ValueError,
1648 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1649 :"object too deeply nested to marshal");
1650 return NULL;
1651 }
1652 return wf.str;
1653 }
1654
1655 /* And an interface for Python programs... */
1656 /*[clinic input]
1657 marshal.dump
1658
1659 value: object
1660 Must be a supported type.
1661 file: object
1662 Must be a writeable binary file.
1663 version: int(c_default="Py_MARSHAL_VERSION") = version
1664 Indicates the data format that dump should use.
1665 /
1666
1667 Write the value on the open file.
1668
1669 If the value has (or contains an object that has) an unsupported type, a
1670 ValueError exception is raised - but garbage data will also be written
1671 to the file. The object will not be properly read back by load().
1672 [clinic start generated code]*/
1673
1674 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1675 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1676 int version)
1677 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1678 {
1679 /* XXX Quick hack -- need to do this differently */
1680 PyObject *s;
1681 PyObject *res;
1682 _Py_IDENTIFIER(write);
1683
1684 s = PyMarshal_WriteObjectToString(value, version);
1685 if (s == NULL)
1686 return NULL;
1687 res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
1688 Py_DECREF(s);
1689 return res;
1690 }
1691
1692 /*[clinic input]
1693 marshal.load
1694
1695 file: object
1696 Must be readable binary file.
1697 /
1698
1699 Read one value from the open file and return it.
1700
1701 If no valid value is read (e.g. because the data has a different Python
1702 version's incompatible marshal format), raise EOFError, ValueError or
1703 TypeError.
1704
1705 Note: If an object containing an unsupported type was marshalled with
1706 dump(), load() will substitute None for the unmarshallable type.
1707 [clinic start generated code]*/
1708
1709 static PyObject *
marshal_load(PyObject * module,PyObject * file)1710 marshal_load(PyObject *module, PyObject *file)
1711 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1712 {
1713 PyObject *data, *result;
1714 _Py_IDENTIFIER(read);
1715 RFILE rf;
1716
1717 /*
1718 * Make a call to the read method, but read zero bytes.
1719 * This is to ensure that the object passed in at least
1720 * has a read method which returns bytes.
1721 * This can be removed if we guarantee good error handling
1722 * for r_string()
1723 */
1724 data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1725 if (data == NULL)
1726 return NULL;
1727 if (!PyBytes_Check(data)) {
1728 PyErr_Format(PyExc_TypeError,
1729 "file.read() returned not bytes but %.100s",
1730 data->ob_type->tp_name);
1731 result = NULL;
1732 }
1733 else {
1734 rf.depth = 0;
1735 rf.fp = NULL;
1736 rf.readable = file;
1737 rf.ptr = rf.end = NULL;
1738 rf.buf = NULL;
1739 if ((rf.refs = PyList_New(0)) != NULL) {
1740 result = read_object(&rf);
1741 Py_DECREF(rf.refs);
1742 if (rf.buf != NULL)
1743 PyMem_FREE(rf.buf);
1744 } else
1745 result = NULL;
1746 }
1747 Py_DECREF(data);
1748 return result;
1749 }
1750
1751 /*[clinic input]
1752 marshal.dumps
1753
1754 value: object
1755 Must be a supported type.
1756 version: int(c_default="Py_MARSHAL_VERSION") = version
1757 Indicates the data format that dumps should use.
1758 /
1759
1760 Return the bytes object that would be written to a file by dump(value, file).
1761
1762 Raise a ValueError exception if value has (or contains an object that has) an
1763 unsupported type.
1764 [clinic start generated code]*/
1765
1766 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1767 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1768 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1769 {
1770 return PyMarshal_WriteObjectToString(value, version);
1771 }
1772
1773 /*[clinic input]
1774 marshal.loads
1775
1776 bytes: Py_buffer
1777 /
1778
1779 Convert the bytes-like object to a value.
1780
1781 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1782 bytes in the input are ignored.
1783 [clinic start generated code]*/
1784
1785 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1786 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1787 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1788 {
1789 RFILE rf;
1790 char *s = bytes->buf;
1791 Py_ssize_t n = bytes->len;
1792 PyObject* result;
1793 rf.fp = NULL;
1794 rf.readable = NULL;
1795 rf.ptr = s;
1796 rf.end = s + n;
1797 rf.depth = 0;
1798 if ((rf.refs = PyList_New(0)) == NULL)
1799 return NULL;
1800 result = read_object(&rf);
1801 Py_DECREF(rf.refs);
1802 return result;
1803 }
1804
/* Method table of the marshal module; it is referenced by marshalmodule.
   Each *_METHODDEF macro supplies one table entry (dump, load, dumps and
   loads respectively); the macros are not defined in this part of the file
   -- presumably they come from the Argument Clinic generated header
   clinic/marshal.c.h. */
static PyMethodDef marshal_methods[] = {
    MARSHAL_DUMP_METHODDEF
    MARSHAL_LOAD_METHODDEF
    MARSHAL_DUMPS_METHODDEF
    MARSHAL_LOADS_METHODDEF
    {NULL,              NULL}           /* sentinel */
};
1812
1813
/* Docstring of the marshal module; marshalmodule passes it to the module
   definition.  The text is user-visible at runtime, so edit it with care. */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
historical format, version 1 shares interned strings and version 2\n\
uses a binary format for floating point numbers.\n\
Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1842
1843
1844
/* Module definition handed to PyModule_Create() by PyMarshal_Init().
   The initializer is positional; the field names in the comments follow
   the documented PyModuleDef layout. */
static struct PyModuleDef marshalmodule = {
    PyModuleDef_HEAD_INIT,      /* m_base */
    "marshal",                  /* m_name */
    module_doc,                 /* m_doc */
    0,                          /* m_size */
    marshal_methods,            /* m_methods */
    NULL,                       /* m_slots */
    NULL,                       /* m_traverse */
    NULL,                       /* m_clear */
    NULL                        /* m_free */
};
1856
1857 PyMODINIT_FUNC
PyMarshal_Init(void)1858 PyMarshal_Init(void)
1859 {
1860 PyObject *mod = PyModule_Create(&marshalmodule);
1861 if (mod == NULL)
1862 return NULL;
1863 PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1864 return mod;
1865 }
1866