1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "pycore_hashtable.h"
16
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21
22 #include "clinic/marshal.c.h"
23
/* Maximum nesting depth tolerated while marshalling or unmarshalling.
 * A marshalled object nested deeper than this risks coring the interpreter,
 * so once the object stack gets this deep an exception is raised instead of
 * continuing.
 *
 * The value is reduced on every Windows build.  It used to be reduced only
 * for debug builds (MS_WINDOWS together with _DEBUG), but on Windows PGO
 * builds the r_object function overallocates its stack and can cause a
 * stack overflow, so all Windows releases get the lower limit.
 * See https://bugs.python.org/issue33720
 */
#ifndef MS_WINDOWS
#  define MAX_MARSHAL_STACK_DEPTH 2000
#else
#  define MAX_MARSHAL_STACK_DEPTH 1000
#endif
40
/* One-byte type codes.  The writer emits one of them ahead of every object
   and the reader dispatches on it. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'   /* followed by an index into the refs */
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'   /* written for unmarshallable objects */
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact encodings; the writer only produces them for version >= 4. */
#define TYPE_ASCII                  'a'
#define TYPE_ASCII_INTERNED         'A'
#define TYPE_SMALL_TUPLE            ')'
#define TYPE_SHORT_ASCII            'z'
#define TYPE_SHORT_ASCII_INTERNED   'Z'

/* Values stored in WFILE.error */
#define WFERR_OK                0
#define WFERR_UNMARSHALLABLE    1
#define WFERR_NESTEDTOODEEP     2
#define WFERR_NOMEMORY          3
79
80 typedef struct {
81 FILE *fp;
82 int error; /* see WFERR_* values */
83 int depth;
84 PyObject *str;
85 char *ptr;
86 const char *end;
87 char *buf;
88 _Py_hashtable_t *hashtable;
89 int version;
90 } WFILE;
91
/* w_byte(c, p): append the single byte `c` to the output of WFILE `p`.
   When the buffer is full, w_reserve() is asked to make room for one more
   byte; if it cannot, the byte is dropped. */
#define w_byte(c, p)                                        \
    do {                                                    \
        if ((p)->ptr != (p)->end || w_reserve((p), 1)) {    \
            *(p)->ptr++ = (c);                              \
        }                                                   \
    } while (0)
96
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100 assert(p->fp != NULL);
101 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102 p->ptr = p->buf;
103 }
104
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108 Py_ssize_t pos, size, delta;
109 if (p->ptr == NULL)
110 return 0; /* An error already occurred */
111 if (p->fp != NULL) {
112 w_flush(p);
113 return needed <= p->end - p->ptr;
114 }
115 assert(p->str != NULL);
116 pos = p->ptr - p->buf;
117 size = PyBytes_GET_SIZE(p->str);
118 if (size > 16*1024*1024)
119 delta = (size >> 3); /* 12.5% overallocation */
120 else
121 delta = size + 1024;
122 delta = Py_MAX(delta, needed);
123 if (delta > PY_SSIZE_T_MAX - size) {
124 p->error = WFERR_NOMEMORY;
125 return 0;
126 }
127 size += delta;
128 if (_PyBytes_Resize(&p->str, size) != 0) {
129 p->end = p->ptr = p->buf = NULL;
130 return 0;
131 }
132 else {
133 p->buf = PyBytes_AS_STRING(p->str);
134 p->ptr = p->buf + pos;
135 p->end = p->buf + size;
136 return 1;
137 }
138 }
139
140 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)141 w_string(const void *s, Py_ssize_t n, WFILE *p)
142 {
143 Py_ssize_t m;
144 if (!n || p->ptr == NULL)
145 return;
146 m = p->end - p->ptr;
147 if (p->fp != NULL) {
148 if (n <= m) {
149 memcpy(p->ptr, s, n);
150 p->ptr += n;
151 }
152 else {
153 w_flush(p);
154 fwrite(s, 1, n, p->fp);
155 }
156 }
157 else {
158 if (n <= m || w_reserve(p, n - m)) {
159 memcpy(p->ptr, s, n);
160 p->ptr += n;
161 }
162 }
163 }
164
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168 w_byte((char)( x & 0xff), p);
169 w_byte((char)((x>> 8) & 0xff), p);
170 }
171
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175 w_byte((char)( x & 0xff), p);
176 w_byte((char)((x>> 8) & 0xff), p);
177 w_byte((char)((x>>16) & 0xff), p);
178 w_byte((char)((x>>24) & 0xff), p);
179 }
180
/* Largest size or count representable in the 4-byte size fields. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` to WFILE `p` as a 4-byte field.
 *
 * Where SIZEOF_SIZE_T > 4, `n` can exceed SIZE32_MAX.  In that case nothing
 * is written, p->error is set to WFERR_UNMARSHALLABLE and the macro executes
 * `return;` in the *enclosing* function, so it may only be used inside
 * functions returning void.
 *
 * p->depth is intentionally left untouched: the functions using this macro
 * run beneath w_object(), which already pairs its depth++ with a depth--.
 * Decrementing here as well would leave the counter one too low after each
 * failure and weaken the MAX_MARSHAL_STACK_DEPTH check (and would do so only
 * on builds taking this branch of the #if).
 */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195
196 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)197 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
198 {
199 W_SIZE(n, p);
200 w_string(s, n, p);
201 }
202
203 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
205 {
206 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207 w_string(s, n, p);
208 }
209
/* Python ints are assumed to be stored internally in a base that is some
   power of 2**15; for the sake of portability they are always read and
   written in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT    15
#define PyLong_MARSHAL_BASE     ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK     (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* how many marshal digits make up one full internal digit */
#define PyLong_MARSHAL_RATIO    (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* W_TYPE(t, p): write the type code `t` to WFILE `p`, or'ed with a variable
   named `flag` that must exist in the scope where the macro is used. */
#define W_TYPE(t, p)                \
    do {                            \
        w_byte((t) | flag, (p));    \
    } while(0)
225
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229 Py_ssize_t i, j, n, l;
230 digit d;
231
232 W_TYPE(TYPE_LONG, p);
233 if (Py_SIZE(ob) == 0) {
234 w_long((long)0, p);
235 return;
236 }
237
238 /* set l to number of base PyLong_MARSHAL_BASE digits */
239 n = Py_ABS(Py_SIZE(ob));
240 l = (n-1) * PyLong_MARSHAL_RATIO;
241 d = ob->ob_digit[n-1];
242 assert(d != 0); /* a PyLong is always normalized */
243 do {
244 d >>= PyLong_MARSHAL_SHIFT;
245 l++;
246 } while (d != 0);
247 if (l > SIZE32_MAX) {
248 p->depth--;
249 p->error = WFERR_UNMARSHALLABLE;
250 return;
251 }
252 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253
254 for (i=0; i < n-1; i++) {
255 d = ob->ob_digit[i];
256 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257 w_short(d & PyLong_MARSHAL_MASK, p);
258 d >>= PyLong_MARSHAL_SHIFT;
259 }
260 assert (d == 0);
261 }
262 d = ob->ob_digit[n-1];
263 do {
264 w_short(d & PyLong_MARSHAL_MASK, p);
265 d >>= PyLong_MARSHAL_SHIFT;
266 } while (d != 0);
267 }
268
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272 unsigned char buf[8];
273 if (_PyFloat_Pack8(v, buf, 1) < 0) {
274 p->error = WFERR_UNMARSHALLABLE;
275 return;
276 }
277 w_string(buf, 8, p);
278 }
279
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
284 if (!buf) {
285 p->error = WFERR_NOMEMORY;
286 return;
287 }
288 w_short_pstring(buf, strlen(buf), p);
289 PyMem_Free(buf);
290 }
291
292 static int
w_ref(PyObject * v,char * flag,WFILE * p)293 w_ref(PyObject *v, char *flag, WFILE *p)
294 {
295 _Py_hashtable_entry_t *entry;
296 int w;
297
298 if (p->version < 3 || p->hashtable == NULL)
299 return 0; /* not writing object references */
300
301 /* if it has only one reference, it definitely isn't shared */
302 if (Py_REFCNT(v) == 1)
303 return 0;
304
305 entry = _Py_hashtable_get_entry(p->hashtable, v);
306 if (entry != NULL) {
307 /* write the reference index to the stream */
308 w = (int)(uintptr_t)entry->value;
309 /* we don't store "long" indices in the dict */
310 assert(0 <= w && w <= 0x7fffffff);
311 w_byte(TYPE_REF, p);
312 w_long(w, p);
313 return 1;
314 } else {
315 size_t s = p->hashtable->nentries;
316 /* we don't support long indices */
317 if (s >= 0x7fffffff) {
318 PyErr_SetString(PyExc_ValueError, "too many objects");
319 goto err;
320 }
321 w = (int)s;
322 Py_INCREF(v);
323 if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
324 Py_DECREF(v);
325 goto err;
326 }
327 *flag |= FLAG_REF;
328 return 0;
329 }
330 err:
331 p->error = WFERR_UNMARSHALLABLE;
332 return 1;
333 }
334
335 static void
336 w_complex_object(PyObject *v, char flag, WFILE *p);
337
338 static void
w_object(PyObject * v,WFILE * p)339 w_object(PyObject *v, WFILE *p)
340 {
341 char flag = '\0';
342
343 p->depth++;
344
345 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
346 p->error = WFERR_NESTEDTOODEEP;
347 }
348 else if (v == NULL) {
349 w_byte(TYPE_NULL, p);
350 }
351 else if (v == Py_None) {
352 w_byte(TYPE_NONE, p);
353 }
354 else if (v == PyExc_StopIteration) {
355 w_byte(TYPE_STOPITER, p);
356 }
357 else if (v == Py_Ellipsis) {
358 w_byte(TYPE_ELLIPSIS, p);
359 }
360 else if (v == Py_False) {
361 w_byte(TYPE_FALSE, p);
362 }
363 else if (v == Py_True) {
364 w_byte(TYPE_TRUE, p);
365 }
366 else if (!w_ref(v, &flag, p))
367 w_complex_object(v, flag, p);
368
369 p->depth--;
370 }
371
372 static void
w_complex_object(PyObject * v,char flag,WFILE * p)373 w_complex_object(PyObject *v, char flag, WFILE *p)
374 {
375 Py_ssize_t i, n;
376
377 if (PyLong_CheckExact(v)) {
378 int overflow;
379 long x = PyLong_AsLongAndOverflow(v, &overflow);
380 if (overflow) {
381 w_PyLong((PyLongObject *)v, flag, p);
382 }
383 else {
384 #if SIZEOF_LONG > 4
385 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
386 if (y && y != -1) {
387 /* Too large for TYPE_INT */
388 w_PyLong((PyLongObject*)v, flag, p);
389 }
390 else
391 #endif
392 {
393 W_TYPE(TYPE_INT, p);
394 w_long(x, p);
395 }
396 }
397 }
398 else if (PyFloat_CheckExact(v)) {
399 if (p->version > 1) {
400 W_TYPE(TYPE_BINARY_FLOAT, p);
401 w_float_bin(PyFloat_AS_DOUBLE(v), p);
402 }
403 else {
404 W_TYPE(TYPE_FLOAT, p);
405 w_float_str(PyFloat_AS_DOUBLE(v), p);
406 }
407 }
408 else if (PyComplex_CheckExact(v)) {
409 if (p->version > 1) {
410 W_TYPE(TYPE_BINARY_COMPLEX, p);
411 w_float_bin(PyComplex_RealAsDouble(v), p);
412 w_float_bin(PyComplex_ImagAsDouble(v), p);
413 }
414 else {
415 W_TYPE(TYPE_COMPLEX, p);
416 w_float_str(PyComplex_RealAsDouble(v), p);
417 w_float_str(PyComplex_ImagAsDouble(v), p);
418 }
419 }
420 else if (PyBytes_CheckExact(v)) {
421 W_TYPE(TYPE_STRING, p);
422 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
423 }
424 else if (PyUnicode_CheckExact(v)) {
425 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
426 int is_short = PyUnicode_GET_LENGTH(v) < 256;
427 if (is_short) {
428 if (PyUnicode_CHECK_INTERNED(v))
429 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
430 else
431 W_TYPE(TYPE_SHORT_ASCII, p);
432 w_short_pstring(PyUnicode_1BYTE_DATA(v),
433 PyUnicode_GET_LENGTH(v), p);
434 }
435 else {
436 if (PyUnicode_CHECK_INTERNED(v))
437 W_TYPE(TYPE_ASCII_INTERNED, p);
438 else
439 W_TYPE(TYPE_ASCII, p);
440 w_pstring(PyUnicode_1BYTE_DATA(v),
441 PyUnicode_GET_LENGTH(v), p);
442 }
443 }
444 else {
445 PyObject *utf8;
446 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
447 if (utf8 == NULL) {
448 p->depth--;
449 p->error = WFERR_UNMARSHALLABLE;
450 return;
451 }
452 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
453 W_TYPE(TYPE_INTERNED, p);
454 else
455 W_TYPE(TYPE_UNICODE, p);
456 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
457 Py_DECREF(utf8);
458 }
459 }
460 else if (PyTuple_CheckExact(v)) {
461 n = PyTuple_GET_SIZE(v);
462 if (p->version >= 4 && n < 256) {
463 W_TYPE(TYPE_SMALL_TUPLE, p);
464 w_byte((unsigned char)n, p);
465 }
466 else {
467 W_TYPE(TYPE_TUPLE, p);
468 W_SIZE(n, p);
469 }
470 for (i = 0; i < n; i++) {
471 w_object(PyTuple_GET_ITEM(v, i), p);
472 }
473 }
474 else if (PyList_CheckExact(v)) {
475 W_TYPE(TYPE_LIST, p);
476 n = PyList_GET_SIZE(v);
477 W_SIZE(n, p);
478 for (i = 0; i < n; i++) {
479 w_object(PyList_GET_ITEM(v, i), p);
480 }
481 }
482 else if (PyDict_CheckExact(v)) {
483 Py_ssize_t pos;
484 PyObject *key, *value;
485 W_TYPE(TYPE_DICT, p);
486 /* This one is NULL object terminated! */
487 pos = 0;
488 while (PyDict_Next(v, &pos, &key, &value)) {
489 w_object(key, p);
490 w_object(value, p);
491 }
492 w_object((PyObject *)NULL, p);
493 }
494 else if (PyAnySet_CheckExact(v)) {
495 PyObject *value;
496 Py_ssize_t pos = 0;
497 Py_hash_t hash;
498
499 if (PyFrozenSet_CheckExact(v))
500 W_TYPE(TYPE_FROZENSET, p);
501 else
502 W_TYPE(TYPE_SET, p);
503 n = PySet_GET_SIZE(v);
504 W_SIZE(n, p);
505 while (_PySet_NextEntry(v, &pos, &value, &hash)) {
506 w_object(value, p);
507 }
508 }
509 else if (PyCode_Check(v)) {
510 PyCodeObject *co = (PyCodeObject *)v;
511 W_TYPE(TYPE_CODE, p);
512 w_long(co->co_argcount, p);
513 w_long(co->co_posonlyargcount, p);
514 w_long(co->co_kwonlyargcount, p);
515 w_long(co->co_nlocals, p);
516 w_long(co->co_stacksize, p);
517 w_long(co->co_flags, p);
518 w_object(co->co_code, p);
519 w_object(co->co_consts, p);
520 w_object(co->co_names, p);
521 w_object(co->co_varnames, p);
522 w_object(co->co_freevars, p);
523 w_object(co->co_cellvars, p);
524 w_object(co->co_filename, p);
525 w_object(co->co_name, p);
526 w_long(co->co_firstlineno, p);
527 w_object(co->co_lnotab, p);
528 }
529 else if (PyObject_CheckBuffer(v)) {
530 /* Write unknown bytes-like objects as a bytes object */
531 Py_buffer view;
532 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
533 w_byte(TYPE_UNKNOWN, p);
534 p->depth--;
535 p->error = WFERR_UNMARSHALLABLE;
536 return;
537 }
538 W_TYPE(TYPE_STRING, p);
539 w_pstring(view.buf, view.len, p);
540 PyBuffer_Release(&view);
541 }
542 else {
543 W_TYPE(TYPE_UNKNOWN, p);
544 p->error = WFERR_UNMARSHALLABLE;
545 }
546 }
547
548 static void
w_decref_entry(void * key)549 w_decref_entry(void *key)
550 {
551 PyObject *entry_key = (PyObject *)key;
552 Py_XDECREF(entry_key);
553 }
554
555 static int
w_init_refs(WFILE * wf,int version)556 w_init_refs(WFILE *wf, int version)
557 {
558 if (version >= 3) {
559 wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
560 _Py_hashtable_compare_direct,
561 w_decref_entry, NULL, NULL);
562 if (wf->hashtable == NULL) {
563 PyErr_NoMemory();
564 return -1;
565 }
566 }
567 return 0;
568 }
569
570 static void
w_clear_refs(WFILE * wf)571 w_clear_refs(WFILE *wf)
572 {
573 if (wf->hashtable != NULL) {
574 _Py_hashtable_destroy(wf->hashtable);
575 }
576 }
577
578 /* version currently has no effect for writing ints. */
579 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)580 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
581 {
582 char buf[4];
583 WFILE wf;
584 memset(&wf, 0, sizeof(wf));
585 wf.fp = fp;
586 wf.ptr = wf.buf = buf;
587 wf.end = wf.ptr + sizeof(buf);
588 wf.error = WFERR_OK;
589 wf.version = version;
590 w_long(x, &wf);
591 w_flush(&wf);
592 }
593
594 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)595 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
596 {
597 char buf[BUFSIZ];
598 WFILE wf;
599 memset(&wf, 0, sizeof(wf));
600 wf.fp = fp;
601 wf.ptr = wf.buf = buf;
602 wf.end = wf.ptr + sizeof(buf);
603 wf.error = WFERR_OK;
604 wf.version = version;
605 if (w_init_refs(&wf, version))
606 return; /* caller mush check PyErr_Occurred() */
607 w_object(x, &wf);
608 w_clear_refs(&wf);
609 w_flush(&wf);
610 }
611
612 typedef struct {
613 FILE *fp;
614 int depth;
615 PyObject *readable; /* Stream-like object being read from */
616 const char *ptr;
617 const char *end;
618 char *buf;
619 Py_ssize_t buf_size;
620 PyObject *refs; /* a list */
621 } RFILE;
622
623 static const char *
r_string(Py_ssize_t n,RFILE * p)624 r_string(Py_ssize_t n, RFILE *p)
625 {
626 Py_ssize_t read = -1;
627
628 if (p->ptr != NULL) {
629 /* Fast path for loads() */
630 const char *res = p->ptr;
631 Py_ssize_t left = p->end - p->ptr;
632 if (left < n) {
633 PyErr_SetString(PyExc_EOFError,
634 "marshal data too short");
635 return NULL;
636 }
637 p->ptr += n;
638 return res;
639 }
640 if (p->buf == NULL) {
641 p->buf = PyMem_MALLOC(n);
642 if (p->buf == NULL) {
643 PyErr_NoMemory();
644 return NULL;
645 }
646 p->buf_size = n;
647 }
648 else if (p->buf_size < n) {
649 char *tmp = PyMem_REALLOC(p->buf, n);
650 if (tmp == NULL) {
651 PyErr_NoMemory();
652 return NULL;
653 }
654 p->buf = tmp;
655 p->buf_size = n;
656 }
657
658 if (!p->readable) {
659 assert(p->fp != NULL);
660 read = fread(p->buf, 1, n, p->fp);
661 }
662 else {
663 _Py_IDENTIFIER(readinto);
664 PyObject *res, *mview;
665 Py_buffer buf;
666
667 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
668 return NULL;
669 mview = PyMemoryView_FromBuffer(&buf);
670 if (mview == NULL)
671 return NULL;
672
673 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
674 if (res != NULL) {
675 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
676 Py_DECREF(res);
677 }
678 }
679 if (read != n) {
680 if (!PyErr_Occurred()) {
681 if (read > n)
682 PyErr_Format(PyExc_ValueError,
683 "read() returned too much data: "
684 "%zd bytes requested, %zd returned",
685 n, read);
686 else
687 PyErr_SetString(PyExc_EOFError,
688 "EOF read where not expected");
689 }
690 return NULL;
691 }
692 return p->buf;
693 }
694
695 static int
r_byte(RFILE * p)696 r_byte(RFILE *p)
697 {
698 int c = EOF;
699
700 if (p->ptr != NULL) {
701 if (p->ptr < p->end)
702 c = (unsigned char) *p->ptr++;
703 return c;
704 }
705 if (!p->readable) {
706 assert(p->fp);
707 c = getc(p->fp);
708 }
709 else {
710 const char *ptr = r_string(1, p);
711 if (ptr != NULL)
712 c = *(const unsigned char *) ptr;
713 }
714 return c;
715 }
716
717 static int
r_short(RFILE * p)718 r_short(RFILE *p)
719 {
720 short x = -1;
721 const unsigned char *buffer;
722
723 buffer = (const unsigned char *) r_string(2, p);
724 if (buffer != NULL) {
725 x = buffer[0];
726 x |= buffer[1] << 8;
727 /* Sign-extension, in case short greater than 16 bits */
728 x |= -(x & 0x8000);
729 }
730 return x;
731 }
732
733 static long
r_long(RFILE * p)734 r_long(RFILE *p)
735 {
736 long x = -1;
737 const unsigned char *buffer;
738
739 buffer = (const unsigned char *) r_string(4, p);
740 if (buffer != NULL) {
741 x = buffer[0];
742 x |= (long)buffer[1] << 8;
743 x |= (long)buffer[2] << 16;
744 x |= (long)buffer[3] << 24;
745 #if SIZEOF_LONG > 4
746 /* Sign extension for 64-bit machines */
747 x |= -(x & 0x80000000L);
748 #endif
749 }
750 return x;
751 }
752
753 /* r_long64 deals with the TYPE_INT64 code. */
754 static PyObject *
r_long64(RFILE * p)755 r_long64(RFILE *p)
756 {
757 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
758 if (buffer == NULL) {
759 return NULL;
760 }
761 return _PyLong_FromByteArray(buffer, 8,
762 1 /* little endian */,
763 1 /* signed */);
764 }
765
766 static PyObject *
r_PyLong(RFILE * p)767 r_PyLong(RFILE *p)
768 {
769 PyLongObject *ob;
770 long n, size, i;
771 int j, md, shorts_in_top_digit;
772 digit d;
773
774 n = r_long(p);
775 if (PyErr_Occurred())
776 return NULL;
777 if (n == 0)
778 return (PyObject *)_PyLong_New(0);
779 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
780 PyErr_SetString(PyExc_ValueError,
781 "bad marshal data (long size out of range)");
782 return NULL;
783 }
784
785 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
786 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
787 ob = _PyLong_New(size);
788 if (ob == NULL)
789 return NULL;
790
791 Py_SET_SIZE(ob, n > 0 ? size : -size);
792
793 for (i = 0; i < size-1; i++) {
794 d = 0;
795 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
796 md = r_short(p);
797 if (PyErr_Occurred()) {
798 Py_DECREF(ob);
799 return NULL;
800 }
801 if (md < 0 || md > PyLong_MARSHAL_BASE)
802 goto bad_digit;
803 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
804 }
805 ob->ob_digit[i] = d;
806 }
807
808 d = 0;
809 for (j=0; j < shorts_in_top_digit; j++) {
810 md = r_short(p);
811 if (PyErr_Occurred()) {
812 Py_DECREF(ob);
813 return NULL;
814 }
815 if (md < 0 || md > PyLong_MARSHAL_BASE)
816 goto bad_digit;
817 /* topmost marshal digit should be nonzero */
818 if (md == 0 && j == shorts_in_top_digit - 1) {
819 Py_DECREF(ob);
820 PyErr_SetString(PyExc_ValueError,
821 "bad marshal data (unnormalized long data)");
822 return NULL;
823 }
824 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
825 }
826 if (PyErr_Occurred()) {
827 Py_DECREF(ob);
828 return NULL;
829 }
830 /* top digit should be nonzero, else the resulting PyLong won't be
831 normalized */
832 ob->ob_digit[size-1] = d;
833 return (PyObject *)ob;
834 bad_digit:
835 Py_DECREF(ob);
836 PyErr_SetString(PyExc_ValueError,
837 "bad marshal data (digit out of range in long)");
838 return NULL;
839 }
840
841 static double
r_float_bin(RFILE * p)842 r_float_bin(RFILE *p)
843 {
844 const unsigned char *buf = (const unsigned char *) r_string(8, p);
845 if (buf == NULL)
846 return -1;
847 return _PyFloat_Unpack8(buf, 1);
848 }
849
850 /* Issue #33720: Disable inlining for reducing the C stack consumption
851 on PGO builds. */
852 _Py_NO_INLINE static double
r_float_str(RFILE * p)853 r_float_str(RFILE *p)
854 {
855 int n;
856 char buf[256];
857 const char *ptr;
858 n = r_byte(p);
859 if (n == EOF) {
860 PyErr_SetString(PyExc_EOFError,
861 "EOF read where object expected");
862 return -1;
863 }
864 ptr = r_string(n, p);
865 if (ptr == NULL) {
866 return -1;
867 }
868 memcpy(buf, ptr, n);
869 buf[n] = '\0';
870 return PyOS_string_to_double(buf, NULL, NULL);
871 }
872
873 /* allocate the reflist index for a new object. Return -1 on failure */
874 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)875 r_ref_reserve(int flag, RFILE *p)
876 {
877 if (flag) { /* currently only FLAG_REF is defined */
878 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
879 if (idx >= 0x7ffffffe) {
880 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
881 return -1;
882 }
883 if (PyList_Append(p->refs, Py_None) < 0)
884 return -1;
885 return idx;
886 } else
887 return 0;
888 }
889
890 /* insert the new object 'o' to the reflist at previously
891 * allocated index 'idx'.
892 * 'o' can be NULL, in which case nothing is done.
893 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
894 * if 'o' was non-NULL, and the function fails, 'o' is released and
895 * NULL returned. This simplifies error checking at the call site since
896 * a single test for NULL for the function result is enough.
897 */
898 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)899 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
900 {
901 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
902 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
903 Py_INCREF(o);
904 PyList_SET_ITEM(p->refs, idx, o);
905 Py_DECREF(tmp);
906 }
907 return o;
908 }
909
910 /* combination of both above, used when an object can be
911 * created whenever it is seen in the file, as opposed to
912 * after having loaded its sub-objects.
913 */
914 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)915 r_ref(PyObject *o, int flag, RFILE *p)
916 {
917 assert(flag & FLAG_REF);
918 if (o == NULL)
919 return NULL;
920 if (PyList_Append(p->refs, o) < 0) {
921 Py_DECREF(o); /* release the new object */
922 return NULL;
923 }
924 return o;
925 }
926
927 static PyObject *
r_object(RFILE * p)928 r_object(RFILE *p)
929 {
930 /* NULL is a valid return value, it does not necessarily means that
931 an exception is set. */
932 PyObject *v, *v2;
933 Py_ssize_t idx = 0;
934 long i, n;
935 int type, code = r_byte(p);
936 int flag, is_interned = 0;
937 PyObject *retval = NULL;
938
939 if (code == EOF) {
940 PyErr_SetString(PyExc_EOFError,
941 "EOF read where object expected");
942 return NULL;
943 }
944
945 p->depth++;
946
947 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
948 p->depth--;
949 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
950 return NULL;
951 }
952
953 flag = code & FLAG_REF;
954 type = code & ~FLAG_REF;
955
956 #define R_REF(O) do{\
957 if (flag) \
958 O = r_ref(O, flag, p);\
959 } while (0)
960
961 switch (type) {
962
963 case TYPE_NULL:
964 break;
965
966 case TYPE_NONE:
967 Py_INCREF(Py_None);
968 retval = Py_None;
969 break;
970
971 case TYPE_STOPITER:
972 Py_INCREF(PyExc_StopIteration);
973 retval = PyExc_StopIteration;
974 break;
975
976 case TYPE_ELLIPSIS:
977 Py_INCREF(Py_Ellipsis);
978 retval = Py_Ellipsis;
979 break;
980
981 case TYPE_FALSE:
982 Py_INCREF(Py_False);
983 retval = Py_False;
984 break;
985
986 case TYPE_TRUE:
987 Py_INCREF(Py_True);
988 retval = Py_True;
989 break;
990
991 case TYPE_INT:
992 n = r_long(p);
993 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
994 R_REF(retval);
995 break;
996
997 case TYPE_INT64:
998 retval = r_long64(p);
999 R_REF(retval);
1000 break;
1001
1002 case TYPE_LONG:
1003 retval = r_PyLong(p);
1004 R_REF(retval);
1005 break;
1006
1007 case TYPE_FLOAT:
1008 {
1009 double x = r_float_str(p);
1010 if (x == -1.0 && PyErr_Occurred())
1011 break;
1012 retval = PyFloat_FromDouble(x);
1013 R_REF(retval);
1014 break;
1015 }
1016
1017 case TYPE_BINARY_FLOAT:
1018 {
1019 double x = r_float_bin(p);
1020 if (x == -1.0 && PyErr_Occurred())
1021 break;
1022 retval = PyFloat_FromDouble(x);
1023 R_REF(retval);
1024 break;
1025 }
1026
1027 case TYPE_COMPLEX:
1028 {
1029 Py_complex c;
1030 c.real = r_float_str(p);
1031 if (c.real == -1.0 && PyErr_Occurred())
1032 break;
1033 c.imag = r_float_str(p);
1034 if (c.imag == -1.0 && PyErr_Occurred())
1035 break;
1036 retval = PyComplex_FromCComplex(c);
1037 R_REF(retval);
1038 break;
1039 }
1040
1041 case TYPE_BINARY_COMPLEX:
1042 {
1043 Py_complex c;
1044 c.real = r_float_bin(p);
1045 if (c.real == -1.0 && PyErr_Occurred())
1046 break;
1047 c.imag = r_float_bin(p);
1048 if (c.imag == -1.0 && PyErr_Occurred())
1049 break;
1050 retval = PyComplex_FromCComplex(c);
1051 R_REF(retval);
1052 break;
1053 }
1054
1055 case TYPE_STRING:
1056 {
1057 const char *ptr;
1058 n = r_long(p);
1059 if (PyErr_Occurred())
1060 break;
1061 if (n < 0 || n > SIZE32_MAX) {
1062 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1063 break;
1064 }
1065 v = PyBytes_FromStringAndSize((char *)NULL, n);
1066 if (v == NULL)
1067 break;
1068 ptr = r_string(n, p);
1069 if (ptr == NULL) {
1070 Py_DECREF(v);
1071 break;
1072 }
1073 memcpy(PyBytes_AS_STRING(v), ptr, n);
1074 retval = v;
1075 R_REF(retval);
1076 break;
1077 }
1078
1079 case TYPE_ASCII_INTERNED:
1080 is_interned = 1;
1081 /* fall through */
1082 case TYPE_ASCII:
1083 n = r_long(p);
1084 if (PyErr_Occurred())
1085 break;
1086 if (n < 0 || n > SIZE32_MAX) {
1087 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1088 break;
1089 }
1090 goto _read_ascii;
1091
1092 case TYPE_SHORT_ASCII_INTERNED:
1093 is_interned = 1;
1094 /* fall through */
1095 case TYPE_SHORT_ASCII:
1096 n = r_byte(p);
1097 if (n == EOF) {
1098 PyErr_SetString(PyExc_EOFError,
1099 "EOF read where object expected");
1100 break;
1101 }
1102 _read_ascii:
1103 {
1104 const char *ptr;
1105 ptr = r_string(n, p);
1106 if (ptr == NULL)
1107 break;
1108 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1109 if (v == NULL)
1110 break;
1111 if (is_interned)
1112 PyUnicode_InternInPlace(&v);
1113 retval = v;
1114 R_REF(retval);
1115 break;
1116 }
1117
1118 case TYPE_INTERNED:
1119 is_interned = 1;
1120 /* fall through */
1121 case TYPE_UNICODE:
1122 {
1123 const char *buffer;
1124
1125 n = r_long(p);
1126 if (PyErr_Occurred())
1127 break;
1128 if (n < 0 || n > SIZE32_MAX) {
1129 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1130 break;
1131 }
1132 if (n != 0) {
1133 buffer = r_string(n, p);
1134 if (buffer == NULL)
1135 break;
1136 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1137 }
1138 else {
1139 v = PyUnicode_New(0, 0);
1140 }
1141 if (v == NULL)
1142 break;
1143 if (is_interned)
1144 PyUnicode_InternInPlace(&v);
1145 retval = v;
1146 R_REF(retval);
1147 break;
1148 }
1149
1150 case TYPE_SMALL_TUPLE:
1151 n = (unsigned char) r_byte(p);
1152 if (PyErr_Occurred())
1153 break;
1154 goto _read_tuple;
1155 case TYPE_TUPLE:
1156 n = r_long(p);
1157 if (PyErr_Occurred())
1158 break;
1159 if (n < 0 || n > SIZE32_MAX) {
1160 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1161 break;
1162 }
1163 _read_tuple:
1164 v = PyTuple_New(n);
1165 R_REF(v);
1166 if (v == NULL)
1167 break;
1168
1169 for (i = 0; i < n; i++) {
1170 v2 = r_object(p);
1171 if ( v2 == NULL ) {
1172 if (!PyErr_Occurred())
1173 PyErr_SetString(PyExc_TypeError,
1174 "NULL object in marshal data for tuple");
1175 Py_DECREF(v);
1176 v = NULL;
1177 break;
1178 }
1179 PyTuple_SET_ITEM(v, i, v2);
1180 }
1181 retval = v;
1182 break;
1183
1184 case TYPE_LIST:
1185 n = r_long(p);
1186 if (PyErr_Occurred())
1187 break;
1188 if (n < 0 || n > SIZE32_MAX) {
1189 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1190 break;
1191 }
1192 v = PyList_New(n);
1193 R_REF(v);
1194 if (v == NULL)
1195 break;
1196 for (i = 0; i < n; i++) {
1197 v2 = r_object(p);
1198 if ( v2 == NULL ) {
1199 if (!PyErr_Occurred())
1200 PyErr_SetString(PyExc_TypeError,
1201 "NULL object in marshal data for list");
1202 Py_DECREF(v);
1203 v = NULL;
1204 break;
1205 }
1206 PyList_SET_ITEM(v, i, v2);
1207 }
1208 retval = v;
1209 break;
1210
1211 case TYPE_DICT:
1212 v = PyDict_New();
1213 R_REF(v);
1214 if (v == NULL)
1215 break;
1216 for (;;) {
1217 PyObject *key, *val;
1218 key = r_object(p);
1219 if (key == NULL)
1220 break;
1221 val = r_object(p);
1222 if (val == NULL) {
1223 Py_DECREF(key);
1224 break;
1225 }
1226 if (PyDict_SetItem(v, key, val) < 0) {
1227 Py_DECREF(key);
1228 Py_DECREF(val);
1229 break;
1230 }
1231 Py_DECREF(key);
1232 Py_DECREF(val);
1233 }
1234 if (PyErr_Occurred()) {
1235 Py_DECREF(v);
1236 v = NULL;
1237 }
1238 retval = v;
1239 break;
1240
1241 case TYPE_SET:
1242 case TYPE_FROZENSET:
1243 n = r_long(p);
1244 if (PyErr_Occurred())
1245 break;
1246 if (n < 0 || n > SIZE32_MAX) {
1247 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1248 break;
1249 }
1250
1251 if (n == 0 && type == TYPE_FROZENSET) {
1252 /* call frozenset() to get the empty frozenset singleton */
1253 v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1254 if (v == NULL)
1255 break;
1256 R_REF(v);
1257 retval = v;
1258 }
1259 else {
1260 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1261 if (type == TYPE_SET) {
1262 R_REF(v);
1263 } else {
1264 /* must use delayed registration of frozensets because they must
1265 * be init with a refcount of 1
1266 */
1267 idx = r_ref_reserve(flag, p);
1268 if (idx < 0)
1269 Py_CLEAR(v); /* signal error */
1270 }
1271 if (v == NULL)
1272 break;
1273
1274 for (i = 0; i < n; i++) {
1275 v2 = r_object(p);
1276 if ( v2 == NULL ) {
1277 if (!PyErr_Occurred())
1278 PyErr_SetString(PyExc_TypeError,
1279 "NULL object in marshal data for set");
1280 Py_DECREF(v);
1281 v = NULL;
1282 break;
1283 }
1284 if (PySet_Add(v, v2) == -1) {
1285 Py_DECREF(v);
1286 Py_DECREF(v2);
1287 v = NULL;
1288 break;
1289 }
1290 Py_DECREF(v2);
1291 }
1292 if (type != TYPE_SET)
1293 v = r_ref_insert(v, idx, flag, p);
1294 retval = v;
1295 }
1296 break;
1297
1298 case TYPE_CODE:
1299 {
1300 int argcount;
1301 int posonlyargcount;
1302 int kwonlyargcount;
1303 int nlocals;
1304 int stacksize;
1305 int flags;
1306 PyObject *code = NULL;
1307 PyObject *consts = NULL;
1308 PyObject *names = NULL;
1309 PyObject *varnames = NULL;
1310 PyObject *freevars = NULL;
1311 PyObject *cellvars = NULL;
1312 PyObject *filename = NULL;
1313 PyObject *name = NULL;
1314 int firstlineno;
1315 PyObject *lnotab = NULL;
1316
1317 idx = r_ref_reserve(flag, p);
1318 if (idx < 0)
1319 break;
1320
1321 v = NULL;
1322
1323 /* XXX ignore long->int overflows for now */
1324 argcount = (int)r_long(p);
1325 if (PyErr_Occurred())
1326 goto code_error;
1327 posonlyargcount = (int)r_long(p);
1328 if (PyErr_Occurred()) {
1329 goto code_error;
1330 }
1331 kwonlyargcount = (int)r_long(p);
1332 if (PyErr_Occurred())
1333 goto code_error;
1334 nlocals = (int)r_long(p);
1335 if (PyErr_Occurred())
1336 goto code_error;
1337 stacksize = (int)r_long(p);
1338 if (PyErr_Occurred())
1339 goto code_error;
1340 flags = (int)r_long(p);
1341 if (PyErr_Occurred())
1342 goto code_error;
1343 code = r_object(p);
1344 if (code == NULL)
1345 goto code_error;
1346 consts = r_object(p);
1347 if (consts == NULL)
1348 goto code_error;
1349 names = r_object(p);
1350 if (names == NULL)
1351 goto code_error;
1352 varnames = r_object(p);
1353 if (varnames == NULL)
1354 goto code_error;
1355 freevars = r_object(p);
1356 if (freevars == NULL)
1357 goto code_error;
1358 cellvars = r_object(p);
1359 if (cellvars == NULL)
1360 goto code_error;
1361 filename = r_object(p);
1362 if (filename == NULL)
1363 goto code_error;
1364 name = r_object(p);
1365 if (name == NULL)
1366 goto code_error;
1367 firstlineno = (int)r_long(p);
1368 if (firstlineno == -1 && PyErr_Occurred())
1369 break;
1370 lnotab = r_object(p);
1371 if (lnotab == NULL)
1372 goto code_error;
1373
1374 if (PySys_Audit("code.__new__", "OOOiiiiii",
1375 code, filename, name, argcount, posonlyargcount,
1376 kwonlyargcount, nlocals, stacksize, flags) < 0) {
1377 goto code_error;
1378 }
1379
1380 v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1381 argcount, posonlyargcount, kwonlyargcount,
1382 nlocals, stacksize, flags,
1383 code, consts, names, varnames,
1384 freevars, cellvars, filename, name,
1385 firstlineno, lnotab);
1386 v = r_ref_insert(v, idx, flag, p);
1387
1388 code_error:
1389 Py_XDECREF(code);
1390 Py_XDECREF(consts);
1391 Py_XDECREF(names);
1392 Py_XDECREF(varnames);
1393 Py_XDECREF(freevars);
1394 Py_XDECREF(cellvars);
1395 Py_XDECREF(filename);
1396 Py_XDECREF(name);
1397 Py_XDECREF(lnotab);
1398 }
1399 retval = v;
1400 break;
1401
1402 case TYPE_REF:
1403 n = r_long(p);
1404 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1405 if (n == -1 && PyErr_Occurred())
1406 break;
1407 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1408 break;
1409 }
1410 v = PyList_GET_ITEM(p->refs, n);
1411 if (v == Py_None) {
1412 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1413 break;
1414 }
1415 Py_INCREF(v);
1416 retval = v;
1417 break;
1418
1419 default:
1420 /* Bogus data got written, which isn't ideal.
1421 This will let you keep working and recover. */
1422 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1423 break;
1424
1425 }
1426 p->depth--;
1427 return retval;
1428 }
1429
1430 static PyObject *
read_object(RFILE * p)1431 read_object(RFILE *p)
1432 {
1433 PyObject *v;
1434 if (PyErr_Occurred()) {
1435 fprintf(stderr, "XXX readobject called with exception set\n");
1436 return NULL;
1437 }
1438 v = r_object(p);
1439 if (v == NULL && !PyErr_Occurred())
1440 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1441 return v;
1442 }
1443
1444 int
PyMarshal_ReadShortFromFile(FILE * fp)1445 PyMarshal_ReadShortFromFile(FILE *fp)
1446 {
1447 RFILE rf;
1448 int res;
1449 assert(fp);
1450 rf.readable = NULL;
1451 rf.fp = fp;
1452 rf.end = rf.ptr = NULL;
1453 rf.buf = NULL;
1454 res = r_short(&rf);
1455 if (rf.buf != NULL)
1456 PyMem_FREE(rf.buf);
1457 return res;
1458 }
1459
1460 long
PyMarshal_ReadLongFromFile(FILE * fp)1461 PyMarshal_ReadLongFromFile(FILE *fp)
1462 {
1463 RFILE rf;
1464 long res;
1465 rf.fp = fp;
1466 rf.readable = NULL;
1467 rf.ptr = rf.end = NULL;
1468 rf.buf = NULL;
1469 res = r_long(&rf);
1470 if (rf.buf != NULL)
1471 PyMem_FREE(rf.buf);
1472 return res;
1473 }
1474
1475 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1476 static off_t
getfilesize(FILE * fp)1477 getfilesize(FILE *fp)
1478 {
1479 struct _Py_stat_struct st;
1480 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1481 return -1;
1482 #if SIZEOF_OFF_T == 4
1483 else if (st.st_size >= INT_MAX)
1484 return (off_t)INT_MAX;
1485 #endif
1486 else
1487 return (off_t)st.st_size;
1488 }
1489
1490 /* If we can get the size of the file up-front, and it's reasonably small,
1491 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1492 * than reading a byte at a time from file; speeds .pyc imports.
1493 * CAUTION: since this may read the entire remainder of the file, don't
1494 * call it unless you know you're done with the file.
1495 */
1496 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1497 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1498 {
1499 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1500 #define REASONABLE_FILE_LIMIT (1L << 18)
1501 off_t filesize;
1502 filesize = getfilesize(fp);
1503 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1504 char* pBuf = (char *)PyMem_MALLOC(filesize);
1505 if (pBuf != NULL) {
1506 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1507 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1508 PyMem_FREE(pBuf);
1509 return v;
1510 }
1511
1512 }
1513 /* We don't have fstat, or we do but the file is larger than
1514 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1515 */
1516 return PyMarshal_ReadObjectFromFile(fp);
1517
1518 #undef REASONABLE_FILE_LIMIT
1519 }
1520
1521 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1522 PyMarshal_ReadObjectFromFile(FILE *fp)
1523 {
1524 RFILE rf;
1525 PyObject *result;
1526 rf.fp = fp;
1527 rf.readable = NULL;
1528 rf.depth = 0;
1529 rf.ptr = rf.end = NULL;
1530 rf.buf = NULL;
1531 rf.refs = PyList_New(0);
1532 if (rf.refs == NULL)
1533 return NULL;
1534 result = r_object(&rf);
1535 Py_DECREF(rf.refs);
1536 if (rf.buf != NULL)
1537 PyMem_FREE(rf.buf);
1538 return result;
1539 }
1540
1541 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1542 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1543 {
1544 RFILE rf;
1545 PyObject *result;
1546 rf.fp = NULL;
1547 rf.readable = NULL;
1548 rf.ptr = str;
1549 rf.end = str + len;
1550 rf.buf = NULL;
1551 rf.depth = 0;
1552 rf.refs = PyList_New(0);
1553 if (rf.refs == NULL)
1554 return NULL;
1555 result = r_object(&rf);
1556 Py_DECREF(rf.refs);
1557 if (rf.buf != NULL)
1558 PyMem_FREE(rf.buf);
1559 return result;
1560 }
1561
1562 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1563 PyMarshal_WriteObjectToString(PyObject *x, int version)
1564 {
1565 WFILE wf;
1566
1567 memset(&wf, 0, sizeof(wf));
1568 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1569 if (wf.str == NULL)
1570 return NULL;
1571 wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1572 wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1573 wf.error = WFERR_OK;
1574 wf.version = version;
1575 if (w_init_refs(&wf, version)) {
1576 Py_DECREF(wf.str);
1577 return NULL;
1578 }
1579 w_object(x, &wf);
1580 w_clear_refs(&wf);
1581 if (wf.str != NULL) {
1582 const char *base = PyBytes_AS_STRING(wf.str);
1583 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1584 return NULL;
1585 }
1586 if (wf.error != WFERR_OK) {
1587 Py_XDECREF(wf.str);
1588 if (wf.error == WFERR_NOMEMORY)
1589 PyErr_NoMemory();
1590 else
1591 PyErr_SetString(PyExc_ValueError,
1592 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1593 :"object too deeply nested to marshal");
1594 return NULL;
1595 }
1596 return wf.str;
1597 }
1598
1599 /* And an interface for Python programs... */
1600 /*[clinic input]
1601 marshal.dump
1602
1603 value: object
1604 Must be a supported type.
1605 file: object
1606 Must be a writeable binary file.
1607 version: int(c_default="Py_MARSHAL_VERSION") = version
1608 Indicates the data format that dump should use.
1609 /
1610
1611 Write the value on the open file.
1612
1613 If the value has (or contains an object that has) an unsupported type, a
1614 ValueError exception is raised - but garbage data will also be written
1615 to the file. The object will not be properly read back by load().
1616 [clinic start generated code]*/
1617
1618 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1619 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1620 int version)
1621 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1622 {
1623 /* XXX Quick hack -- need to do this differently */
1624 PyObject *s;
1625 PyObject *res;
1626 _Py_IDENTIFIER(write);
1627
1628 s = PyMarshal_WriteObjectToString(value, version);
1629 if (s == NULL)
1630 return NULL;
1631 res = _PyObject_CallMethodIdOneArg(file, &PyId_write, s);
1632 Py_DECREF(s);
1633 return res;
1634 }
1635
1636 /*[clinic input]
1637 marshal.load
1638
1639 file: object
1640 Must be readable binary file.
1641 /
1642
1643 Read one value from the open file and return it.
1644
1645 If no valid value is read (e.g. because the data has a different Python
1646 version's incompatible marshal format), raise EOFError, ValueError or
1647 TypeError.
1648
1649 Note: If an object containing an unsupported type was marshalled with
1650 dump(), load() will substitute None for the unmarshallable type.
1651 [clinic start generated code]*/
1652
1653 static PyObject *
marshal_load(PyObject * module,PyObject * file)1654 marshal_load(PyObject *module, PyObject *file)
1655 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1656 {
1657 PyObject *data, *result;
1658 _Py_IDENTIFIER(read);
1659 RFILE rf;
1660
1661 /*
1662 * Make a call to the read method, but read zero bytes.
1663 * This is to ensure that the object passed in at least
1664 * has a read method which returns bytes.
1665 * This can be removed if we guarantee good error handling
1666 * for r_string()
1667 */
1668 data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1669 if (data == NULL)
1670 return NULL;
1671 if (!PyBytes_Check(data)) {
1672 PyErr_Format(PyExc_TypeError,
1673 "file.read() returned not bytes but %.100s",
1674 Py_TYPE(data)->tp_name);
1675 result = NULL;
1676 }
1677 else {
1678 rf.depth = 0;
1679 rf.fp = NULL;
1680 rf.readable = file;
1681 rf.ptr = rf.end = NULL;
1682 rf.buf = NULL;
1683 if ((rf.refs = PyList_New(0)) != NULL) {
1684 result = read_object(&rf);
1685 Py_DECREF(rf.refs);
1686 if (rf.buf != NULL)
1687 PyMem_FREE(rf.buf);
1688 } else
1689 result = NULL;
1690 }
1691 Py_DECREF(data);
1692 return result;
1693 }
1694
1695 /*[clinic input]
1696 marshal.dumps
1697
1698 value: object
1699 Must be a supported type.
1700 version: int(c_default="Py_MARSHAL_VERSION") = version
1701 Indicates the data format that dumps should use.
1702 /
1703
1704 Return the bytes object that would be written to a file by dump(value, file).
1705
1706 Raise a ValueError exception if value has (or contains an object that has) an
1707 unsupported type.
1708 [clinic start generated code]*/
1709
1710 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1711 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1712 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1713 {
1714 return PyMarshal_WriteObjectToString(value, version);
1715 }
1716
1717 /*[clinic input]
1718 marshal.loads
1719
1720 bytes: Py_buffer
1721 /
1722
1723 Convert the bytes-like object to a value.
1724
1725 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1726 bytes in the input are ignored.
1727 [clinic start generated code]*/
1728
1729 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1730 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1731 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1732 {
1733 RFILE rf;
1734 char *s = bytes->buf;
1735 Py_ssize_t n = bytes->len;
1736 PyObject* result;
1737 rf.fp = NULL;
1738 rf.readable = NULL;
1739 rf.ptr = s;
1740 rf.end = s + n;
1741 rf.depth = 0;
1742 if ((rf.refs = PyList_New(0)) == NULL)
1743 return NULL;
1744 result = read_object(&rf);
1745 Py_DECREF(rf.refs);
1746 return result;
1747 }
1748
1749 static PyMethodDef marshal_methods[] = {
1750 MARSHAL_DUMP_METHODDEF
1751 MARSHAL_LOAD_METHODDEF
1752 MARSHAL_DUMPS_METHODDEF
1753 MARSHAL_LOADS_METHODDEF
1754 {NULL, NULL} /* sentinel */
1755 };
1756
1757
/* Docstring for the marshal module; it is passed to the module definition
 * (marshalmodule) further down.  The text is user-visible at runtime, so
 * treat any edit as a behaviour change.
 *
 * NOTE(review): the header comment of this file says version 3 of the
 * protocol supports circular links, which seems at odds with the
 * "recursive lists and dictionaries ... infinite loops" warning in the
 * text -- confirm before rewording.
 */
1758 PyDoc_STRVAR(module_doc,
1759 "This module contains functions that can read and write Python values in\n\
1760 a binary format. The format is specific to Python, but independent of\n\
1761 machine architecture issues.\n\
1762 \n\
1763 Not all Python object types are supported; in general, only objects\n\
1764 whose value is independent from a particular invocation of Python can be\n\
1765 written and read by this module. The following types are supported:\n\
1766 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1767 tuples, lists, sets, dictionaries, and code objects, where it\n\
1768 should be understood that tuples, lists and dictionaries are only\n\
1769 supported as long as the values contained therein are themselves\n\
1770 supported; and recursive lists and dictionaries should not be written\n\
1771 (they will cause infinite loops).\n\
1772 \n\
1773 Variables:\n\
1774 \n\
1775 version -- indicates the format that the module uses. Version 0 is the\n\
1776 historical format, version 1 shares interned strings and version 2\n\
1777 uses a binary format for floating point numbers.\n\
1778 Version 3 shares common object references (New in version 3.4).\n\
1779 \n\
1780 Functions:\n\
1781 \n\
1782 dump() -- write value to a file\n\
1783 load() -- read value from a file\n\
1784 dumps() -- marshal value as a bytes object\n\
1785 loads() -- read value from a bytes-like object");
1786
1787
1788
1789 static struct PyModuleDef marshalmodule = {
1790 PyModuleDef_HEAD_INIT,
1791 "marshal",
1792 module_doc,
1793 0,
1794 marshal_methods,
1795 NULL,
1796 NULL,
1797 NULL,
1798 NULL
1799 };
1800
1801 PyMODINIT_FUNC
PyMarshal_Init(void)1802 PyMarshal_Init(void)
1803 {
1804 PyObject *mod = PyModule_Create(&marshalmodule);
1805 if (mod == NULL)
1806 return NULL;
1807 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1808 Py_DECREF(mod);
1809 return NULL;
1810 }
1811 return mod;
1812 }
1813