/* Write Python objects to files and read them back.
   This is primarily intended for writing and reading compiled Python code,
   even though dicts, lists, sets and frozensets, not commonly seen in
   code objects, are supported.
   Version 3 of this protocol properly supports circular links
   and sharing. */

#define PY_SSIZE_T_CLEAN

#include "Python.h"
#include "longintrepr.h"
#include "code.h"
#include "marshal.h"
#include "../Modules/hashtable.h"

/*[clinic input]
module marshal
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/

#include "clinic/marshal.c.h"

/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * The limit is reduced on Windows.  It used to be reduced only for Windows
 * debug builds (MS_WINDOWS && _DEBUG), but on Windows PGO builds the
 * r_object function overallocates its stack and can cause a stack overflow
 * (https://bugs.python.org/issue33720), so the lower limit now applies to
 * all Windows builds.
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif

/* One-byte type codes that introduce each object in the marshal stream. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact codes; the writer only emits these when version >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3

80 typedef struct {
81     FILE *fp;
82     int error;  /* see WFERR_* values */
83     int depth;
84     PyObject *str;
85     char *ptr;
86     char *end;
87     char *buf;
88     _Py_hashtable_t *hashtable;
89     int version;
90 } WFILE;
91 
/* Append the single byte c to writer p.  When the buffer is full,
   w_reserve() is asked for one more byte; if that fails the byte is
   silently dropped.  Note that p is evaluated several times. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)

97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100     assert(p->fp != NULL);
101     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102     p->ptr = p->buf;
103 }
104 
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108     Py_ssize_t pos, size, delta;
109     if (p->ptr == NULL)
110         return 0; /* An error already occurred */
111     if (p->fp != NULL) {
112         w_flush(p);
113         return needed <= p->end - p->ptr;
114     }
115     assert(p->str != NULL);
116     pos = p->ptr - p->buf;
117     size = PyBytes_Size(p->str);
118     if (size > 16*1024*1024)
119         delta = (size >> 3);            /* 12.5% overallocation */
120     else
121         delta = size + 1024;
122     delta = Py_MAX(delta, needed);
123     if (delta > PY_SSIZE_T_MAX - size) {
124         p->error = WFERR_NOMEMORY;
125         return 0;
126     }
127     size += delta;
128     if (_PyBytes_Resize(&p->str, size) != 0) {
129         p->ptr = p->buf = p->end = NULL;
130         return 0;
131     }
132     else {
133         p->buf = PyBytes_AS_STRING(p->str);
134         p->ptr = p->buf + pos;
135         p->end = p->buf + size;
136         return 1;
137     }
138 }
139 
140 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)141 w_string(const char *s, Py_ssize_t n, WFILE *p)
142 {
143     Py_ssize_t m;
144     if (!n || p->ptr == NULL)
145         return;
146     m = p->end - p->ptr;
147     if (p->fp != NULL) {
148         if (n <= m) {
149             memcpy(p->ptr, s, n);
150             p->ptr += n;
151         }
152         else {
153             w_flush(p);
154             fwrite(s, 1, n, p->fp);
155         }
156     }
157     else {
158         if (n <= m || w_reserve(p, n - m)) {
159             memcpy(p->ptr, s, n);
160             p->ptr += n;
161         }
162     }
163 }
164 
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168     w_byte((char)( x      & 0xff), p);
169     w_byte((char)((x>> 8) & 0xff), p);
170 }
171 
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175     w_byte((char)( x      & 0xff), p);
176     w_byte((char)((x>> 8) & 0xff), p);
177     w_byte((char)((x>>16) & 0xff), p);
178     w_byte((char)((x>>24) & 0xff), p);
179 }
180 
/* Largest size/count that can be stored in the 4-byte fields written by
   w_long(). */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size n as a 4-byte field.  Where size_t is wider
   than 4 bytes, a value above SIZE32_MAX instead sets WFERR_UNMARSHALLABLE,
   decrements p->depth and *returns from the enclosing function*, so the
   macro may only be used inside functions returning void. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif

196 static void
w_pstring(const char * s,Py_ssize_t n,WFILE * p)197 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
198 {
199         W_SIZE(n, p);
200         w_string(s, n, p);
201 }
202 
203 static void
w_short_pstring(const char * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
205 {
206     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207     w_string(s, n, p);
208 }
209 
/* We assume that Python ints are stored internally in base some power of
   2**15; for the sake of portability we'll always read and write them in base
   exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of 15-bit marshal digits stored in one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* Write the type code t combined with the caller's reference flag.
   Relies on a variable named `flag` being in scope at the point of use. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)

226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229     Py_ssize_t i, j, n, l;
230     digit d;
231 
232     W_TYPE(TYPE_LONG, p);
233     if (Py_SIZE(ob) == 0) {
234         w_long((long)0, p);
235         return;
236     }
237 
238     /* set l to number of base PyLong_MARSHAL_BASE digits */
239     n = Py_ABS(Py_SIZE(ob));
240     l = (n-1) * PyLong_MARSHAL_RATIO;
241     d = ob->ob_digit[n-1];
242     assert(d != 0); /* a PyLong is always normalized */
243     do {
244         d >>= PyLong_MARSHAL_SHIFT;
245         l++;
246     } while (d != 0);
247     if (l > SIZE32_MAX) {
248         p->depth--;
249         p->error = WFERR_UNMARSHALLABLE;
250         return;
251     }
252     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253 
254     for (i=0; i < n-1; i++) {
255         d = ob->ob_digit[i];
256         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257             w_short(d & PyLong_MARSHAL_MASK, p);
258             d >>= PyLong_MARSHAL_SHIFT;
259         }
260         assert (d == 0);
261     }
262     d = ob->ob_digit[n-1];
263     do {
264         w_short(d & PyLong_MARSHAL_MASK, p);
265         d >>= PyLong_MARSHAL_SHIFT;
266     } while (d != 0);
267 }
268 
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272     unsigned char buf[8];
273     if (_PyFloat_Pack8(v, buf, 1) < 0) {
274         p->error = WFERR_UNMARSHALLABLE;
275         return;
276     }
277     w_string((const char *)buf, 8, p);
278 }
279 
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283     int n;
284     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
285     if (!buf) {
286         p->error = WFERR_NOMEMORY;
287         return;
288     }
289     n = (int)strlen(buf);
290     w_byte(n, p);
291     w_string(buf, n, p);
292     PyMem_Free(buf);
293 }
294 
295 static int
w_ref(PyObject * v,char * flag,WFILE * p)296 w_ref(PyObject *v, char *flag, WFILE *p)
297 {
298     _Py_hashtable_entry_t *entry;
299     int w;
300 
301     if (p->version < 3 || p->hashtable == NULL)
302         return 0; /* not writing object references */
303 
304     /* if it has only one reference, it definitely isn't shared */
305     if (Py_REFCNT(v) == 1)
306         return 0;
307 
308     entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
309     if (entry != NULL) {
310         /* write the reference index to the stream */
311         _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
312         /* we don't store "long" indices in the dict */
313         assert(0 <= w && w <= 0x7fffffff);
314         w_byte(TYPE_REF, p);
315         w_long(w, p);
316         return 1;
317     } else {
318         size_t s = p->hashtable->entries;
319         /* we don't support long indices */
320         if (s >= 0x7fffffff) {
321             PyErr_SetString(PyExc_ValueError, "too many objects");
322             goto err;
323         }
324         w = (int)s;
325         Py_INCREF(v);
326         if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
327             Py_DECREF(v);
328             goto err;
329         }
330         *flag |= FLAG_REF;
331         return 0;
332     }
333 err:
334     p->error = WFERR_UNMARSHALLABLE;
335     return 1;
336 }
337 
338 static void
339 w_complex_object(PyObject *v, char flag, WFILE *p);
340 
341 static void
w_object(PyObject * v,WFILE * p)342 w_object(PyObject *v, WFILE *p)
343 {
344     char flag = '\0';
345 
346     p->depth++;
347 
348     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
349         p->error = WFERR_NESTEDTOODEEP;
350     }
351     else if (v == NULL) {
352         w_byte(TYPE_NULL, p);
353     }
354     else if (v == Py_None) {
355         w_byte(TYPE_NONE, p);
356     }
357     else if (v == PyExc_StopIteration) {
358         w_byte(TYPE_STOPITER, p);
359     }
360     else if (v == Py_Ellipsis) {
361         w_byte(TYPE_ELLIPSIS, p);
362     }
363     else if (v == Py_False) {
364         w_byte(TYPE_FALSE, p);
365     }
366     else if (v == Py_True) {
367         w_byte(TYPE_TRUE, p);
368     }
369     else if (!w_ref(v, &flag, p))
370         w_complex_object(v, flag, p);
371 
372     p->depth--;
373 }
374 
375 static void
w_complex_object(PyObject * v,char flag,WFILE * p)376 w_complex_object(PyObject *v, char flag, WFILE *p)
377 {
378     Py_ssize_t i, n;
379 
380     if (PyLong_CheckExact(v)) {
381         long x = PyLong_AsLong(v);
382         if ((x == -1)  && PyErr_Occurred()) {
383             PyLongObject *ob = (PyLongObject *)v;
384             PyErr_Clear();
385             w_PyLong(ob, flag, p);
386         }
387         else {
388 #if SIZEOF_LONG > 4
389             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
390             if (y && y != -1) {
391                 /* Too large for TYPE_INT */
392                 w_PyLong((PyLongObject*)v, flag, p);
393             }
394             else
395 #endif
396             {
397                 W_TYPE(TYPE_INT, p);
398                 w_long(x, p);
399             }
400         }
401     }
402     else if (PyFloat_CheckExact(v)) {
403         if (p->version > 1) {
404             W_TYPE(TYPE_BINARY_FLOAT, p);
405             w_float_bin(PyFloat_AS_DOUBLE(v), p);
406         }
407         else {
408             W_TYPE(TYPE_FLOAT, p);
409             w_float_str(PyFloat_AS_DOUBLE(v), p);
410         }
411     }
412     else if (PyComplex_CheckExact(v)) {
413         if (p->version > 1) {
414             W_TYPE(TYPE_BINARY_COMPLEX, p);
415             w_float_bin(PyComplex_RealAsDouble(v), p);
416             w_float_bin(PyComplex_ImagAsDouble(v), p);
417         }
418         else {
419             W_TYPE(TYPE_COMPLEX, p);
420             w_float_str(PyComplex_RealAsDouble(v), p);
421             w_float_str(PyComplex_ImagAsDouble(v), p);
422         }
423     }
424     else if (PyBytes_CheckExact(v)) {
425         W_TYPE(TYPE_STRING, p);
426         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
427     }
428     else if (PyUnicode_CheckExact(v)) {
429         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
430             int is_short = PyUnicode_GET_LENGTH(v) < 256;
431             if (is_short) {
432                 if (PyUnicode_CHECK_INTERNED(v))
433                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
434                 else
435                     W_TYPE(TYPE_SHORT_ASCII, p);
436                 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
437                                 PyUnicode_GET_LENGTH(v), p);
438             }
439             else {
440                 if (PyUnicode_CHECK_INTERNED(v))
441                     W_TYPE(TYPE_ASCII_INTERNED, p);
442                 else
443                     W_TYPE(TYPE_ASCII, p);
444                 w_pstring((char *) PyUnicode_1BYTE_DATA(v),
445                           PyUnicode_GET_LENGTH(v), p);
446             }
447         }
448         else {
449             PyObject *utf8;
450             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
451             if (utf8 == NULL) {
452                 p->depth--;
453                 p->error = WFERR_UNMARSHALLABLE;
454                 return;
455             }
456             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
457                 W_TYPE(TYPE_INTERNED, p);
458             else
459                 W_TYPE(TYPE_UNICODE, p);
460             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
461             Py_DECREF(utf8);
462         }
463     }
464     else if (PyTuple_CheckExact(v)) {
465         n = PyTuple_Size(v);
466         if (p->version >= 4 && n < 256) {
467             W_TYPE(TYPE_SMALL_TUPLE, p);
468             w_byte((unsigned char)n, p);
469         }
470         else {
471             W_TYPE(TYPE_TUPLE, p);
472             W_SIZE(n, p);
473         }
474         for (i = 0; i < n; i++) {
475             w_object(PyTuple_GET_ITEM(v, i), p);
476         }
477     }
478     else if (PyList_CheckExact(v)) {
479         W_TYPE(TYPE_LIST, p);
480         n = PyList_GET_SIZE(v);
481         W_SIZE(n, p);
482         for (i = 0; i < n; i++) {
483             w_object(PyList_GET_ITEM(v, i), p);
484         }
485     }
486     else if (PyDict_CheckExact(v)) {
487         Py_ssize_t pos;
488         PyObject *key, *value;
489         W_TYPE(TYPE_DICT, p);
490         /* This one is NULL object terminated! */
491         pos = 0;
492         while (PyDict_Next(v, &pos, &key, &value)) {
493             w_object(key, p);
494             w_object(value, p);
495         }
496         w_object((PyObject *)NULL, p);
497     }
498     else if (PyAnySet_CheckExact(v)) {
499         PyObject *value, *it;
500 
501         if (PyObject_TypeCheck(v, &PySet_Type))
502             W_TYPE(TYPE_SET, p);
503         else
504             W_TYPE(TYPE_FROZENSET, p);
505         n = PyObject_Size(v);
506         if (n == -1) {
507             p->depth--;
508             p->error = WFERR_UNMARSHALLABLE;
509             return;
510         }
511         W_SIZE(n, p);
512         it = PyObject_GetIter(v);
513         if (it == NULL) {
514             p->depth--;
515             p->error = WFERR_UNMARSHALLABLE;
516             return;
517         }
518         while ((value = PyIter_Next(it)) != NULL) {
519             w_object(value, p);
520             Py_DECREF(value);
521         }
522         Py_DECREF(it);
523         if (PyErr_Occurred()) {
524             p->depth--;
525             p->error = WFERR_UNMARSHALLABLE;
526             return;
527         }
528     }
529     else if (PyCode_Check(v)) {
530         PyCodeObject *co = (PyCodeObject *)v;
531         W_TYPE(TYPE_CODE, p);
532         w_long(co->co_argcount, p);
533         w_long(co->co_posonlyargcount, p);
534         w_long(co->co_kwonlyargcount, p);
535         w_long(co->co_nlocals, p);
536         w_long(co->co_stacksize, p);
537         w_long(co->co_flags, p);
538         w_object(co->co_code, p);
539         w_object(co->co_consts, p);
540         w_object(co->co_names, p);
541         w_object(co->co_varnames, p);
542         w_object(co->co_freevars, p);
543         w_object(co->co_cellvars, p);
544         w_object(co->co_filename, p);
545         w_object(co->co_name, p);
546         w_long(co->co_firstlineno, p);
547         w_object(co->co_lnotab, p);
548     }
549     else if (PyObject_CheckBuffer(v)) {
550         /* Write unknown bytes-like objects as a bytes object */
551         Py_buffer view;
552         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
553             w_byte(TYPE_UNKNOWN, p);
554             p->depth--;
555             p->error = WFERR_UNMARSHALLABLE;
556             return;
557         }
558         W_TYPE(TYPE_STRING, p);
559         w_pstring(view.buf, view.len, p);
560         PyBuffer_Release(&view);
561     }
562     else {
563         W_TYPE(TYPE_UNKNOWN, p);
564         p->error = WFERR_UNMARSHALLABLE;
565     }
566 }
567 
568 static int
w_init_refs(WFILE * wf,int version)569 w_init_refs(WFILE *wf, int version)
570 {
571     if (version >= 3) {
572         wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
573                                           _Py_hashtable_hash_ptr,
574                                           _Py_hashtable_compare_direct);
575         if (wf->hashtable == NULL) {
576             PyErr_NoMemory();
577             return -1;
578         }
579     }
580     return 0;
581 }
582 
583 static int
w_decref_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry,void * Py_UNUSED (data))584 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
585                void *Py_UNUSED(data))
586 {
587     PyObject *entry_key;
588 
589     _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
590     Py_XDECREF(entry_key);
591     return 0;
592 }
593 
594 static void
w_clear_refs(WFILE * wf)595 w_clear_refs(WFILE *wf)
596 {
597     if (wf->hashtable != NULL) {
598         _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
599         _Py_hashtable_destroy(wf->hashtable);
600     }
601 }
602 
603 /* version currently has no effect for writing ints. */
604 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)605 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
606 {
607     char buf[4];
608     WFILE wf;
609     memset(&wf, 0, sizeof(wf));
610     wf.fp = fp;
611     wf.ptr = wf.buf = buf;
612     wf.end = wf.ptr + sizeof(buf);
613     wf.error = WFERR_OK;
614     wf.version = version;
615     w_long(x, &wf);
616     w_flush(&wf);
617 }
618 
619 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)620 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
621 {
622     char buf[BUFSIZ];
623     WFILE wf;
624     memset(&wf, 0, sizeof(wf));
625     wf.fp = fp;
626     wf.ptr = wf.buf = buf;
627     wf.end = wf.ptr + sizeof(buf);
628     wf.error = WFERR_OK;
629     wf.version = version;
630     if (w_init_refs(&wf, version))
631         return; /* caller mush check PyErr_Occurred() */
632     w_object(x, &wf);
633     w_clear_refs(&wf);
634     w_flush(&wf);
635 }
636 
637 typedef struct {
638     FILE *fp;
639     int depth;
640     PyObject *readable;  /* Stream-like object being read from */
641     char *ptr;
642     char *end;
643     char *buf;
644     Py_ssize_t buf_size;
645     PyObject *refs;  /* a list */
646 } RFILE;
647 
648 static const char *
r_string(Py_ssize_t n,RFILE * p)649 r_string(Py_ssize_t n, RFILE *p)
650 {
651     Py_ssize_t read = -1;
652 
653     if (p->ptr != NULL) {
654         /* Fast path for loads() */
655         char *res = p->ptr;
656         Py_ssize_t left = p->end - p->ptr;
657         if (left < n) {
658             PyErr_SetString(PyExc_EOFError,
659                             "marshal data too short");
660             return NULL;
661         }
662         p->ptr += n;
663         return res;
664     }
665     if (p->buf == NULL) {
666         p->buf = PyMem_MALLOC(n);
667         if (p->buf == NULL) {
668             PyErr_NoMemory();
669             return NULL;
670         }
671         p->buf_size = n;
672     }
673     else if (p->buf_size < n) {
674         char *tmp = PyMem_REALLOC(p->buf, n);
675         if (tmp == NULL) {
676             PyErr_NoMemory();
677             return NULL;
678         }
679         p->buf = tmp;
680         p->buf_size = n;
681     }
682 
683     if (!p->readable) {
684         assert(p->fp != NULL);
685         read = fread(p->buf, 1, n, p->fp);
686     }
687     else {
688         _Py_IDENTIFIER(readinto);
689         PyObject *res, *mview;
690         Py_buffer buf;
691 
692         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
693             return NULL;
694         mview = PyMemoryView_FromBuffer(&buf);
695         if (mview == NULL)
696             return NULL;
697 
698         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
699         if (res != NULL) {
700             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
701             Py_DECREF(res);
702         }
703     }
704     if (read != n) {
705         if (!PyErr_Occurred()) {
706             if (read > n)
707                 PyErr_Format(PyExc_ValueError,
708                              "read() returned too much data: "
709                              "%zd bytes requested, %zd returned",
710                              n, read);
711             else
712                 PyErr_SetString(PyExc_EOFError,
713                                 "EOF read where not expected");
714         }
715         return NULL;
716     }
717     return p->buf;
718 }
719 
720 static int
r_byte(RFILE * p)721 r_byte(RFILE *p)
722 {
723     int c = EOF;
724 
725     if (p->ptr != NULL) {
726         if (p->ptr < p->end)
727             c = (unsigned char) *p->ptr++;
728         return c;
729     }
730     if (!p->readable) {
731         assert(p->fp);
732         c = getc(p->fp);
733     }
734     else {
735         const char *ptr = r_string(1, p);
736         if (ptr != NULL)
737             c = *(unsigned char *) ptr;
738     }
739     return c;
740 }
741 
742 static int
r_short(RFILE * p)743 r_short(RFILE *p)
744 {
745     short x = -1;
746     const unsigned char *buffer;
747 
748     buffer = (const unsigned char *) r_string(2, p);
749     if (buffer != NULL) {
750         x = buffer[0];
751         x |= buffer[1] << 8;
752         /* Sign-extension, in case short greater than 16 bits */
753         x |= -(x & 0x8000);
754     }
755     return x;
756 }
757 
758 static long
r_long(RFILE * p)759 r_long(RFILE *p)
760 {
761     long x = -1;
762     const unsigned char *buffer;
763 
764     buffer = (const unsigned char *) r_string(4, p);
765     if (buffer != NULL) {
766         x = buffer[0];
767         x |= (long)buffer[1] << 8;
768         x |= (long)buffer[2] << 16;
769         x |= (long)buffer[3] << 24;
770 #if SIZEOF_LONG > 4
771         /* Sign extension for 64-bit machines */
772         x |= -(x & 0x80000000L);
773 #endif
774     }
775     return x;
776 }
777 
778 /* r_long64 deals with the TYPE_INT64 code. */
779 static PyObject *
r_long64(RFILE * p)780 r_long64(RFILE *p)
781 {
782     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
783     if (buffer == NULL) {
784         return NULL;
785     }
786     return _PyLong_FromByteArray(buffer, 8,
787                                  1 /* little endian */,
788                                  1 /* signed */);
789 }
790 
791 static PyObject *
r_PyLong(RFILE * p)792 r_PyLong(RFILE *p)
793 {
794     PyLongObject *ob;
795     long n, size, i;
796     int j, md, shorts_in_top_digit;
797     digit d;
798 
799     n = r_long(p);
800     if (PyErr_Occurred())
801         return NULL;
802     if (n == 0)
803         return (PyObject *)_PyLong_New(0);
804     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
805         PyErr_SetString(PyExc_ValueError,
806                        "bad marshal data (long size out of range)");
807         return NULL;
808     }
809 
810     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
811     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
812     ob = _PyLong_New(size);
813     if (ob == NULL)
814         return NULL;
815 
816     Py_SIZE(ob) = n > 0 ? size : -size;
817 
818     for (i = 0; i < size-1; i++) {
819         d = 0;
820         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
821             md = r_short(p);
822             if (PyErr_Occurred()) {
823                 Py_DECREF(ob);
824                 return NULL;
825             }
826             if (md < 0 || md > PyLong_MARSHAL_BASE)
827                 goto bad_digit;
828             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
829         }
830         ob->ob_digit[i] = d;
831     }
832 
833     d = 0;
834     for (j=0; j < shorts_in_top_digit; j++) {
835         md = r_short(p);
836         if (PyErr_Occurred()) {
837             Py_DECREF(ob);
838             return NULL;
839         }
840         if (md < 0 || md > PyLong_MARSHAL_BASE)
841             goto bad_digit;
842         /* topmost marshal digit should be nonzero */
843         if (md == 0 && j == shorts_in_top_digit - 1) {
844             Py_DECREF(ob);
845             PyErr_SetString(PyExc_ValueError,
846                 "bad marshal data (unnormalized long data)");
847             return NULL;
848         }
849         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
850     }
851     if (PyErr_Occurred()) {
852         Py_DECREF(ob);
853         return NULL;
854     }
855     /* top digit should be nonzero, else the resulting PyLong won't be
856        normalized */
857     ob->ob_digit[size-1] = d;
858     return (PyObject *)ob;
859   bad_digit:
860     Py_DECREF(ob);
861     PyErr_SetString(PyExc_ValueError,
862                     "bad marshal data (digit out of range in long)");
863     return NULL;
864 }
865 
866 static double
r_float_bin(RFILE * p)867 r_float_bin(RFILE *p)
868 {
869     const unsigned char *buf = (const unsigned char *) r_string(8, p);
870     if (buf == NULL)
871         return -1;
872     return _PyFloat_Unpack8(buf, 1);
873 }
874 
875 /* Issue #33720: Disable inlining for reducing the C stack consumption
876    on PGO builds. */
877 _Py_NO_INLINE static double
r_float_str(RFILE * p)878 r_float_str(RFILE *p)
879 {
880     int n;
881     char buf[256];
882     const char *ptr;
883     n = r_byte(p);
884     if (n == EOF) {
885         PyErr_SetString(PyExc_EOFError,
886             "EOF read where object expected");
887         return -1;
888     }
889     ptr = r_string(n, p);
890     if (ptr == NULL) {
891         return -1;
892     }
893     memcpy(buf, ptr, n);
894     buf[n] = '\0';
895     return PyOS_string_to_double(buf, NULL, NULL);
896 }
897 
898 /* allocate the reflist index for a new object. Return -1 on failure */
899 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)900 r_ref_reserve(int flag, RFILE *p)
901 {
902     if (flag) { /* currently only FLAG_REF is defined */
903         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
904         if (idx >= 0x7ffffffe) {
905             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
906             return -1;
907         }
908         if (PyList_Append(p->refs, Py_None) < 0)
909             return -1;
910         return idx;
911     } else
912         return 0;
913 }
914 
915 /* insert the new object 'o' to the reflist at previously
916  * allocated index 'idx'.
917  * 'o' can be NULL, in which case nothing is done.
918  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
919  * if 'o' was non-NULL, and the function fails, 'o' is released and
920  * NULL returned. This simplifies error checking at the call site since
921  * a single test for NULL for the function result is enough.
922  */
923 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)924 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
925 {
926     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
927         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
928         Py_INCREF(o);
929         PyList_SET_ITEM(p->refs, idx, o);
930         Py_DECREF(tmp);
931     }
932     return o;
933 }
934 
935 /* combination of both above, used when an object can be
936  * created whenever it is seen in the file, as opposed to
937  * after having loaded its sub-objects.
938  */
939 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)940 r_ref(PyObject *o, int flag, RFILE *p)
941 {
942     assert(flag & FLAG_REF);
943     if (o == NULL)
944         return NULL;
945     if (PyList_Append(p->refs, o) < 0) {
946         Py_DECREF(o); /* release the new object */
947         return NULL;
948     }
949     return o;
950 }
951 
952 static PyObject *
r_object(RFILE * p)953 r_object(RFILE *p)
954 {
955     /* NULL is a valid return value, it does not necessarily means that
956        an exception is set. */
957     PyObject *v, *v2;
958     Py_ssize_t idx = 0;
959     long i, n;
960     int type, code = r_byte(p);
961     int flag, is_interned = 0;
962     PyObject *retval = NULL;
963 
964     if (code == EOF) {
965         PyErr_SetString(PyExc_EOFError,
966                         "EOF read where object expected");
967         return NULL;
968     }
969 
970     p->depth++;
971 
972     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
973         p->depth--;
974         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
975         return NULL;
976     }
977 
978     flag = code & FLAG_REF;
979     type = code & ~FLAG_REF;
980 
981 #define R_REF(O) do{\
982     if (flag) \
983         O = r_ref(O, flag, p);\
984 } while (0)
985 
986     switch (type) {
987 
988     case TYPE_NULL:
989         break;
990 
991     case TYPE_NONE:
992         Py_INCREF(Py_None);
993         retval = Py_None;
994         break;
995 
996     case TYPE_STOPITER:
997         Py_INCREF(PyExc_StopIteration);
998         retval = PyExc_StopIteration;
999         break;
1000 
1001     case TYPE_ELLIPSIS:
1002         Py_INCREF(Py_Ellipsis);
1003         retval = Py_Ellipsis;
1004         break;
1005 
1006     case TYPE_FALSE:
1007         Py_INCREF(Py_False);
1008         retval = Py_False;
1009         break;
1010 
1011     case TYPE_TRUE:
1012         Py_INCREF(Py_True);
1013         retval = Py_True;
1014         break;
1015 
1016     case TYPE_INT:
1017         n = r_long(p);
1018         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1019         R_REF(retval);
1020         break;
1021 
1022     case TYPE_INT64:
1023         retval = r_long64(p);
1024         R_REF(retval);
1025         break;
1026 
1027     case TYPE_LONG:
1028         retval = r_PyLong(p);
1029         R_REF(retval);
1030         break;
1031 
1032     case TYPE_FLOAT:
1033         {
1034             double x = r_float_str(p);
1035             if (x == -1.0 && PyErr_Occurred())
1036                 break;
1037             retval = PyFloat_FromDouble(x);
1038             R_REF(retval);
1039             break;
1040         }
1041 
1042     case TYPE_BINARY_FLOAT:
1043         {
1044             double x = r_float_bin(p);
1045             if (x == -1.0 && PyErr_Occurred())
1046                 break;
1047             retval = PyFloat_FromDouble(x);
1048             R_REF(retval);
1049             break;
1050         }
1051 
1052     case TYPE_COMPLEX:
1053         {
1054             Py_complex c;
1055             c.real = r_float_str(p);
1056             if (c.real == -1.0 && PyErr_Occurred())
1057                 break;
1058             c.imag = r_float_str(p);
1059             if (c.imag == -1.0 && PyErr_Occurred())
1060                 break;
1061             retval = PyComplex_FromCComplex(c);
1062             R_REF(retval);
1063             break;
1064         }
1065 
1066     case TYPE_BINARY_COMPLEX:
1067         {
1068             Py_complex c;
1069             c.real = r_float_bin(p);
1070             if (c.real == -1.0 && PyErr_Occurred())
1071                 break;
1072             c.imag = r_float_bin(p);
1073             if (c.imag == -1.0 && PyErr_Occurred())
1074                 break;
1075             retval = PyComplex_FromCComplex(c);
1076             R_REF(retval);
1077             break;
1078         }
1079 
1080     case TYPE_STRING:
1081         {
1082             const char *ptr;
1083             n = r_long(p);
1084             if (PyErr_Occurred())
1085                 break;
1086             if (n < 0 || n > SIZE32_MAX) {
1087                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1088                 break;
1089             }
1090             v = PyBytes_FromStringAndSize((char *)NULL, n);
1091             if (v == NULL)
1092                 break;
1093             ptr = r_string(n, p);
1094             if (ptr == NULL) {
1095                 Py_DECREF(v);
1096                 break;
1097             }
1098             memcpy(PyBytes_AS_STRING(v), ptr, n);
1099             retval = v;
1100             R_REF(retval);
1101             break;
1102         }
1103 
1104     case TYPE_ASCII_INTERNED:
1105         is_interned = 1;
1106         /* fall through */
1107     case TYPE_ASCII:
1108         n = r_long(p);
1109         if (PyErr_Occurred())
1110             break;
1111         if (n < 0 || n > SIZE32_MAX) {
1112             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1113             break;
1114         }
1115         goto _read_ascii;
1116 
1117     case TYPE_SHORT_ASCII_INTERNED:
1118         is_interned = 1;
1119         /* fall through */
1120     case TYPE_SHORT_ASCII:
1121         n = r_byte(p);
1122         if (n == EOF) {
1123             PyErr_SetString(PyExc_EOFError,
1124                 "EOF read where object expected");
1125             break;
1126         }
1127     _read_ascii:
1128         {
1129             const char *ptr;
1130             ptr = r_string(n, p);
1131             if (ptr == NULL)
1132                 break;
1133             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1134             if (v == NULL)
1135                 break;
1136             if (is_interned)
1137                 PyUnicode_InternInPlace(&v);
1138             retval = v;
1139             R_REF(retval);
1140             break;
1141         }
1142 
1143     case TYPE_INTERNED:
1144         is_interned = 1;
1145         /* fall through */
1146     case TYPE_UNICODE:
1147         {
1148         const char *buffer;
1149 
1150         n = r_long(p);
1151         if (PyErr_Occurred())
1152             break;
1153         if (n < 0 || n > SIZE32_MAX) {
1154             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1155             break;
1156         }
1157         if (n != 0) {
1158             buffer = r_string(n, p);
1159             if (buffer == NULL)
1160                 break;
1161             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1162         }
1163         else {
1164             v = PyUnicode_New(0, 0);
1165         }
1166         if (v == NULL)
1167             break;
1168         if (is_interned)
1169             PyUnicode_InternInPlace(&v);
1170         retval = v;
1171         R_REF(retval);
1172         break;
1173         }
1174 
1175     case TYPE_SMALL_TUPLE:
1176         n = (unsigned char) r_byte(p);
1177         if (PyErr_Occurred())
1178             break;
1179         goto _read_tuple;
1180     case TYPE_TUPLE:
1181         n = r_long(p);
1182         if (PyErr_Occurred())
1183             break;
1184         if (n < 0 || n > SIZE32_MAX) {
1185             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1186             break;
1187         }
1188     _read_tuple:
1189         v = PyTuple_New(n);
1190         R_REF(v);
1191         if (v == NULL)
1192             break;
1193 
1194         for (i = 0; i < n; i++) {
1195             v2 = r_object(p);
1196             if ( v2 == NULL ) {
1197                 if (!PyErr_Occurred())
1198                     PyErr_SetString(PyExc_TypeError,
1199                         "NULL object in marshal data for tuple");
1200                 Py_DECREF(v);
1201                 v = NULL;
1202                 break;
1203             }
1204             PyTuple_SET_ITEM(v, i, v2);
1205         }
1206         retval = v;
1207         break;
1208 
1209     case TYPE_LIST:
1210         n = r_long(p);
1211         if (PyErr_Occurred())
1212             break;
1213         if (n < 0 || n > SIZE32_MAX) {
1214             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1215             break;
1216         }
1217         v = PyList_New(n);
1218         R_REF(v);
1219         if (v == NULL)
1220             break;
1221         for (i = 0; i < n; i++) {
1222             v2 = r_object(p);
1223             if ( v2 == NULL ) {
1224                 if (!PyErr_Occurred())
1225                     PyErr_SetString(PyExc_TypeError,
1226                         "NULL object in marshal data for list");
1227                 Py_DECREF(v);
1228                 v = NULL;
1229                 break;
1230             }
1231             PyList_SET_ITEM(v, i, v2);
1232         }
1233         retval = v;
1234         break;
1235 
1236     case TYPE_DICT:
1237         v = PyDict_New();
1238         R_REF(v);
1239         if (v == NULL)
1240             break;
1241         for (;;) {
1242             PyObject *key, *val;
1243             key = r_object(p);
1244             if (key == NULL)
1245                 break;
1246             val = r_object(p);
1247             if (val == NULL) {
1248                 Py_DECREF(key);
1249                 break;
1250             }
1251             if (PyDict_SetItem(v, key, val) < 0) {
1252                 Py_DECREF(key);
1253                 Py_DECREF(val);
1254                 break;
1255             }
1256             Py_DECREF(key);
1257             Py_DECREF(val);
1258         }
1259         if (PyErr_Occurred()) {
1260             Py_DECREF(v);
1261             v = NULL;
1262         }
1263         retval = v;
1264         break;
1265 
1266     case TYPE_SET:
1267     case TYPE_FROZENSET:
1268         n = r_long(p);
1269         if (PyErr_Occurred())
1270             break;
1271         if (n < 0 || n > SIZE32_MAX) {
1272             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1273             break;
1274         }
1275 
1276         if (n == 0 && type == TYPE_FROZENSET) {
1277             /* call frozenset() to get the empty frozenset singleton */
1278             v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1279             if (v == NULL)
1280                 break;
1281             R_REF(v);
1282             retval = v;
1283         }
1284         else {
1285             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1286             if (type == TYPE_SET) {
1287                 R_REF(v);
1288             } else {
1289                 /* must use delayed registration of frozensets because they must
1290                  * be init with a refcount of 1
1291                  */
1292                 idx = r_ref_reserve(flag, p);
1293                 if (idx < 0)
1294                     Py_CLEAR(v); /* signal error */
1295             }
1296             if (v == NULL)
1297                 break;
1298 
1299             for (i = 0; i < n; i++) {
1300                 v2 = r_object(p);
1301                 if ( v2 == NULL ) {
1302                     if (!PyErr_Occurred())
1303                         PyErr_SetString(PyExc_TypeError,
1304                             "NULL object in marshal data for set");
1305                     Py_DECREF(v);
1306                     v = NULL;
1307                     break;
1308                 }
1309                 if (PySet_Add(v, v2) == -1) {
1310                     Py_DECREF(v);
1311                     Py_DECREF(v2);
1312                     v = NULL;
1313                     break;
1314                 }
1315                 Py_DECREF(v2);
1316             }
1317             if (type != TYPE_SET)
1318                 v = r_ref_insert(v, idx, flag, p);
1319             retval = v;
1320         }
1321         break;
1322 
1323     case TYPE_CODE:
1324         {
1325             int argcount;
1326             int posonlyargcount;
1327             int kwonlyargcount;
1328             int nlocals;
1329             int stacksize;
1330             int flags;
1331             PyObject *code = NULL;
1332             PyObject *consts = NULL;
1333             PyObject *names = NULL;
1334             PyObject *varnames = NULL;
1335             PyObject *freevars = NULL;
1336             PyObject *cellvars = NULL;
1337             PyObject *filename = NULL;
1338             PyObject *name = NULL;
1339             int firstlineno;
1340             PyObject *lnotab = NULL;
1341 
1342             idx = r_ref_reserve(flag, p);
1343             if (idx < 0)
1344                 break;
1345 
1346             v = NULL;
1347 
1348             /* XXX ignore long->int overflows for now */
1349             argcount = (int)r_long(p);
1350             if (PyErr_Occurred())
1351                 goto code_error;
1352             posonlyargcount = (int)r_long(p);
1353             if (PyErr_Occurred()) {
1354                 goto code_error;
1355             }
1356             kwonlyargcount = (int)r_long(p);
1357             if (PyErr_Occurred())
1358                 goto code_error;
1359             nlocals = (int)r_long(p);
1360             if (PyErr_Occurred())
1361                 goto code_error;
1362             stacksize = (int)r_long(p);
1363             if (PyErr_Occurred())
1364                 goto code_error;
1365             flags = (int)r_long(p);
1366             if (PyErr_Occurred())
1367                 goto code_error;
1368             code = r_object(p);
1369             if (code == NULL)
1370                 goto code_error;
1371             consts = r_object(p);
1372             if (consts == NULL)
1373                 goto code_error;
1374             names = r_object(p);
1375             if (names == NULL)
1376                 goto code_error;
1377             varnames = r_object(p);
1378             if (varnames == NULL)
1379                 goto code_error;
1380             freevars = r_object(p);
1381             if (freevars == NULL)
1382                 goto code_error;
1383             cellvars = r_object(p);
1384             if (cellvars == NULL)
1385                 goto code_error;
1386             filename = r_object(p);
1387             if (filename == NULL)
1388                 goto code_error;
1389             name = r_object(p);
1390             if (name == NULL)
1391                 goto code_error;
1392             firstlineno = (int)r_long(p);
1393             if (firstlineno == -1 && PyErr_Occurred())
1394                 break;
1395             lnotab = r_object(p);
1396             if (lnotab == NULL)
1397                 goto code_error;
1398 
1399             v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1400                             argcount, posonlyargcount, kwonlyargcount,
1401                             nlocals, stacksize, flags,
1402                             code, consts, names, varnames,
1403                             freevars, cellvars, filename, name,
1404                             firstlineno, lnotab);
1405             v = r_ref_insert(v, idx, flag, p);
1406 
1407           code_error:
1408             Py_XDECREF(code);
1409             Py_XDECREF(consts);
1410             Py_XDECREF(names);
1411             Py_XDECREF(varnames);
1412             Py_XDECREF(freevars);
1413             Py_XDECREF(cellvars);
1414             Py_XDECREF(filename);
1415             Py_XDECREF(name);
1416             Py_XDECREF(lnotab);
1417         }
1418         retval = v;
1419         break;
1420 
1421     case TYPE_REF:
1422         n = r_long(p);
1423         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1424             if (n == -1 && PyErr_Occurred())
1425                 break;
1426             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1427             break;
1428         }
1429         v = PyList_GET_ITEM(p->refs, n);
1430         if (v == Py_None) {
1431             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1432             break;
1433         }
1434         Py_INCREF(v);
1435         retval = v;
1436         break;
1437 
1438     default:
1439         /* Bogus data got written, which isn't ideal.
1440            This will let you keep working and recover. */
1441         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1442         break;
1443 
1444     }
1445     p->depth--;
1446     return retval;
1447 }
1448 
1449 static PyObject *
read_object(RFILE * p)1450 read_object(RFILE *p)
1451 {
1452     PyObject *v;
1453     if (PyErr_Occurred()) {
1454         fprintf(stderr, "XXX readobject called with exception set\n");
1455         return NULL;
1456     }
1457     v = r_object(p);
1458     if (v == NULL && !PyErr_Occurred())
1459         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1460     return v;
1461 }
1462 
1463 int
PyMarshal_ReadShortFromFile(FILE * fp)1464 PyMarshal_ReadShortFromFile(FILE *fp)
1465 {
1466     RFILE rf;
1467     int res;
1468     assert(fp);
1469     rf.readable = NULL;
1470     rf.fp = fp;
1471     rf.end = rf.ptr = NULL;
1472     rf.buf = NULL;
1473     res = r_short(&rf);
1474     if (rf.buf != NULL)
1475         PyMem_FREE(rf.buf);
1476     return res;
1477 }
1478 
1479 long
PyMarshal_ReadLongFromFile(FILE * fp)1480 PyMarshal_ReadLongFromFile(FILE *fp)
1481 {
1482     RFILE rf;
1483     long res;
1484     rf.fp = fp;
1485     rf.readable = NULL;
1486     rf.ptr = rf.end = NULL;
1487     rf.buf = NULL;
1488     res = r_long(&rf);
1489     if (rf.buf != NULL)
1490         PyMem_FREE(rf.buf);
1491     return res;
1492 }
1493 
1494 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1495 static off_t
getfilesize(FILE * fp)1496 getfilesize(FILE *fp)
1497 {
1498     struct _Py_stat_struct st;
1499     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1500         return -1;
1501 #if SIZEOF_OFF_T == 4
1502     else if (st.st_size >= INT_MAX)
1503         return (off_t)INT_MAX;
1504 #endif
1505     else
1506         return (off_t)st.st_size;
1507 }
1508 
1509 /* If we can get the size of the file up-front, and it's reasonably small,
1510  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1511  * than reading a byte at a time from file; speeds .pyc imports.
1512  * CAUTION:  since this may read the entire remainder of the file, don't
1513  * call it unless you know you're done with the file.
1514  */
1515 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1516 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1517 {
1518 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1519 #define REASONABLE_FILE_LIMIT (1L << 18)
1520     off_t filesize;
1521     filesize = getfilesize(fp);
1522     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1523         char* pBuf = (char *)PyMem_MALLOC(filesize);
1524         if (pBuf != NULL) {
1525             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1526             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1527             PyMem_FREE(pBuf);
1528             return v;
1529         }
1530 
1531     }
1532     /* We don't have fstat, or we do but the file is larger than
1533      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1534      */
1535     return PyMarshal_ReadObjectFromFile(fp);
1536 
1537 #undef REASONABLE_FILE_LIMIT
1538 }
1539 
1540 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1541 PyMarshal_ReadObjectFromFile(FILE *fp)
1542 {
1543     RFILE rf;
1544     PyObject *result;
1545     rf.fp = fp;
1546     rf.readable = NULL;
1547     rf.depth = 0;
1548     rf.ptr = rf.end = NULL;
1549     rf.buf = NULL;
1550     rf.refs = PyList_New(0);
1551     if (rf.refs == NULL)
1552         return NULL;
1553     result = r_object(&rf);
1554     Py_DECREF(rf.refs);
1555     if (rf.buf != NULL)
1556         PyMem_FREE(rf.buf);
1557     return result;
1558 }
1559 
1560 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1561 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1562 {
1563     RFILE rf;
1564     PyObject *result;
1565     rf.fp = NULL;
1566     rf.readable = NULL;
1567     rf.ptr = (char *)str;
1568     rf.end = (char *)str + len;
1569     rf.buf = NULL;
1570     rf.depth = 0;
1571     rf.refs = PyList_New(0);
1572     if (rf.refs == NULL)
1573         return NULL;
1574     result = r_object(&rf);
1575     Py_DECREF(rf.refs);
1576     if (rf.buf != NULL)
1577         PyMem_FREE(rf.buf);
1578     return result;
1579 }
1580 
1581 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1582 PyMarshal_WriteObjectToString(PyObject *x, int version)
1583 {
1584     WFILE wf;
1585 
1586     memset(&wf, 0, sizeof(wf));
1587     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1588     if (wf.str == NULL)
1589         return NULL;
1590     wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1591     wf.end = wf.ptr + PyBytes_Size(wf.str);
1592     wf.error = WFERR_OK;
1593     wf.version = version;
1594     if (w_init_refs(&wf, version)) {
1595         Py_DECREF(wf.str);
1596         return NULL;
1597     }
1598     w_object(x, &wf);
1599     w_clear_refs(&wf);
1600     if (wf.str != NULL) {
1601         char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1602         if (wf.ptr - base > PY_SSIZE_T_MAX) {
1603             Py_DECREF(wf.str);
1604             PyErr_SetString(PyExc_OverflowError,
1605                             "too much marshal data for a bytes object");
1606             return NULL;
1607         }
1608         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1609             return NULL;
1610     }
1611     if (wf.error != WFERR_OK) {
1612         Py_XDECREF(wf.str);
1613         if (wf.error == WFERR_NOMEMORY)
1614             PyErr_NoMemory();
1615         else
1616             PyErr_SetString(PyExc_ValueError,
1617               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1618                :"object too deeply nested to marshal");
1619         return NULL;
1620     }
1621     return wf.str;
1622 }
1623 
1624 /* And an interface for Python programs... */
1625 /*[clinic input]
1626 marshal.dump
1627 
1628     value: object
1629         Must be a supported type.
1630     file: object
1631         Must be a writeable binary file.
1632     version: int(c_default="Py_MARSHAL_VERSION") = version
1633         Indicates the data format that dump should use.
1634     /
1635 
1636 Write the value on the open file.
1637 
1638 If the value has (or contains an object that has) an unsupported type, a
1639 ValueError exception is raised - but garbage data will also be written
1640 to the file. The object will not be properly read back by load().
1641 [clinic start generated code]*/
1642 
1643 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1644 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1645                   int version)
1646 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1647 {
1648     /* XXX Quick hack -- need to do this differently */
1649     PyObject *s;
1650     PyObject *res;
1651     _Py_IDENTIFIER(write);
1652 
1653     s = PyMarshal_WriteObjectToString(value, version);
1654     if (s == NULL)
1655         return NULL;
1656     res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
1657     Py_DECREF(s);
1658     return res;
1659 }
1660 
1661 /*[clinic input]
1662 marshal.load
1663 
1664     file: object
1665         Must be readable binary file.
1666     /
1667 
1668 Read one value from the open file and return it.
1669 
1670 If no valid value is read (e.g. because the data has a different Python
1671 version's incompatible marshal format), raise EOFError, ValueError or
1672 TypeError.
1673 
1674 Note: If an object containing an unsupported type was marshalled with
1675 dump(), load() will substitute None for the unmarshallable type.
1676 [clinic start generated code]*/
1677 
1678 static PyObject *
marshal_load(PyObject * module,PyObject * file)1679 marshal_load(PyObject *module, PyObject *file)
1680 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1681 {
1682     PyObject *data, *result;
1683     _Py_IDENTIFIER(read);
1684     RFILE rf;
1685 
1686     /*
1687      * Make a call to the read method, but read zero bytes.
1688      * This is to ensure that the object passed in at least
1689      * has a read method which returns bytes.
1690      * This can be removed if we guarantee good error handling
1691      * for r_string()
1692      */
1693     data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1694     if (data == NULL)
1695         return NULL;
1696     if (!PyBytes_Check(data)) {
1697         PyErr_Format(PyExc_TypeError,
1698                      "file.read() returned not bytes but %.100s",
1699                      data->ob_type->tp_name);
1700         result = NULL;
1701     }
1702     else {
1703         rf.depth = 0;
1704         rf.fp = NULL;
1705         rf.readable = file;
1706         rf.ptr = rf.end = NULL;
1707         rf.buf = NULL;
1708         if ((rf.refs = PyList_New(0)) != NULL) {
1709             result = read_object(&rf);
1710             Py_DECREF(rf.refs);
1711             if (rf.buf != NULL)
1712                 PyMem_FREE(rf.buf);
1713         } else
1714             result = NULL;
1715     }
1716     Py_DECREF(data);
1717     return result;
1718 }
1719 
1720 /*[clinic input]
1721 marshal.dumps
1722 
1723     value: object
1724         Must be a supported type.
1725     version: int(c_default="Py_MARSHAL_VERSION") = version
1726         Indicates the data format that dumps should use.
1727     /
1728 
1729 Return the bytes object that would be written to a file by dump(value, file).
1730 
1731 Raise a ValueError exception if value has (or contains an object that has) an
1732 unsupported type.
1733 [clinic start generated code]*/
1734 
1735 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1736 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1737 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1738 {
1739     return PyMarshal_WriteObjectToString(value, version);
1740 }
1741 
1742 /*[clinic input]
1743 marshal.loads
1744 
1745     bytes: Py_buffer
1746     /
1747 
1748 Convert the bytes-like object to a value.
1749 
1750 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1751 bytes in the input are ignored.
1752 [clinic start generated code]*/
1753 
1754 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1755 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1756 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1757 {
1758     RFILE rf;
1759     char *s = bytes->buf;
1760     Py_ssize_t n = bytes->len;
1761     PyObject* result;
1762     rf.fp = NULL;
1763     rf.readable = NULL;
1764     rf.ptr = s;
1765     rf.end = s + n;
1766     rf.depth = 0;
1767     if ((rf.refs = PyList_New(0)) == NULL)
1768         return NULL;
1769     result = read_object(&rf);
1770     Py_DECREF(rf.refs);
1771     return result;
1772 }
1773 
1774 static PyMethodDef marshal_methods[] = {
1775     MARSHAL_DUMP_METHODDEF
1776     MARSHAL_LOAD_METHODDEF
1777     MARSHAL_DUMPS_METHODDEF
1778     MARSHAL_LOADS_METHODDEF
1779     {NULL,              NULL}           /* sentinel */
1780 };
1781 
1782 
1783 PyDoc_STRVAR(module_doc,
1784 "This module contains functions that can read and write Python values in\n\
1785 a binary format. The format is specific to Python, but independent of\n\
1786 machine architecture issues.\n\
1787 \n\
1788 Not all Python object types are supported; in general, only objects\n\
1789 whose value is independent from a particular invocation of Python can be\n\
1790 written and read by this module. The following types are supported:\n\
1791 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1792 tuples, lists, sets, dictionaries, and code objects, where it\n\
1793 should be understood that tuples, lists and dictionaries are only\n\
1794 supported as long as the values contained therein are themselves\n\
1795 supported; and recursive lists and dictionaries should not be written\n\
1796 (they will cause infinite loops).\n\
1797 \n\
1798 Variables:\n\
1799 \n\
1800 version -- indicates the format that the module uses. Version 0 is the\n\
1801     historical format, version 1 shares interned strings and version 2\n\
1802     uses a binary format for floating point numbers.\n\
1803     Version 3 shares common object references (New in version 3.4).\n\
1804 \n\
1805 Functions:\n\
1806 \n\
1807 dump() -- write value to a file\n\
1808 load() -- read value from a file\n\
1809 dumps() -- marshal value as a bytes object\n\
1810 loads() -- read value from a bytes-like object");
1811 
1812 
1813 
1814 static struct PyModuleDef marshalmodule = {
1815     PyModuleDef_HEAD_INIT,
1816     "marshal",
1817     module_doc,
1818     0,
1819     marshal_methods,
1820     NULL,
1821     NULL,
1822     NULL,
1823     NULL
1824 };
1825 
1826 PyMODINIT_FUNC
PyMarshal_Init(void)1827 PyMarshal_Init(void)
1828 {
1829     PyObject *mod = PyModule_Create(&marshalmodule);
1830     if (mod == NULL)
1831         return NULL;
1832     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1833         Py_DECREF(mod);
1834         return NULL;
1835     }
1836     return mod;
1837 }
1838