• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #include "Python.h"
10 #include "pycore_call.h"             // _PyObject_CallNoArgs()
11 #include "pycore_code.h"             // _PyCode_New()
12 #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION()
13 #include "pycore_hashtable.h"        // _Py_hashtable_t
14 #include "pycore_long.h"             // _PyLong_DigitCount
15 #include "pycore_setobject.h"        // _PySet_NextEntry()
16 #include "marshal.h"                 // Py_MARSHAL_VERSION
17 #include "pycore_pystate.h"          // _PyInterpreterState_GET()
18 
19 #ifdef __APPLE__
20 #  include "TargetConditionals.h"
21 #endif /* __APPLE__ */
22 
23 /*[clinic input]
24 module marshal
25 [clinic start generated code]*/
26 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
27 
28 #include "clinic/marshal.c.h"
29 
30 /* High water mark to determine when the marshalled object is dangerously deep
31  * and risks coring the interpreter.  When the object stack gets this deep,
32  * raise an exception instead of continuing.
33  * On Windows debug builds, reduce this value.
34  *
35  * BUG: https://bugs.python.org/issue33720
36  * On Windows PGO builds, the r_object function overallocates its stack and
37  * can cause a stack overflow. We reduce the maximum depth for all Windows
38  * releases to protect against this.
39  * #if defined(MS_WINDOWS) && defined(_DEBUG)
40  */
41 #if defined(MS_WINDOWS)
42 #  define MAX_MARSHAL_STACK_DEPTH 1000
43 #elif defined(__wasi__)
44 #  define MAX_MARSHAL_STACK_DEPTH 1500
45 // TARGET_OS_IPHONE covers any non-macOS Apple platform.
46 // It won't be defined on older macOS SDKs
47 #elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
48 #  define MAX_MARSHAL_STACK_DEPTH 1500
49 #else
50 #  define MAX_MARSHAL_STACK_DEPTH 2000
51 #endif
52 
53 #define TYPE_NULL               '0'
54 #define TYPE_NONE               'N'
55 #define TYPE_FALSE              'F'
56 #define TYPE_TRUE               'T'
57 #define TYPE_STOPITER           'S'
58 #define TYPE_ELLIPSIS           '.'
59 #define TYPE_INT                'i'
60 /* TYPE_INT64 is not generated anymore.
61    Supported for backward compatibility only. */
62 #define TYPE_INT64              'I'
63 #define TYPE_FLOAT              'f'
64 #define TYPE_BINARY_FLOAT       'g'
65 #define TYPE_COMPLEX            'x'
66 #define TYPE_BINARY_COMPLEX     'y'
67 #define TYPE_LONG               'l'
68 #define TYPE_STRING             's'
69 #define TYPE_INTERNED           't'
70 #define TYPE_REF                'r'
71 #define TYPE_TUPLE              '('
72 #define TYPE_LIST               '['
73 #define TYPE_DICT               '{'
74 #define TYPE_CODE               'c'
75 #define TYPE_UNICODE            'u'
76 #define TYPE_UNKNOWN            '?'
77 #define TYPE_SET                '<'
78 #define TYPE_FROZENSET          '>'
79 #define FLAG_REF                '\x80' /* with a type, add obj to index */
80 
81 #define TYPE_ASCII              'a'
82 #define TYPE_ASCII_INTERNED     'A'
83 #define TYPE_SMALL_TUPLE        ')'
84 #define TYPE_SHORT_ASCII        'z'
85 #define TYPE_SHORT_ASCII_INTERNED 'Z'
86 
87 #define WFERR_OK 0
88 #define WFERR_UNMARSHALLABLE 1
89 #define WFERR_NESTEDTOODEEP 2
90 #define WFERR_NOMEMORY 3
91 #define WFERR_CODE_NOT_ALLOWED 4
92 
/* Output state shared by every w_* helper.  Bytes are staged in
   [buf, end) and go either to a stdio stream (fp != NULL) or into a
   growing bytes object (fp == NULL, str != NULL). */
93 typedef struct {
    /* Destination stream; NULL when the output is collected in 'str'. */
94     FILE *fp;
95     int error;  /* see WFERR_* values */
    /* Current nesting level, bumped by w_object() and compared against
       MAX_MARSHAL_STACK_DEPTH. */
96     int depth;
    /* Bytes object that backs 'buf' when fp is NULL; resized by w_reserve(). */
97     PyObject *str;
    /* Next write position; set to NULL by w_reserve() after a failed resize. */
98     char *ptr;
    /* One past the last writable byte of 'buf'. */
99     const char *end;
    /* Start of the staging buffer. */
100     char *buf;
    /* Maps already-seen objects to their reference index (see w_ref());
       NULL when references are not being tracked. */
101     _Py_hashtable_t *hashtable;
    /* Marshal format version being produced. */
102     int version;
    /* Non-zero when code objects may be written (checked in w_complex_object()). */
103     int allow_code;
104 } WFILE;
105 
/* Append the single byte 'c' to writer 'p'.
   If the staging buffer is full, w_reserve() is asked for one more byte;
   when that fails the byte is dropped and 'c' is not evaluated.
   'p' is expanded several times, so it must be free of side effects. */
106 #define w_byte(c, p) do {                               \
107         if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
108             *(p)->ptr++ = (c);                          \
109     } while(0)
110 
111 static void
w_flush(WFILE * p)112 w_flush(WFILE *p)
113 {
114     assert(p->fp != NULL);
115     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
116     p->ptr = p->buf;
117 }
118 
119 static int
w_reserve(WFILE * p,Py_ssize_t needed)120 w_reserve(WFILE *p, Py_ssize_t needed)
121 {
122     Py_ssize_t pos, size, delta;
123     if (p->ptr == NULL)
124         return 0; /* An error already occurred */
125     if (p->fp != NULL) {
126         w_flush(p);
127         return needed <= p->end - p->ptr;
128     }
129     assert(p->str != NULL);
130     pos = p->ptr - p->buf;
131     size = PyBytes_GET_SIZE(p->str);
132     if (size > 16*1024*1024)
133         delta = (size >> 3);            /* 12.5% overallocation */
134     else
135         delta = size + 1024;
136     delta = Py_MAX(delta, needed);
137     if (delta > PY_SSIZE_T_MAX - size) {
138         p->error = WFERR_NOMEMORY;
139         return 0;
140     }
141     size += delta;
142     if (_PyBytes_Resize(&p->str, size) != 0) {
143         p->end = p->ptr = p->buf = NULL;
144         return 0;
145     }
146     else {
147         p->buf = PyBytes_AS_STRING(p->str);
148         p->ptr = p->buf + pos;
149         p->end = p->buf + size;
150         return 1;
151     }
152 }
153 
154 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)155 w_string(const void *s, Py_ssize_t n, WFILE *p)
156 {
157     Py_ssize_t m;
158     if (!n || p->ptr == NULL)
159         return;
160     m = p->end - p->ptr;
161     if (p->fp != NULL) {
162         if (n <= m) {
163             memcpy(p->ptr, s, n);
164             p->ptr += n;
165         }
166         else {
167             w_flush(p);
168             fwrite(s, 1, n, p->fp);
169         }
170     }
171     else {
172         if (n <= m || w_reserve(p, n - m)) {
173             memcpy(p->ptr, s, n);
174             p->ptr += n;
175         }
176     }
177 }
178 
179 static void
w_short(int x,WFILE * p)180 w_short(int x, WFILE *p)
181 {
182     w_byte((char)( x      & 0xff), p);
183     w_byte((char)((x>> 8) & 0xff), p);
184 }
185 
186 static void
w_long(long x,WFILE * p)187 w_long(long x, WFILE *p)
188 {
189     w_byte((char)( x      & 0xff), p);
190     w_byte((char)((x>> 8) & 0xff), p);
191     w_byte((char)((x>>16) & 0xff), p);
192     w_byte((char)((x>>24) & 0xff), p);
193 }
194 
195 #define SIZE32_MAX  0x7FFFFFFF
196 
/* W_SIZE(n, p): write the size 'n' as a 32-bit field.
   When size_t is wider than 4 bytes, a size above SIZE32_MAX cannot be
   encoded: the macro then decrements p->depth, records
   WFERR_UNMARSHALLABLE and executes a bare `return;`, i.e. it leaves the
   ENCLOSING function, which therefore has to return void.
   On other builds it is simply w_long(). */
197 #if SIZEOF_SIZE_T > 4
198 # define W_SIZE(n, p)  do {                     \
199         if ((n) > SIZE32_MAX) {                 \
200             (p)->depth--;                       \
201             (p)->error = WFERR_UNMARSHALLABLE;  \
202             return;                             \
203         }                                       \
204         w_long((long)(n), p);                   \
205     } while(0)
206 #else
207 # define W_SIZE  w_long
208 #endif
209 
210 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)211 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
212 {
213         W_SIZE(n, p);
214         w_string(s, n, p);
215 }
216 
217 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)218 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
219 {
220     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
221     w_string(s, n, p);
222 }
223 
224 /* We assume that Python ints are stored internally in base some power of
225    2**15; for the sake of portability we'll always read and write them in base
226    exactly 2**15. */
227 
228 #define PyLong_MARSHAL_SHIFT 15
229 #define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
230 #define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
231 #if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
232 #error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
233 #endif
234 #define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
235 
/* W_TYPE(t, p): write the type code 't' OR-ed with the variable `flag`.
   `flag` is not a macro argument: it must be in scope wherever the macro
   is expanded. */
236 #define W_TYPE(t, p) do { \
237     w_byte((t) | flag, (p)); \
238 } while(0)
239 
240 static PyObject *
241 _PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
242 
243 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)244 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
245 {
246     Py_ssize_t i, j, n, l;
247     digit d;
248 
249     W_TYPE(TYPE_LONG, p);
250     if (_PyLong_IsZero(ob)) {
251         w_long((long)0, p);
252         return;
253     }
254 
255     /* set l to number of base PyLong_MARSHAL_BASE digits */
256     n = _PyLong_DigitCount(ob);
257     l = (n-1) * PyLong_MARSHAL_RATIO;
258     d = ob->long_value.ob_digit[n-1];
259     assert(d != 0); /* a PyLong is always normalized */
260     do {
261         d >>= PyLong_MARSHAL_SHIFT;
262         l++;
263     } while (d != 0);
264     if (l > SIZE32_MAX) {
265         p->depth--;
266         p->error = WFERR_UNMARSHALLABLE;
267         return;
268     }
269     w_long((long)(_PyLong_IsNegative(ob) ? -l : l), p);
270 
271     for (i=0; i < n-1; i++) {
272         d = ob->long_value.ob_digit[i];
273         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
274             w_short(d & PyLong_MARSHAL_MASK, p);
275             d >>= PyLong_MARSHAL_SHIFT;
276         }
277         assert (d == 0);
278     }
279     d = ob->long_value.ob_digit[n-1];
280     do {
281         w_short(d & PyLong_MARSHAL_MASK, p);
282         d >>= PyLong_MARSHAL_SHIFT;
283     } while (d != 0);
284 }
285 
286 static void
w_float_bin(double v,WFILE * p)287 w_float_bin(double v, WFILE *p)
288 {
289     char buf[8];
290     if (PyFloat_Pack8(v, buf, 1) < 0) {
291         p->error = WFERR_UNMARSHALLABLE;
292         return;
293     }
294     w_string(buf, 8, p);
295 }
296 
297 static void
w_float_str(double v,WFILE * p)298 w_float_str(double v, WFILE *p)
299 {
300     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
301     if (!buf) {
302         p->error = WFERR_NOMEMORY;
303         return;
304     }
305     w_short_pstring(buf, strlen(buf), p);
306     PyMem_Free(buf);
307 }
308 
309 static int
w_ref(PyObject * v,char * flag,WFILE * p)310 w_ref(PyObject *v, char *flag, WFILE *p)
311 {
312     _Py_hashtable_entry_t *entry;
313     int w;
314 
315     if (p->version < 3 || p->hashtable == NULL)
316         return 0; /* not writing object references */
317 
318     /* If it has only one reference, it definitely isn't shared.
319      * But we use TYPE_REF always for interned string, to PYC file stable
320      * as possible.
321      */
322     if (Py_REFCNT(v) == 1 &&
323             !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
324         return 0;
325     }
326 
327     entry = _Py_hashtable_get_entry(p->hashtable, v);
328     if (entry != NULL) {
329         /* write the reference index to the stream */
330         w = (int)(uintptr_t)entry->value;
331         /* we don't store "long" indices in the dict */
332         assert(0 <= w && w <= 0x7fffffff);
333         w_byte(TYPE_REF, p);
334         w_long(w, p);
335         return 1;
336     } else {
337         size_t s = p->hashtable->nentries;
338         /* we don't support long indices */
339         if (s >= 0x7fffffff) {
340             PyErr_SetString(PyExc_ValueError, "too many objects");
341             goto err;
342         }
343         w = (int)s;
344         if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
345                               (void *)(uintptr_t)w) < 0) {
346             Py_DECREF(v);
347             goto err;
348         }
349         *flag |= FLAG_REF;
350         return 0;
351     }
352 err:
353     p->error = WFERR_UNMARSHALLABLE;
354     return 1;
355 }
356 
357 static void
358 w_complex_object(PyObject *v, char flag, WFILE *p);
359 
360 static void
w_object(PyObject * v,WFILE * p)361 w_object(PyObject *v, WFILE *p)
362 {
363     char flag = '\0';
364 
365     p->depth++;
366 
367     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
368         p->error = WFERR_NESTEDTOODEEP;
369     }
370     else if (v == NULL) {
371         w_byte(TYPE_NULL, p);
372     }
373     else if (v == Py_None) {
374         w_byte(TYPE_NONE, p);
375     }
376     else if (v == PyExc_StopIteration) {
377         w_byte(TYPE_STOPITER, p);
378     }
379     else if (v == Py_Ellipsis) {
380         w_byte(TYPE_ELLIPSIS, p);
381     }
382     else if (v == Py_False) {
383         w_byte(TYPE_FALSE, p);
384     }
385     else if (v == Py_True) {
386         w_byte(TYPE_TRUE, p);
387     }
388     else if (!w_ref(v, &flag, p))
389         w_complex_object(v, flag, p);
390 
391     p->depth--;
392 }
393 
/* Serialize an object that w_object() did not handle itself (i.e. not
   NULL, not one of the singletons, and not emitted as a back-reference).

   v:    the object to write.
   flag: extra bits OR-ed into the type byte by W_TYPE (FLAG_REF when
         w_ref() has just registered the object).
   p:    the writer; failures are reported through p->error.

   The branches below are tried in order; the first matching exact type
   wins.  Note that W_SIZE may `return` from this function on builds
   where size_t is wider than 4 bytes. */
394 static void
w_complex_object(PyObject * v,char flag,WFILE * p)395 w_complex_object(PyObject *v, char flag, WFILE *p)
396 {
397     Py_ssize_t i, n;
398 
    /* int: TYPE_INT when the value fits in 32 bits signed, TYPE_LONG
       (via w_PyLong) otherwise. */
399     if (PyLong_CheckExact(v)) {
400         int overflow;
401         long x = PyLong_AsLongAndOverflow(v, &overflow);
402         if (overflow) {
403             w_PyLong((PyLongObject *)v, flag, p);
404         }
405         else {
406 #if SIZEOF_LONG > 4
            /* y is 0 or -1 exactly when x fits in 32 bits signed. */
407             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
408             if (y && y != -1) {
409                 /* Too large for TYPE_INT */
410                 w_PyLong((PyLongObject*)v, flag, p);
411             }
412             else
413 #endif
414             {
415                 W_TYPE(TYPE_INT, p);
416                 w_long(x, p);
417             }
418         }
419     }
    /* float: binary form for version > 1, text form otherwise. */
420     else if (PyFloat_CheckExact(v)) {
421         if (p->version > 1) {
422             W_TYPE(TYPE_BINARY_FLOAT, p);
423             w_float_bin(PyFloat_AS_DOUBLE(v), p);
424         }
425         else {
426             W_TYPE(TYPE_FLOAT, p);
427             w_float_str(PyFloat_AS_DOUBLE(v), p);
428         }
429     }
    /* complex: real part then imaginary part, same binary/text split. */
430     else if (PyComplex_CheckExact(v)) {
431         if (p->version > 1) {
432             W_TYPE(TYPE_BINARY_COMPLEX, p);
433             w_float_bin(PyComplex_RealAsDouble(v), p);
434             w_float_bin(PyComplex_ImagAsDouble(v), p);
435         }
436         else {
437             W_TYPE(TYPE_COMPLEX, p);
438             w_float_str(PyComplex_RealAsDouble(v), p);
439             w_float_str(PyComplex_ImagAsDouble(v), p);
440         }
441     }
    /* bytes: 32-bit length + raw data. */
442     else if (PyBytes_CheckExact(v)) {
443         W_TYPE(TYPE_STRING, p);
444         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
445     }
    /* str: from version 4 on, ASCII strings are written directly from
       their 1-byte data (one-byte length if shorter than 256 characters);
       all other strings are encoded as UTF-8 with "surrogatepass". */
446     else if (PyUnicode_CheckExact(v)) {
447         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
448             int is_short = PyUnicode_GET_LENGTH(v) < 256;
449             if (is_short) {
450                 if (PyUnicode_CHECK_INTERNED(v))
451                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
452                 else
453                     W_TYPE(TYPE_SHORT_ASCII, p);
454                 w_short_pstring(PyUnicode_1BYTE_DATA(v),
455                                 PyUnicode_GET_LENGTH(v), p);
456             }
457             else {
458                 if (PyUnicode_CHECK_INTERNED(v))
459                     W_TYPE(TYPE_ASCII_INTERNED, p);
460                 else
461                     W_TYPE(TYPE_ASCII, p);
462                 w_pstring(PyUnicode_1BYTE_DATA(v),
463                           PyUnicode_GET_LENGTH(v), p);
464             }
465         }
466         else {
467             PyObject *utf8;
468             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
469             if (utf8 == NULL) {
                /* NOTE(review): w_object() decrements depth again after
                   this function returns, so depth drops by two on this
                   path -- confirm that is intended. */
470                 p->depth--;
471                 p->error = WFERR_UNMARSHALLABLE;
472                 return;
473             }
474             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
475                 W_TYPE(TYPE_INTERNED, p);
476             else
477                 W_TYPE(TYPE_UNICODE, p);
478             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
479             Py_DECREF(utf8);
480         }
481     }
    /* tuple: one-byte count for fewer than 256 items (version >= 4),
       32-bit count otherwise; items follow in order. */
482     else if (PyTuple_CheckExact(v)) {
483         n = PyTuple_GET_SIZE(v);
484         if (p->version >= 4 && n < 256) {
485             W_TYPE(TYPE_SMALL_TUPLE, p);
486             w_byte((unsigned char)n, p);
487         }
488         else {
489             W_TYPE(TYPE_TUPLE, p);
490             W_SIZE(n, p);
491         }
492         for (i = 0; i < n; i++) {
493             w_object(PyTuple_GET_ITEM(v, i), p);
494         }
495     }
    /* list: 32-bit count followed by the items. */
496     else if (PyList_CheckExact(v)) {
497         W_TYPE(TYPE_LIST, p);
498         n = PyList_GET_SIZE(v);
499         W_SIZE(n, p);
500         for (i = 0; i < n; i++) {
501             w_object(PyList_GET_ITEM(v, i), p);
502         }
503     }
    /* dict: no count; key/value pairs followed by a TYPE_NULL marker
       (written by w_object(NULL)). */
504     else if (PyDict_CheckExact(v)) {
505         Py_ssize_t pos;
506         PyObject *key, *value;
507         W_TYPE(TYPE_DICT, p);
508         /* This one is NULL object terminated! */
509         pos = 0;
510         while (PyDict_Next(v, &pos, &key, &value)) {
511             w_object(key, p);
512             w_object(value, p);
513         }
514         w_object((PyObject *)NULL, p);
515     }
    /* set / frozenset: 32-bit count, then the elements in the order given
       by sorting (marshalled bytes, element) pairs. */
516     else if (PyAnySet_CheckExact(v)) {
517         PyObject *value;
518         Py_ssize_t pos = 0;
519         Py_hash_t hash;
520 
521         if (PyFrozenSet_CheckExact(v))
522             W_TYPE(TYPE_FROZENSET, p);
523         else
524             W_TYPE(TYPE_SET, p);
525         n = PySet_GET_SIZE(v);
526         W_SIZE(n, p);
527         // bpo-37596: To support reproducible builds, sets and frozensets need
528         // to have their elements serialized in a consistent order (even when
529         // they have been scrambled by hash randomization). To ensure this, we
530         // use an order equivalent to sorted(v, key=marshal.dumps):
531         PyObject *pairs = PyList_New(n);
532         if (pairs == NULL) {
533             p->error = WFERR_NOMEMORY;
534             return;
535         }
        /* This `i` shadows the function-level `i`. */
536         Py_ssize_t i = 0;
537         Py_BEGIN_CRITICAL_SECTION(v);
538         while (_PySet_NextEntryRef(v, &pos, &value, &hash)) {
            /* Each element is marshalled on its own to obtain its sort key. */
539             PyObject *dump = _PyMarshal_WriteObjectToString(value,
540                                     p->version, p->allow_code);
541             if (dump == NULL) {
542                 p->error = WFERR_UNMARSHALLABLE;
543                 Py_DECREF(value);
544                 break;
545             }
546             PyObject *pair = PyTuple_Pack(2, dump, value);
547             Py_DECREF(dump);
548             Py_DECREF(value);
549             if (pair == NULL) {
550                 p->error = WFERR_NOMEMORY;
551                 break;
552             }
553             PyList_SET_ITEM(pairs, i++, pair);
554         }
555         Py_END_CRITICAL_SECTION();
        /* The loop above reports failure only through p->error. */
556         if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) {
557             Py_DECREF(pairs);
558             return;
559         }
560         assert(i == n);
561         if (PyList_Sort(pairs)) {
562             p->error = WFERR_NOMEMORY;
563             Py_DECREF(pairs);
564             return;
565         }
566         for (Py_ssize_t i = 0; i < n; i++) {
567             PyObject *pair = PyList_GET_ITEM(pairs, i);
568             value = PyTuple_GET_ITEM(pair, 1);
569             w_object(value, p);
570         }
571         Py_DECREF(pairs);
572     }
    /* code object: refused with WFERR_CODE_NOT_ALLOWED unless
       p->allow_code is set; otherwise the fields are written in the fixed
       order below. */
573     else if (PyCode_Check(v)) {
574         if (!p->allow_code) {
575             p->error = WFERR_CODE_NOT_ALLOWED;
576             return;
577         }
578         PyCodeObject *co = (PyCodeObject *)v;
        /* co_code is released with Py_DECREF at the end of this branch. */
579         PyObject *co_code = _PyCode_GetCode(co);
580         if (co_code == NULL) {
581             p->error = WFERR_NOMEMORY;
582             return;
583         }
584         W_TYPE(TYPE_CODE, p);
585         w_long(co->co_argcount, p);
586         w_long(co->co_posonlyargcount, p);
587         w_long(co->co_kwonlyargcount, p);
588         w_long(co->co_stacksize, p);
589         w_long(co->co_flags, p);
590         w_object(co_code, p);
591         w_object(co->co_consts, p);
592         w_object(co->co_names, p);
593         w_object(co->co_localsplusnames, p);
594         w_object(co->co_localspluskinds, p);
595         w_object(co->co_filename, p);
596         w_object(co->co_name, p);
597         w_object(co->co_qualname, p);
598         w_long(co->co_firstlineno, p);
599         w_object(co->co_linetable, p);
600         w_object(co->co_exceptiontable, p);
601         Py_DECREF(co_code);
602     }
603     else if (PyObject_CheckBuffer(v)) {
604         /* Write unknown bytes-like objects as a bytes object */
605         Py_buffer view;
606         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
607             w_byte(TYPE_UNKNOWN, p);
608             p->depth--;
609             p->error = WFERR_UNMARSHALLABLE;
610             return;
611         }
612         W_TYPE(TYPE_STRING, p);
        /* If the size is too large, W_SIZE returns from w_pstring() only,
           so the buffer is still released below. */
613         w_pstring(view.buf, view.len, p);
614         PyBuffer_Release(&view);
615     }
    /* Anything else cannot be marshalled. */
616     else {
617         W_TYPE(TYPE_UNKNOWN, p);
618         p->error = WFERR_UNMARSHALLABLE;
619     }
620 }
621 
622 static void
w_decref_entry(void * key)623 w_decref_entry(void *key)
624 {
625     PyObject *entry_key = (PyObject *)key;
626     Py_XDECREF(entry_key);
627 }
628 
629 static int
w_init_refs(WFILE * wf,int version)630 w_init_refs(WFILE *wf, int version)
631 {
632     if (version >= 3) {
633         wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
634                                                _Py_hashtable_compare_direct,
635                                                w_decref_entry, NULL, NULL);
636         if (wf->hashtable == NULL) {
637             PyErr_NoMemory();
638             return -1;
639         }
640     }
641     return 0;
642 }
643 
644 static void
w_clear_refs(WFILE * wf)645 w_clear_refs(WFILE *wf)
646 {
647     if (wf->hashtable != NULL) {
648         _Py_hashtable_destroy(wf->hashtable);
649     }
650 }
651 
652 /* version currently has no effect for writing ints. */
653 /* Note that while the documentation states that this function
654  * can error, currently it never does. Setting an exception in
655  * this function should be regarded as an API-breaking change.
656  */
657 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)658 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
659 {
660     char buf[4];
661     WFILE wf;
662     memset(&wf, 0, sizeof(wf));
663     wf.fp = fp;
664     wf.ptr = wf.buf = buf;
665     wf.end = wf.ptr + sizeof(buf);
666     wf.error = WFERR_OK;
667     wf.version = version;
668     w_long(x, &wf);
669     w_flush(&wf);
670 }
671 
672 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)673 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
674 {
675     char buf[BUFSIZ];
676     WFILE wf;
677     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
678         return; /* caller must check PyErr_Occurred() */
679     }
680     memset(&wf, 0, sizeof(wf));
681     wf.fp = fp;
682     wf.ptr = wf.buf = buf;
683     wf.end = wf.ptr + sizeof(buf);
684     wf.error = WFERR_OK;
685     wf.version = version;
686     wf.allow_code = 1;
687     if (w_init_refs(&wf, version)) {
688         return; /* caller must check PyErr_Occurred() */
689     }
690     w_object(x, &wf);
691     w_clear_refs(&wf);
692     w_flush(&wf);
693 }
694 
/* Input state shared by every r_* helper.  Data comes from one of three
   places, checked in this order by r_string()/r_byte(): an in-memory
   range (ptr != NULL), a Python stream object (readable != NULL), or a
   stdio stream (fp). */
695 typedef struct {
    /* stdio source, used when both 'ptr' and 'readable' are NULL. */
696     FILE *fp;
    /* Current r_object() nesting level, checked against
       MAX_MARSHAL_STACK_DEPTH. */
697     int depth;
698     PyObject *readable;  /* Stream-like object being read from */
    /* Read cursor into in-memory data; NULL when reading from a stream. */
699     const char *ptr;
    /* One past the last byte of the in-memory data. */
700     const char *end;
    /* Scratch buffer that r_string() allocates for stream reads. */
701     char *buf;
    /* Size recorded for 'buf' when it was last (re)allocated. */
702     Py_ssize_t buf_size;
703     PyObject *refs;  /* a list */
    /* Not read in the code visible here; presumably the counterpart of
       WFILE.allow_code -- TODO confirm. */
704     int allow_code;
705 } RFILE;
706 
707 static const char *
r_string(Py_ssize_t n,RFILE * p)708 r_string(Py_ssize_t n, RFILE *p)
709 {
710     Py_ssize_t read = -1;
711 
712     if (p->ptr != NULL) {
713         /* Fast path for loads() */
714         const char *res = p->ptr;
715         Py_ssize_t left = p->end - p->ptr;
716         if (left < n) {
717             PyErr_SetString(PyExc_EOFError,
718                             "marshal data too short");
719             return NULL;
720         }
721         p->ptr += n;
722         return res;
723     }
724     if (p->buf == NULL) {
725         p->buf = PyMem_Malloc(n);
726         if (p->buf == NULL) {
727             PyErr_NoMemory();
728             return NULL;
729         }
730         p->buf_size = n;
731     }
732     else if (p->buf_size < n) {
733         char *tmp = PyMem_Realloc(p->buf, n);
734         if (tmp == NULL) {
735             PyErr_NoMemory();
736             return NULL;
737         }
738         p->buf = tmp;
739         p->buf_size = n;
740     }
741 
742     if (!p->readable) {
743         assert(p->fp != NULL);
744         read = fread(p->buf, 1, n, p->fp);
745     }
746     else {
747         PyObject *res, *mview;
748         Py_buffer buf;
749 
750         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
751             return NULL;
752         mview = PyMemoryView_FromBuffer(&buf);
753         if (mview == NULL)
754             return NULL;
755 
756         res = _PyObject_CallMethod(p->readable, &_Py_ID(readinto), "N", mview);
757         if (res != NULL) {
758             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
759             Py_DECREF(res);
760         }
761     }
762     if (read != n) {
763         if (!PyErr_Occurred()) {
764             if (read > n)
765                 PyErr_Format(PyExc_ValueError,
766                              "read() returned too much data: "
767                              "%zd bytes requested, %zd returned",
768                              n, read);
769             else
770                 PyErr_SetString(PyExc_EOFError,
771                                 "EOF read where not expected");
772         }
773         return NULL;
774     }
775     return p->buf;
776 }
777 
778 static int
r_byte(RFILE * p)779 r_byte(RFILE *p)
780 {
781     if (p->ptr != NULL) {
782         if (p->ptr < p->end) {
783             return (unsigned char) *p->ptr++;
784         }
785     }
786     else if (!p->readable) {
787         assert(p->fp);
788         int c = getc(p->fp);
789         if (c != EOF) {
790             return c;
791         }
792     }
793     else {
794         const char *ptr = r_string(1, p);
795         if (ptr != NULL) {
796             return *(const unsigned char *) ptr;
797         }
798         return EOF;
799     }
800     PyErr_SetString(PyExc_EOFError,
801                     "EOF read where not expected");
802     return EOF;
803 }
804 
805 static int
r_short(RFILE * p)806 r_short(RFILE *p)
807 {
808     short x = -1;
809     const unsigned char *buffer;
810 
811     buffer = (const unsigned char *) r_string(2, p);
812     if (buffer != NULL) {
813         x = buffer[0];
814         x |= buffer[1] << 8;
815         /* Sign-extension, in case short greater than 16 bits */
816         x |= -(x & 0x8000);
817     }
818     return x;
819 }
820 
821 static long
r_long(RFILE * p)822 r_long(RFILE *p)
823 {
824     long x = -1;
825     const unsigned char *buffer;
826 
827     buffer = (const unsigned char *) r_string(4, p);
828     if (buffer != NULL) {
829         x = buffer[0];
830         x |= (long)buffer[1] << 8;
831         x |= (long)buffer[2] << 16;
832         x |= (long)buffer[3] << 24;
833 #if SIZEOF_LONG > 4
834         /* Sign extension for 64-bit machines */
835         x |= -(x & 0x80000000L);
836 #endif
837     }
838     return x;
839 }
840 
841 /* r_long64 deals with the TYPE_INT64 code. */
842 static PyObject *
r_long64(RFILE * p)843 r_long64(RFILE *p)
844 {
845     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
846     if (buffer == NULL) {
847         return NULL;
848     }
849     return _PyLong_FromByteArray(buffer, 8,
850                                  1 /* little endian */,
851                                  1 /* signed */);
852 }
853 
854 static PyObject *
r_PyLong(RFILE * p)855 r_PyLong(RFILE *p)
856 {
857     PyLongObject *ob;
858     long n, size, i;
859     int j, md, shorts_in_top_digit;
860     digit d;
861 
862     n = r_long(p);
863     if (n == 0)
864         return (PyObject *)_PyLong_New(0);
865     if (n == -1 && PyErr_Occurred()) {
866         return NULL;
867     }
868     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
869         PyErr_SetString(PyExc_ValueError,
870                        "bad marshal data (long size out of range)");
871         return NULL;
872     }
873 
874     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
875     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
876     ob = _PyLong_New(size);
877     if (ob == NULL)
878         return NULL;
879 
880     _PyLong_SetSignAndDigitCount(ob, n < 0 ? -1 : 1, size);
881 
882     for (i = 0; i < size-1; i++) {
883         d = 0;
884         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
885             md = r_short(p);
886             if (md < 0 || md > PyLong_MARSHAL_BASE)
887                 goto bad_digit;
888             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
889         }
890         ob->long_value.ob_digit[i] = d;
891     }
892 
893     d = 0;
894     for (j=0; j < shorts_in_top_digit; j++) {
895         md = r_short(p);
896         if (md < 0 || md > PyLong_MARSHAL_BASE)
897             goto bad_digit;
898         /* topmost marshal digit should be nonzero */
899         if (md == 0 && j == shorts_in_top_digit - 1) {
900             Py_DECREF(ob);
901             PyErr_SetString(PyExc_ValueError,
902                 "bad marshal data (unnormalized long data)");
903             return NULL;
904         }
905         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
906     }
907     assert(!PyErr_Occurred());
908     /* top digit should be nonzero, else the resulting PyLong won't be
909        normalized */
910     ob->long_value.ob_digit[size-1] = d;
911     return (PyObject *)ob;
912   bad_digit:
913     Py_DECREF(ob);
914     if (!PyErr_Occurred()) {
915         PyErr_SetString(PyExc_ValueError,
916                         "bad marshal data (digit out of range in long)");
917     }
918     return NULL;
919 }
920 
921 static double
r_float_bin(RFILE * p)922 r_float_bin(RFILE *p)
923 {
924     const char *buf = r_string(8, p);
925     if (buf == NULL)
926         return -1;
927     return PyFloat_Unpack8(buf, 1);
928 }
929 
930 /* Issue #33720: Disable inlining for reducing the C stack consumption
931    on PGO builds. */
932 Py_NO_INLINE static double
r_float_str(RFILE * p)933 r_float_str(RFILE *p)
934 {
935     int n;
936     char buf[256];
937     const char *ptr;
938     n = r_byte(p);
939     if (n == EOF) {
940         return -1;
941     }
942     ptr = r_string(n, p);
943     if (ptr == NULL) {
944         return -1;
945     }
946     memcpy(buf, ptr, n);
947     buf[n] = '\0';
948     return PyOS_string_to_double(buf, NULL, NULL);
949 }
950 
951 /* allocate the reflist index for a new object. Return -1 on failure */
952 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)953 r_ref_reserve(int flag, RFILE *p)
954 {
955     if (flag) { /* currently only FLAG_REF is defined */
956         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
957         if (idx >= 0x7ffffffe) {
958             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
959             return -1;
960         }
961         if (PyList_Append(p->refs, Py_None) < 0)
962             return -1;
963         return idx;
964     } else
965         return 0;
966 }
967 
968 /* insert the new object 'o' to the reflist at previously
969  * allocated index 'idx'.
970  * 'o' can be NULL, in which case nothing is done.
971  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
972  * if 'o' was non-NULL, and the function fails, 'o' is released and
973  * NULL returned. This simplifies error checking at the call site since
974  * a single test for NULL for the function result is enough.
975  */
976 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)977 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
978 {
979     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
980         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
981         PyList_SET_ITEM(p->refs, idx, Py_NewRef(o));
982         Py_DECREF(tmp);
983     }
984     return o;
985 }
986 
987 /* combination of both above, used when an object can be
988  * created whenever it is seen in the file, as opposed to
989  * after having loaded its sub-objects.
990  */
991 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)992 r_ref(PyObject *o, int flag, RFILE *p)
993 {
994     assert(flag & FLAG_REF);
995     if (o == NULL)
996         return NULL;
997     if (PyList_Append(p->refs, o) < 0) {
998         Py_DECREF(o); /* release the new object */
999         return NULL;
1000     }
1001     return o;
1002 }
1003 
1004 static PyObject *
r_object(RFILE * p)1005 r_object(RFILE *p)
1006 {
1007     /* NULL is a valid return value, it does not necessarily means that
1008        an exception is set. */
1009     PyObject *v, *v2;
1010     Py_ssize_t idx = 0;
1011     long i, n;
1012     int type, code = r_byte(p);
1013     int flag, is_interned = 0;
1014     PyObject *retval = NULL;
1015 
1016     if (code == EOF) {
1017         if (PyErr_ExceptionMatches(PyExc_EOFError)) {
1018             PyErr_SetString(PyExc_EOFError,
1019                             "EOF read where object expected");
1020         }
1021         return NULL;
1022     }
1023 
1024     p->depth++;
1025 
1026     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
1027         p->depth--;
1028         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
1029         return NULL;
1030     }
1031 
1032     flag = code & FLAG_REF;
1033     type = code & ~FLAG_REF;
1034 
1035 #define R_REF(O) do{\
1036     if (flag) \
1037         O = r_ref(O, flag, p);\
1038 } while (0)
1039 
1040     switch (type) {
1041 
1042     case TYPE_NULL:
1043         break;
1044 
1045     case TYPE_NONE:
1046         retval = Py_None;
1047         break;
1048 
1049     case TYPE_STOPITER:
1050         retval = Py_NewRef(PyExc_StopIteration);
1051         break;
1052 
1053     case TYPE_ELLIPSIS:
1054         retval = Py_Ellipsis;
1055         break;
1056 
1057     case TYPE_FALSE:
1058         retval = Py_False;
1059         break;
1060 
1061     case TYPE_TRUE:
1062         retval = Py_True;
1063         break;
1064 
1065     case TYPE_INT:
1066         n = r_long(p);
1067         if (n == -1 && PyErr_Occurred()) {
1068             break;
1069         }
1070         retval = PyLong_FromLong(n);
1071         R_REF(retval);
1072         break;
1073 
1074     case TYPE_INT64:
1075         retval = r_long64(p);
1076         R_REF(retval);
1077         break;
1078 
1079     case TYPE_LONG:
1080         retval = r_PyLong(p);
1081         R_REF(retval);
1082         break;
1083 
1084     case TYPE_FLOAT:
1085         {
1086             double x = r_float_str(p);
1087             if (x == -1.0 && PyErr_Occurred())
1088                 break;
1089             retval = PyFloat_FromDouble(x);
1090             R_REF(retval);
1091             break;
1092         }
1093 
1094     case TYPE_BINARY_FLOAT:
1095         {
1096             double x = r_float_bin(p);
1097             if (x == -1.0 && PyErr_Occurred())
1098                 break;
1099             retval = PyFloat_FromDouble(x);
1100             R_REF(retval);
1101             break;
1102         }
1103 
1104     case TYPE_COMPLEX:
1105         {
1106             Py_complex c;
1107             c.real = r_float_str(p);
1108             if (c.real == -1.0 && PyErr_Occurred())
1109                 break;
1110             c.imag = r_float_str(p);
1111             if (c.imag == -1.0 && PyErr_Occurred())
1112                 break;
1113             retval = PyComplex_FromCComplex(c);
1114             R_REF(retval);
1115             break;
1116         }
1117 
1118     case TYPE_BINARY_COMPLEX:
1119         {
1120             Py_complex c;
1121             c.real = r_float_bin(p);
1122             if (c.real == -1.0 && PyErr_Occurred())
1123                 break;
1124             c.imag = r_float_bin(p);
1125             if (c.imag == -1.0 && PyErr_Occurred())
1126                 break;
1127             retval = PyComplex_FromCComplex(c);
1128             R_REF(retval);
1129             break;
1130         }
1131 
1132     case TYPE_STRING:
1133         {
1134             const char *ptr;
1135             n = r_long(p);
1136             if (n < 0 || n > SIZE32_MAX) {
1137                 if (!PyErr_Occurred()) {
1138                     PyErr_SetString(PyExc_ValueError,
1139                         "bad marshal data (bytes object size out of range)");
1140                 }
1141                 break;
1142             }
1143             v = PyBytes_FromStringAndSize((char *)NULL, n);
1144             if (v == NULL)
1145                 break;
1146             ptr = r_string(n, p);
1147             if (ptr == NULL) {
1148                 Py_DECREF(v);
1149                 break;
1150             }
1151             memcpy(PyBytes_AS_STRING(v), ptr, n);
1152             retval = v;
1153             R_REF(retval);
1154             break;
1155         }
1156 
1157     case TYPE_ASCII_INTERNED:
1158         is_interned = 1;
1159         /* fall through */
1160     case TYPE_ASCII:
1161         n = r_long(p);
1162         if (n < 0 || n > SIZE32_MAX) {
1163             if (!PyErr_Occurred()) {
1164                 PyErr_SetString(PyExc_ValueError,
1165                     "bad marshal data (string size out of range)");
1166             }
1167             break;
1168         }
1169         goto _read_ascii;
1170 
1171     case TYPE_SHORT_ASCII_INTERNED:
1172         is_interned = 1;
1173         /* fall through */
1174     case TYPE_SHORT_ASCII:
1175         n = r_byte(p);
1176         if (n == EOF) {
1177             break;
1178         }
1179     _read_ascii:
1180         {
1181             const char *ptr;
1182             ptr = r_string(n, p);
1183             if (ptr == NULL)
1184                 break;
1185             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1186             if (v == NULL)
1187                 break;
1188             if (is_interned) {
1189                 // marshal is meant to serialize .pyc files with code
1190                 // objects, and code-related strings are currently immortal.
1191                 PyInterpreterState *interp = _PyInterpreterState_GET();
1192                 _PyUnicode_InternImmortal(interp, &v);
1193             }
1194             retval = v;
1195             R_REF(retval);
1196             break;
1197         }
1198 
1199     case TYPE_INTERNED:
1200         is_interned = 1;
1201         /* fall through */
1202     case TYPE_UNICODE:
1203         {
1204         const char *buffer;
1205 
1206         n = r_long(p);
1207         if (n < 0 || n > SIZE32_MAX) {
1208             if (!PyErr_Occurred()) {
1209                 PyErr_SetString(PyExc_ValueError,
1210                     "bad marshal data (string size out of range)");
1211             }
1212             break;
1213         }
1214         if (n != 0) {
1215             buffer = r_string(n, p);
1216             if (buffer == NULL)
1217                 break;
1218             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1219         }
1220         else {
1221             v = PyUnicode_New(0, 0);
1222         }
1223         if (v == NULL)
1224             break;
1225         if (is_interned) {
1226             // marshal is meant to serialize .pyc files with code
1227             // objects, and code-related strings are currently immortal.
1228             PyInterpreterState *interp = _PyInterpreterState_GET();
1229             _PyUnicode_InternImmortal(interp, &v);
1230         }
1231         retval = v;
1232         R_REF(retval);
1233         break;
1234         }
1235 
1236     case TYPE_SMALL_TUPLE:
1237         n = r_byte(p);
1238         if (n == EOF) {
1239             break;
1240         }
1241         goto _read_tuple;
1242     case TYPE_TUPLE:
1243         n = r_long(p);
1244         if (n < 0 || n > SIZE32_MAX) {
1245             if (!PyErr_Occurred()) {
1246                 PyErr_SetString(PyExc_ValueError,
1247                     "bad marshal data (tuple size out of range)");
1248             }
1249             break;
1250         }
1251     _read_tuple:
1252         v = PyTuple_New(n);
1253         R_REF(v);
1254         if (v == NULL)
1255             break;
1256 
1257         for (i = 0; i < n; i++) {
1258             v2 = r_object(p);
1259             if ( v2 == NULL ) {
1260                 if (!PyErr_Occurred())
1261                     PyErr_SetString(PyExc_TypeError,
1262                         "NULL object in marshal data for tuple");
1263                 Py_SETREF(v, NULL);
1264                 break;
1265             }
1266             PyTuple_SET_ITEM(v, i, v2);
1267         }
1268         retval = v;
1269         break;
1270 
1271     case TYPE_LIST:
1272         n = r_long(p);
1273         if (n < 0 || n > SIZE32_MAX) {
1274             if (!PyErr_Occurred()) {
1275                 PyErr_SetString(PyExc_ValueError,
1276                     "bad marshal data (list size out of range)");
1277             }
1278             break;
1279         }
1280         v = PyList_New(n);
1281         R_REF(v);
1282         if (v == NULL)
1283             break;
1284         for (i = 0; i < n; i++) {
1285             v2 = r_object(p);
1286             if ( v2 == NULL ) {
1287                 if (!PyErr_Occurred())
1288                     PyErr_SetString(PyExc_TypeError,
1289                         "NULL object in marshal data for list");
1290                 Py_SETREF(v, NULL);
1291                 break;
1292             }
1293             PyList_SET_ITEM(v, i, v2);
1294         }
1295         retval = v;
1296         break;
1297 
1298     case TYPE_DICT:
1299         v = PyDict_New();
1300         R_REF(v);
1301         if (v == NULL)
1302             break;
1303         for (;;) {
1304             PyObject *key, *val;
1305             key = r_object(p);
1306             if (key == NULL)
1307                 break;
1308             val = r_object(p);
1309             if (val == NULL) {
1310                 Py_DECREF(key);
1311                 break;
1312             }
1313             if (PyDict_SetItem(v, key, val) < 0) {
1314                 Py_DECREF(key);
1315                 Py_DECREF(val);
1316                 break;
1317             }
1318             Py_DECREF(key);
1319             Py_DECREF(val);
1320         }
1321         if (PyErr_Occurred()) {
1322             Py_SETREF(v, NULL);
1323         }
1324         retval = v;
1325         break;
1326 
1327     case TYPE_SET:
1328     case TYPE_FROZENSET:
1329         n = r_long(p);
1330         if (n < 0 || n > SIZE32_MAX) {
1331             if (!PyErr_Occurred()) {
1332                 PyErr_SetString(PyExc_ValueError,
1333                     "bad marshal data (set size out of range)");
1334             }
1335             break;
1336         }
1337 
1338         if (n == 0 && type == TYPE_FROZENSET) {
1339             /* call frozenset() to get the empty frozenset singleton */
1340             v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1341             if (v == NULL)
1342                 break;
1343             R_REF(v);
1344             retval = v;
1345         }
1346         else {
1347             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1348             if (type == TYPE_SET) {
1349                 R_REF(v);
1350             } else {
1351                 /* must use delayed registration of frozensets because they must
1352                  * be init with a refcount of 1
1353                  */
1354                 idx = r_ref_reserve(flag, p);
1355                 if (idx < 0)
1356                     Py_CLEAR(v); /* signal error */
1357             }
1358             if (v == NULL)
1359                 break;
1360 
1361             for (i = 0; i < n; i++) {
1362                 v2 = r_object(p);
1363                 if ( v2 == NULL ) {
1364                     if (!PyErr_Occurred())
1365                         PyErr_SetString(PyExc_TypeError,
1366                             "NULL object in marshal data for set");
1367                     Py_SETREF(v, NULL);
1368                     break;
1369                 }
1370                 if (PySet_Add(v, v2) == -1) {
1371                     Py_DECREF(v);
1372                     Py_DECREF(v2);
1373                     v = NULL;
1374                     break;
1375                 }
1376                 Py_DECREF(v2);
1377             }
1378             if (type != TYPE_SET)
1379                 v = r_ref_insert(v, idx, flag, p);
1380             retval = v;
1381         }
1382         break;
1383 
1384     case TYPE_CODE:
1385         {
1386             int argcount;
1387             int posonlyargcount;
1388             int kwonlyargcount;
1389             int stacksize;
1390             int flags;
1391             PyObject *code = NULL;
1392             PyObject *consts = NULL;
1393             PyObject *names = NULL;
1394             PyObject *localsplusnames = NULL;
1395             PyObject *localspluskinds = NULL;
1396             PyObject *filename = NULL;
1397             PyObject *name = NULL;
1398             PyObject *qualname = NULL;
1399             int firstlineno;
1400             PyObject* linetable = NULL;
1401             PyObject *exceptiontable = NULL;
1402 
1403             if (!p->allow_code) {
1404                 PyErr_SetString(PyExc_ValueError,
1405                                 "unmarshalling code objects is disallowed");
1406                 break;
1407             }
1408             idx = r_ref_reserve(flag, p);
1409             if (idx < 0)
1410                 break;
1411 
1412             v = NULL;
1413 
1414             /* XXX ignore long->int overflows for now */
1415             argcount = (int)r_long(p);
1416             if (argcount == -1 && PyErr_Occurred())
1417                 goto code_error;
1418             posonlyargcount = (int)r_long(p);
1419             if (posonlyargcount == -1 && PyErr_Occurred()) {
1420                 goto code_error;
1421             }
1422             kwonlyargcount = (int)r_long(p);
1423             if (kwonlyargcount == -1 && PyErr_Occurred())
1424                 goto code_error;
1425             stacksize = (int)r_long(p);
1426             if (stacksize == -1 && PyErr_Occurred())
1427                 goto code_error;
1428             flags = (int)r_long(p);
1429             if (flags == -1 && PyErr_Occurred())
1430                 goto code_error;
1431             code = r_object(p);
1432             if (code == NULL)
1433                 goto code_error;
1434             consts = r_object(p);
1435             if (consts == NULL)
1436                 goto code_error;
1437             names = r_object(p);
1438             if (names == NULL)
1439                 goto code_error;
1440             localsplusnames = r_object(p);
1441             if (localsplusnames == NULL)
1442                 goto code_error;
1443             localspluskinds = r_object(p);
1444             if (localspluskinds == NULL)
1445                 goto code_error;
1446             filename = r_object(p);
1447             if (filename == NULL)
1448                 goto code_error;
1449             name = r_object(p);
1450             if (name == NULL)
1451                 goto code_error;
1452             qualname = r_object(p);
1453             if (qualname == NULL)
1454                 goto code_error;
1455             firstlineno = (int)r_long(p);
1456             if (firstlineno == -1 && PyErr_Occurred())
1457                 break;
1458             linetable = r_object(p);
1459             if (linetable == NULL)
1460                 goto code_error;
1461             exceptiontable = r_object(p);
1462             if (exceptiontable == NULL)
1463                 goto code_error;
1464 
1465             struct _PyCodeConstructor con = {
1466                 .filename = filename,
1467                 .name = name,
1468                 .qualname = qualname,
1469                 .flags = flags,
1470 
1471                 .code = code,
1472                 .firstlineno = firstlineno,
1473                 .linetable = linetable,
1474 
1475                 .consts = consts,
1476                 .names = names,
1477 
1478                 .localsplusnames = localsplusnames,
1479                 .localspluskinds = localspluskinds,
1480 
1481                 .argcount = argcount,
1482                 .posonlyargcount = posonlyargcount,
1483                 .kwonlyargcount = kwonlyargcount,
1484 
1485                 .stacksize = stacksize,
1486 
1487                 .exceptiontable = exceptiontable,
1488             };
1489 
1490             if (_PyCode_Validate(&con) < 0) {
1491                 goto code_error;
1492             }
1493 
1494             v = (PyObject *)_PyCode_New(&con);
1495             if (v == NULL) {
1496                 goto code_error;
1497             }
1498 
1499             v = r_ref_insert(v, idx, flag, p);
1500 
1501           code_error:
1502             if (v == NULL && !PyErr_Occurred()) {
1503                 PyErr_SetString(PyExc_TypeError,
1504                     "NULL object in marshal data for code object");
1505             }
1506             Py_XDECREF(code);
1507             Py_XDECREF(consts);
1508             Py_XDECREF(names);
1509             Py_XDECREF(localsplusnames);
1510             Py_XDECREF(localspluskinds);
1511             Py_XDECREF(filename);
1512             Py_XDECREF(name);
1513             Py_XDECREF(qualname);
1514             Py_XDECREF(linetable);
1515             Py_XDECREF(exceptiontable);
1516         }
1517         retval = v;
1518         break;
1519 
1520     case TYPE_REF:
1521         n = r_long(p);
1522         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1523             if (!PyErr_Occurred()) {
1524                 PyErr_SetString(PyExc_ValueError,
1525                     "bad marshal data (invalid reference)");
1526             }
1527             break;
1528         }
1529         v = PyList_GET_ITEM(p->refs, n);
1530         if (v == Py_None) {
1531             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1532             break;
1533         }
1534         retval = Py_NewRef(v);
1535         break;
1536 
1537     default:
1538         /* Bogus data got written, which isn't ideal.
1539            This will let you keep working and recover. */
1540         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1541         break;
1542 
1543     }
1544     p->depth--;
1545     return retval;
1546 }
1547 
1548 static PyObject *
read_object(RFILE * p)1549 read_object(RFILE *p)
1550 {
1551     PyObject *v;
1552     if (PyErr_Occurred()) {
1553         fprintf(stderr, "XXX readobject called with exception set\n");
1554         return NULL;
1555     }
1556     if (p->ptr && p->end) {
1557         if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1558             return NULL;
1559         }
1560     } else if (p->fp || p->readable) {
1561         if (PySys_Audit("marshal.load", NULL) < 0) {
1562             return NULL;
1563         }
1564     }
1565     v = r_object(p);
1566     if (v == NULL && !PyErr_Occurred())
1567         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1568     return v;
1569 }
1570 
1571 int
PyMarshal_ReadShortFromFile(FILE * fp)1572 PyMarshal_ReadShortFromFile(FILE *fp)
1573 {
1574     RFILE rf;
1575     int res;
1576     assert(fp);
1577     rf.readable = NULL;
1578     rf.fp = fp;
1579     rf.end = rf.ptr = NULL;
1580     rf.buf = NULL;
1581     res = r_short(&rf);
1582     if (rf.buf != NULL)
1583         PyMem_Free(rf.buf);
1584     return res;
1585 }
1586 
1587 long
PyMarshal_ReadLongFromFile(FILE * fp)1588 PyMarshal_ReadLongFromFile(FILE *fp)
1589 {
1590     RFILE rf;
1591     long res;
1592     rf.fp = fp;
1593     rf.readable = NULL;
1594     rf.ptr = rf.end = NULL;
1595     rf.buf = NULL;
1596     res = r_long(&rf);
1597     if (rf.buf != NULL)
1598         PyMem_Free(rf.buf);
1599     return res;
1600 }
1601 
1602 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1603 static off_t
getfilesize(FILE * fp)1604 getfilesize(FILE *fp)
1605 {
1606     struct _Py_stat_struct st;
1607     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1608         return -1;
1609 #if SIZEOF_OFF_T == 4
1610     else if (st.st_size >= INT_MAX)
1611         return (off_t)INT_MAX;
1612 #endif
1613     else
1614         return (off_t)st.st_size;
1615 }
1616 
1617 /* If we can get the size of the file up-front, and it's reasonably small,
1618  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1619  * than reading a byte at a time from file; speeds .pyc imports.
1620  * CAUTION:  since this may read the entire remainder of the file, don't
1621  * call it unless you know you're done with the file.
1622  */
1623 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1624 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1625 {
1626 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1627 #define REASONABLE_FILE_LIMIT (1L << 18)
1628     off_t filesize;
1629     filesize = getfilesize(fp);
1630     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1631         char* pBuf = (char *)PyMem_Malloc(filesize);
1632         if (pBuf != NULL) {
1633             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1634             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1635             PyMem_Free(pBuf);
1636             return v;
1637         }
1638 
1639     }
1640     /* We don't have fstat, or we do but the file is larger than
1641      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1642      */
1643     return PyMarshal_ReadObjectFromFile(fp);
1644 
1645 #undef REASONABLE_FILE_LIMIT
1646 }
1647 
1648 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1649 PyMarshal_ReadObjectFromFile(FILE *fp)
1650 {
1651     RFILE rf;
1652     PyObject *result;
1653     rf.allow_code = 1;
1654     rf.fp = fp;
1655     rf.readable = NULL;
1656     rf.depth = 0;
1657     rf.ptr = rf.end = NULL;
1658     rf.buf = NULL;
1659     rf.refs = PyList_New(0);
1660     if (rf.refs == NULL)
1661         return NULL;
1662     result = read_object(&rf);
1663     Py_DECREF(rf.refs);
1664     if (rf.buf != NULL)
1665         PyMem_Free(rf.buf);
1666     return result;
1667 }
1668 
1669 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1670 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1671 {
1672     RFILE rf;
1673     PyObject *result;
1674     rf.allow_code = 1;
1675     rf.fp = NULL;
1676     rf.readable = NULL;
1677     rf.ptr = str;
1678     rf.end = str + len;
1679     rf.buf = NULL;
1680     rf.depth = 0;
1681     rf.refs = PyList_New(0);
1682     if (rf.refs == NULL)
1683         return NULL;
1684     result = read_object(&rf);
1685     Py_DECREF(rf.refs);
1686     if (rf.buf != NULL)
1687         PyMem_Free(rf.buf);
1688     return result;
1689 }
1690 
1691 static PyObject *
_PyMarshal_WriteObjectToString(PyObject * x,int version,int allow_code)1692 _PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
1693 {
1694     WFILE wf;
1695 
1696     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1697         return NULL;
1698     }
1699     memset(&wf, 0, sizeof(wf));
1700     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1701     if (wf.str == NULL)
1702         return NULL;
1703     wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1704     wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1705     wf.error = WFERR_OK;
1706     wf.version = version;
1707     wf.allow_code = allow_code;
1708     if (w_init_refs(&wf, version)) {
1709         Py_DECREF(wf.str);
1710         return NULL;
1711     }
1712     w_object(x, &wf);
1713     w_clear_refs(&wf);
1714     if (wf.str != NULL) {
1715         const char *base = PyBytes_AS_STRING(wf.str);
1716         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1717             return NULL;
1718     }
1719     if (wf.error != WFERR_OK) {
1720         Py_XDECREF(wf.str);
1721         switch (wf.error) {
1722         case WFERR_NOMEMORY:
1723             PyErr_NoMemory();
1724             break;
1725         case WFERR_NESTEDTOODEEP:
1726             PyErr_SetString(PyExc_ValueError,
1727                             "object too deeply nested to marshal");
1728             break;
1729         case WFERR_CODE_NOT_ALLOWED:
1730             PyErr_SetString(PyExc_ValueError,
1731                             "marshalling code objects is disallowed");
1732             break;
1733         default:
1734         case WFERR_UNMARSHALLABLE:
1735             PyErr_SetString(PyExc_ValueError,
1736                             "unmarshallable object");
1737             break;
1738         }
1739         return NULL;
1740     }
1741     return wf.str;
1742 }
1743 
1744 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1745 PyMarshal_WriteObjectToString(PyObject *x, int version)
1746 {
1747     return _PyMarshal_WriteObjectToString(x, version, 1);
1748 }
1749 
1750 /* And an interface for Python programs... */
1751 /*[clinic input]
1752 marshal.dump
1753 
1754     value: object
1755         Must be a supported type.
1756     file: object
1757         Must be a writeable binary file.
1758     version: int(c_default="Py_MARSHAL_VERSION") = version
1759         Indicates the data format that dump should use.
1760     /
1761     *
1762     allow_code: bool = True
1763         Allow to write code objects.
1764 
1765 Write the value on the open file.
1766 
1767 If the value has (or contains an object that has) an unsupported type, a
1768 ValueError exception is raised - but garbage data will also be written
1769 to the file. The object will not be properly read back by load().
1770 [clinic start generated code]*/
1771 
1772 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version,int allow_code)1773 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1774                   int version, int allow_code)
1775 /*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
1776 {
1777     /* XXX Quick hack -- need to do this differently */
1778     PyObject *s;
1779     PyObject *res;
1780 
1781     s = _PyMarshal_WriteObjectToString(value, version, allow_code);
1782     if (s == NULL)
1783         return NULL;
1784     res = PyObject_CallMethodOneArg(file, &_Py_ID(write), s);
1785     Py_DECREF(s);
1786     return res;
1787 }
1788 
1789 /*[clinic input]
1790 marshal.load
1791 
1792     file: object
1793         Must be readable binary file.
1794     /
1795     *
1796     allow_code: bool = True
1797         Allow to load code objects.
1798 
1799 Read one value from the open file and return it.
1800 
1801 If no valid value is read (e.g. because the data has a different Python
1802 version's incompatible marshal format), raise EOFError, ValueError or
1803 TypeError.
1804 
1805 Note: If an object containing an unsupported type was marshalled with
1806 dump(), load() will substitute None for the unmarshallable type.
1807 [clinic start generated code]*/
1808 
1809 static PyObject *
marshal_load_impl(PyObject * module,PyObject * file,int allow_code)1810 marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
1811 /*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
1812 {
1813     PyObject *data, *result;
1814     RFILE rf;
1815 
1816     /*
1817      * Make a call to the read method, but read zero bytes.
1818      * This is to ensure that the object passed in at least
1819      * has a read method which returns bytes.
1820      * This can be removed if we guarantee good error handling
1821      * for r_string()
1822      */
1823     data = _PyObject_CallMethod(file, &_Py_ID(read), "i", 0);
1824     if (data == NULL)
1825         return NULL;
1826     if (!PyBytes_Check(data)) {
1827         PyErr_Format(PyExc_TypeError,
1828                      "file.read() returned not bytes but %.100s",
1829                      Py_TYPE(data)->tp_name);
1830         result = NULL;
1831     }
1832     else {
1833         rf.allow_code = allow_code;
1834         rf.depth = 0;
1835         rf.fp = NULL;
1836         rf.readable = file;
1837         rf.ptr = rf.end = NULL;
1838         rf.buf = NULL;
1839         if ((rf.refs = PyList_New(0)) != NULL) {
1840             result = read_object(&rf);
1841             Py_DECREF(rf.refs);
1842             if (rf.buf != NULL)
1843                 PyMem_Free(rf.buf);
1844         } else
1845             result = NULL;
1846     }
1847     Py_DECREF(data);
1848     return result;
1849 }
1850 
1851 /*[clinic input]
1852 marshal.dumps
1853 
1854     value: object
1855         Must be a supported type.
1856     version: int(c_default="Py_MARSHAL_VERSION") = version
1857         Indicates the data format that dumps should use.
1858     /
1859     *
1860     allow_code: bool = True
1861         Allow to write code objects.
1862 
1863 Return the bytes object that would be written to a file by dump(value, file).
1864 
1865 Raise a ValueError exception if value has (or contains an object that has) an
1866 unsupported type.
1867 [clinic start generated code]*/
1868 
1869 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version,int allow_code)1870 marshal_dumps_impl(PyObject *module, PyObject *value, int version,
1871                    int allow_code)
1872 /*[clinic end generated code: output=115f90da518d1d49 input=167eaecceb63f0a8]*/
1873 {
1874     return _PyMarshal_WriteObjectToString(value, version, allow_code);
1875 }
1876 
1877 /*[clinic input]
1878 marshal.loads
1879 
1880     bytes: Py_buffer
1881     /
1882     *
1883     allow_code: bool = True
1884         Allow to load code objects.
1885 
1886 Convert the bytes-like object to a value.
1887 
1888 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1889 bytes in the input are ignored.
1890 [clinic start generated code]*/
1891 
1892 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes,int allow_code)1893 marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
1894 /*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
1895 {
1896     RFILE rf;
1897     char *s = bytes->buf;
1898     Py_ssize_t n = bytes->len;
1899     PyObject* result;
1900     rf.allow_code = allow_code;
1901     rf.fp = NULL;
1902     rf.readable = NULL;
1903     rf.ptr = s;
1904     rf.end = s + n;
1905     rf.depth = 0;
1906     if ((rf.refs = PyList_New(0)) == NULL)
1907         return NULL;
1908     result = read_object(&rf);
1909     Py_DECREF(rf.refs);
1910     return result;
1911 }
1912 
1913 static PyMethodDef marshal_methods[] = {
1914     MARSHAL_DUMP_METHODDEF
1915     MARSHAL_LOAD_METHODDEF
1916     MARSHAL_DUMPS_METHODDEF
1917     MARSHAL_LOADS_METHODDEF
1918     {NULL,              NULL}           /* sentinel */
1919 };
1920 
1921 
1922 PyDoc_STRVAR(module_doc,
1923 "This module contains functions that can read and write Python values in\n\
1924 a binary format. The format is specific to Python, but independent of\n\
1925 machine architecture issues.\n\
1926 \n\
1927 Not all Python object types are supported; in general, only objects\n\
1928 whose value is independent from a particular invocation of Python can be\n\
1929 written and read by this module. The following types are supported:\n\
1930 None, integers, floating-point numbers, strings, bytes, bytearrays,\n\
1931 tuples, lists, sets, dictionaries, and code objects, where it\n\
1932 should be understood that tuples, lists and dictionaries are only\n\
1933 supported as long as the values contained therein are themselves\n\
1934 supported; and recursive lists and dictionaries should not be written\n\
1935 (they will cause infinite loops).\n\
1936 \n\
1937 Variables:\n\
1938 \n\
1939 version -- indicates the format that the module uses. Version 0 is the\n\
1940     historical format, version 1 shares interned strings and version 2\n\
1941     uses a binary format for floating-point numbers.\n\
1942     Version 3 shares common object references (New in version 3.4).\n\
1943 \n\
1944 Functions:\n\
1945 \n\
1946 dump() -- write value to a file\n\
1947 load() -- read value from a file\n\
1948 dumps() -- marshal value as a bytes object\n\
1949 loads() -- read value from a bytes-like object");
1950 
1951 
1952 static int
marshal_module_exec(PyObject * mod)1953 marshal_module_exec(PyObject *mod)
1954 {
1955     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1956         return -1;
1957     }
1958     return 0;
1959 }
1960 
1961 static PyModuleDef_Slot marshalmodule_slots[] = {
1962     {Py_mod_exec, marshal_module_exec},
1963     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1964     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1965     {0, NULL}
1966 };
1967 
1968 static struct PyModuleDef marshalmodule = {
1969     PyModuleDef_HEAD_INIT,
1970     .m_name = "marshal",
1971     .m_doc = module_doc,
1972     .m_methods = marshal_methods,
1973     .m_slots = marshalmodule_slots,
1974 };
1975 
1976 PyMODINIT_FUNC
PyMarshal_Init(void)1977 PyMarshal_Init(void)
1978 {
1979     return PyModuleDef_Init(&marshalmodule);
1980 }
1981