/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #ifndef Py_BUILD_CORE_BUILTIN
8 # define Py_BUILD_CORE_MODULE 1
9 #endif
10
11 #include "Python.h"
12 #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
13 #include "pycore_runtime.h" // _PyRuntime
14
15 #include "pycore_global_strings.h" // _Py_ID()
16 #include <stdbool.h> // bool
17
18
19 typedef struct _PyScannerObject {
20 PyObject_HEAD
21 signed char strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
27 } PyScannerObject;
28
29 static PyMemberDef scanner_members[] = {
30 {"strict", Py_T_BOOL, offsetof(PyScannerObject, strict), Py_READONLY, "strict"},
31 {"object_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_hook), Py_READONLY, "object_hook"},
32 {"object_pairs_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), Py_READONLY},
33 {"parse_float", _Py_T_OBJECT, offsetof(PyScannerObject, parse_float), Py_READONLY, "parse_float"},
34 {"parse_int", _Py_T_OBJECT, offsetof(PyScannerObject, parse_int), Py_READONLY, "parse_int"},
35 {"parse_constant", _Py_T_OBJECT, offsetof(PyScannerObject, parse_constant), Py_READONLY, "parse_constant"},
36 {NULL}
37 };
38
39 typedef struct _PyEncoderObject {
40 PyObject_HEAD
41 PyObject *markers;
42 PyObject *defaultfn;
43 PyObject *encoder;
44 PyObject *indent;
45 PyObject *key_separator;
46 PyObject *item_separator;
47 char sort_keys;
48 char skipkeys;
49 int allow_nan;
50 PyCFunction fast_encode;
51 } PyEncoderObject;
52
53 static PyMemberDef encoder_members[] = {
54 {"markers", _Py_T_OBJECT, offsetof(PyEncoderObject, markers), Py_READONLY, "markers"},
55 {"default", _Py_T_OBJECT, offsetof(PyEncoderObject, defaultfn), Py_READONLY, "default"},
56 {"encoder", _Py_T_OBJECT, offsetof(PyEncoderObject, encoder), Py_READONLY, "encoder"},
57 {"indent", _Py_T_OBJECT, offsetof(PyEncoderObject, indent), Py_READONLY, "indent"},
58 {"key_separator", _Py_T_OBJECT, offsetof(PyEncoderObject, key_separator), Py_READONLY, "key_separator"},
59 {"item_separator", _Py_T_OBJECT, offsetof(PyEncoderObject, item_separator), Py_READONLY, "item_separator"},
60 {"sort_keys", Py_T_BOOL, offsetof(PyEncoderObject, sort_keys), Py_READONLY, "sort_keys"},
61 {"skipkeys", Py_T_BOOL, offsetof(PyEncoderObject, skipkeys), Py_READONLY, "skipkeys"},
62 {NULL}
63 };
64
65 /* Forward decls */
66
67 static PyObject *
68 ascii_escape_unicode(PyObject *pystr);
69 static PyObject *
70 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
71 static PyObject *
72 scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
73 static PyObject *
74 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
75 static PyObject *
76 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
77 static void
78 scanner_dealloc(PyObject *self);
79 static int
80 scanner_clear(PyScannerObject *self);
81 static PyObject *
82 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
83 static void
84 encoder_dealloc(PyObject *self);
85 static int
86 encoder_clear(PyEncoderObject *self);
87 static int
88 encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
89 static int
90 encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
91 static int
92 encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
93 static PyObject *
94 _encoded_const(PyObject *obj);
95 static void
96 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
97 static PyObject *
98 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
99 static PyObject *
100 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
101
/* True when c can be copied verbatim into an ASCII-only JSON string: a
   character in the range ' ' through '~' other than '\\' and '"'.
   The argument is fully parenthesized so that any expression may be passed,
   but it is evaluated several times and must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True when c is a space, tab, newline or carriage return.  The argument is
   evaluated several times and must be free of side effects. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
104
105 static Py_ssize_t
ascii_escape_unichar(Py_UCS4 c,unsigned char * output,Py_ssize_t chars)106 ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
107 {
108 /* Escape unicode code point c to ASCII escape sequences
109 in char *output. output must have at least 12 bytes unused to
110 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
111 output[chars++] = '\\';
112 switch (c) {
113 case '\\': output[chars++] = c; break;
114 case '"': output[chars++] = c; break;
115 case '\b': output[chars++] = 'b'; break;
116 case '\f': output[chars++] = 'f'; break;
117 case '\n': output[chars++] = 'n'; break;
118 case '\r': output[chars++] = 'r'; break;
119 case '\t': output[chars++] = 't'; break;
120 default:
121 if (c >= 0x10000) {
122 /* UTF-16 surrogate pair */
123 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
124 output[chars++] = 'u';
125 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
126 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
127 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
128 output[chars++] = Py_hexdigits[(v ) & 0xf];
129 c = Py_UNICODE_LOW_SURROGATE(c);
130 output[chars++] = '\\';
131 }
132 output[chars++] = 'u';
133 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
134 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
135 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
136 output[chars++] = Py_hexdigits[(c ) & 0xf];
137 }
138 return chars;
139 }
140
141 static PyObject *
ascii_escape_unicode(PyObject * pystr)142 ascii_escape_unicode(PyObject *pystr)
143 {
144 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
145 Py_ssize_t i;
146 Py_ssize_t input_chars;
147 Py_ssize_t output_size;
148 Py_ssize_t chars;
149 PyObject *rval;
150 const void *input;
151 Py_UCS1 *output;
152 int kind;
153
154 input_chars = PyUnicode_GET_LENGTH(pystr);
155 input = PyUnicode_DATA(pystr);
156 kind = PyUnicode_KIND(pystr);
157
158 /* Compute the output size */
159 for (i = 0, output_size = 2; i < input_chars; i++) {
160 Py_UCS4 c = PyUnicode_READ(kind, input, i);
161 Py_ssize_t d;
162 if (S_CHAR(c)) {
163 d = 1;
164 }
165 else {
166 switch(c) {
167 case '\\': case '"': case '\b': case '\f':
168 case '\n': case '\r': case '\t':
169 d = 2; break;
170 default:
171 d = c >= 0x10000 ? 12 : 6;
172 }
173 }
174 if (output_size > PY_SSIZE_T_MAX - d) {
175 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
176 return NULL;
177 }
178 output_size += d;
179 }
180
181 rval = PyUnicode_New(output_size, 127);
182 if (rval == NULL) {
183 return NULL;
184 }
185 output = PyUnicode_1BYTE_DATA(rval);
186 chars = 0;
187 output[chars++] = '"';
188 for (i = 0; i < input_chars; i++) {
189 Py_UCS4 c = PyUnicode_READ(kind, input, i);
190 if (S_CHAR(c)) {
191 output[chars++] = c;
192 }
193 else {
194 chars = ascii_escape_unichar(c, output, chars);
195 }
196 }
197 output[chars++] = '"';
198 #ifdef Py_DEBUG
199 assert(_PyUnicode_CheckConsistency(rval, 1));
200 #endif
201 return rval;
202 }
203
204 static PyObject *
escape_unicode(PyObject * pystr)205 escape_unicode(PyObject *pystr)
206 {
207 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
208 Py_ssize_t i;
209 Py_ssize_t input_chars;
210 Py_ssize_t output_size;
211 Py_ssize_t chars;
212 PyObject *rval;
213 const void *input;
214 int kind;
215 Py_UCS4 maxchar;
216
217 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
218 input_chars = PyUnicode_GET_LENGTH(pystr);
219 input = PyUnicode_DATA(pystr);
220 kind = PyUnicode_KIND(pystr);
221
222 /* Compute the output size */
223 for (i = 0, output_size = 2; i < input_chars; i++) {
224 Py_UCS4 c = PyUnicode_READ(kind, input, i);
225 Py_ssize_t d;
226 switch (c) {
227 case '\\': case '"': case '\b': case '\f':
228 case '\n': case '\r': case '\t':
229 d = 2;
230 break;
231 default:
232 if (c <= 0x1f)
233 d = 6;
234 else
235 d = 1;
236 }
237 if (output_size > PY_SSIZE_T_MAX - d) {
238 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
239 return NULL;
240 }
241 output_size += d;
242 }
243
244 rval = PyUnicode_New(output_size, maxchar);
245 if (rval == NULL)
246 return NULL;
247
248 kind = PyUnicode_KIND(rval);
249
250 #define ENCODE_OUTPUT do { \
251 chars = 0; \
252 output[chars++] = '"'; \
253 for (i = 0; i < input_chars; i++) { \
254 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
255 switch (c) { \
256 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
257 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
258 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
259 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
260 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
261 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
262 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
263 default: \
264 if (c <= 0x1f) { \
265 output[chars++] = '\\'; \
266 output[chars++] = 'u'; \
267 output[chars++] = '0'; \
268 output[chars++] = '0'; \
269 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
270 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
271 } else { \
272 output[chars++] = c; \
273 } \
274 } \
275 } \
276 output[chars++] = '"'; \
277 } while (0)
278
279 if (kind == PyUnicode_1BYTE_KIND) {
280 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
281 ENCODE_OUTPUT;
282 } else if (kind == PyUnicode_2BYTE_KIND) {
283 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
284 ENCODE_OUTPUT;
285 } else {
286 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
287 assert(kind == PyUnicode_4BYTE_KIND);
288 ENCODE_OUTPUT;
289 }
290 #undef ENCODE_OUTPUT
291
292 #ifdef Py_DEBUG
293 assert(_PyUnicode_CheckConsistency(rval, 1));
294 #endif
295 return rval;
296 }
297
298 static void
raise_errmsg(const char * msg,PyObject * s,Py_ssize_t end)299 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
300 {
301 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
302 _Py_DECLARE_STR(json_decoder, "json.decoder");
303 PyObject *JSONDecodeError =
304 _PyImport_GetModuleAttr(&_Py_STR(json_decoder), &_Py_ID(JSONDecodeError));
305 if (JSONDecodeError == NULL) {
306 return;
307 }
308
309 PyObject *exc;
310 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
311 Py_DECREF(JSONDecodeError);
312 if (exc) {
313 PyErr_SetObject(JSONDecodeError, exc);
314 Py_DECREF(exc);
315 }
316 }
317
318 static void
raise_stop_iteration(Py_ssize_t idx)319 raise_stop_iteration(Py_ssize_t idx)
320 {
321 PyObject *value = PyLong_FromSsize_t(idx);
322 if (value != NULL) {
323 PyErr_SetObject(PyExc_StopIteration, value);
324 Py_DECREF(value);
325 }
326 }
327
328 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)329 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
330 /* return (rval, idx) tuple, stealing reference to rval */
331 PyObject *tpl;
332 PyObject *pyidx;
333 /*
334 steal a reference to rval, returns (rval, idx)
335 */
336 if (rval == NULL) {
337 return NULL;
338 }
339 pyidx = PyLong_FromSsize_t(idx);
340 if (pyidx == NULL) {
341 Py_DECREF(rval);
342 return NULL;
343 }
344 tpl = PyTuple_New(2);
345 if (tpl == NULL) {
346 Py_DECREF(pyidx);
347 Py_DECREF(rval);
348 return NULL;
349 }
350 PyTuple_SET_ITEM(tpl, 0, rval);
351 PyTuple_SET_ITEM(tpl, 1, pyidx);
352 return tpl;
353 }
354
355 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)356 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
357 {
358 /* Read the JSON string from PyUnicode pystr.
359 end is the index of the first character after the quote.
360 if strict is zero then literal control characters are allowed
361 *next_end_ptr is a return-by-reference index of the character
362 after the end quote
363
364 Return value is a new PyUnicode
365 */
366 PyObject *rval = NULL;
367 Py_ssize_t len;
368 Py_ssize_t begin = end - 1;
369 Py_ssize_t next /* = begin */;
370 const void *buf;
371 int kind;
372
373 _PyUnicodeWriter writer;
374 _PyUnicodeWriter_Init(&writer);
375 writer.overallocate = 1;
376
377 len = PyUnicode_GET_LENGTH(pystr);
378 buf = PyUnicode_DATA(pystr);
379 kind = PyUnicode_KIND(pystr);
380
381 if (end < 0 || len < end) {
382 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
383 goto bail;
384 }
385 while (1) {
386 /* Find the end of the string or the next escape */
387 Py_UCS4 c;
388 {
389 // Use tight scope variable to help register allocation.
390 Py_UCS4 d = 0;
391 for (next = end; next < len; next++) {
392 d = PyUnicode_READ(kind, buf, next);
393 if (d == '"' || d == '\\') {
394 break;
395 }
396 if (d <= 0x1f && strict) {
397 raise_errmsg("Invalid control character at", pystr, next);
398 goto bail;
399 }
400 }
401 c = d;
402 }
403
404 if (c == '"') {
405 // Fast path for simple case.
406 if (writer.buffer == NULL) {
407 PyObject *ret = PyUnicode_Substring(pystr, end, next);
408 if (ret == NULL) {
409 goto bail;
410 }
411 *next_end_ptr = next + 1;;
412 return ret;
413 }
414 }
415 else if (c != '\\') {
416 raise_errmsg("Unterminated string starting at", pystr, begin);
417 goto bail;
418 }
419
420 /* Pick up this chunk if it's not zero length */
421 if (next != end) {
422 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
423 goto bail;
424 }
425 }
426 next++;
427 if (c == '"') {
428 end = next;
429 break;
430 }
431 if (next == len) {
432 raise_errmsg("Unterminated string starting at", pystr, begin);
433 goto bail;
434 }
435 c = PyUnicode_READ(kind, buf, next);
436 if (c != 'u') {
437 /* Non-unicode backslash escapes */
438 end = next + 1;
439 switch (c) {
440 case '"': break;
441 case '\\': break;
442 case '/': break;
443 case 'b': c = '\b'; break;
444 case 'f': c = '\f'; break;
445 case 'n': c = '\n'; break;
446 case 'r': c = '\r'; break;
447 case 't': c = '\t'; break;
448 default: c = 0;
449 }
450 if (c == 0) {
451 raise_errmsg("Invalid \\escape", pystr, end - 2);
452 goto bail;
453 }
454 }
455 else {
456 c = 0;
457 next++;
458 end = next + 4;
459 if (end >= len) {
460 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
461 goto bail;
462 }
463 /* Decode 4 hex digits */
464 for (; next < end; next++) {
465 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
466 c <<= 4;
467 switch (digit) {
468 case '0': case '1': case '2': case '3': case '4':
469 case '5': case '6': case '7': case '8': case '9':
470 c |= (digit - '0'); break;
471 case 'a': case 'b': case 'c': case 'd': case 'e':
472 case 'f':
473 c |= (digit - 'a' + 10); break;
474 case 'A': case 'B': case 'C': case 'D': case 'E':
475 case 'F':
476 c |= (digit - 'A' + 10); break;
477 default:
478 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
479 goto bail;
480 }
481 }
482 /* Surrogate pair */
483 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
484 PyUnicode_READ(kind, buf, next++) == '\\' &&
485 PyUnicode_READ(kind, buf, next++) == 'u') {
486 Py_UCS4 c2 = 0;
487 end += 6;
488 /* Decode 4 hex digits */
489 for (; next < end; next++) {
490 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
491 c2 <<= 4;
492 switch (digit) {
493 case '0': case '1': case '2': case '3': case '4':
494 case '5': case '6': case '7': case '8': case '9':
495 c2 |= (digit - '0'); break;
496 case 'a': case 'b': case 'c': case 'd': case 'e':
497 case 'f':
498 c2 |= (digit - 'a' + 10); break;
499 case 'A': case 'B': case 'C': case 'D': case 'E':
500 case 'F':
501 c2 |= (digit - 'A' + 10); break;
502 default:
503 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
504 goto bail;
505 }
506 }
507 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
508 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
509 else
510 end -= 6;
511 }
512 }
513 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
514 goto bail;
515 }
516 }
517
518 rval = _PyUnicodeWriter_Finish(&writer);
519 *next_end_ptr = end;
520 return rval;
521
522 bail:
523 *next_end_ptr = -1;
524 _PyUnicodeWriter_Dealloc(&writer);
525 return NULL;
526 }
527
528 PyDoc_STRVAR(pydoc_scanstring,
529 "scanstring(string, end, strict=True) -> (string, end)\n"
530 "\n"
531 "Scan the string s for a JSON string. End is the index of the\n"
532 "character in s after the quote that started the JSON string.\n"
533 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
534 "on attempt to decode an invalid string. If strict is False then literal\n"
535 "control characters are allowed in the string.\n"
536 "\n"
537 "Returns a tuple of the decoded string and the index of the character in s\n"
538 "after the end quote."
539 );
540
541 static PyObject *
py_scanstring(PyObject * Py_UNUSED (self),PyObject * args)542 py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
543 {
544 PyObject *pystr;
545 PyObject *rval;
546 Py_ssize_t end;
547 Py_ssize_t next_end = -1;
548 int strict = 1;
549 if (!PyArg_ParseTuple(args, "On|p:scanstring", &pystr, &end, &strict)) {
550 return NULL;
551 }
552 if (PyUnicode_Check(pystr)) {
553 rval = scanstring_unicode(pystr, end, strict, &next_end);
554 }
555 else {
556 PyErr_Format(PyExc_TypeError,
557 "first argument must be a string, not %.80s",
558 Py_TYPE(pystr)->tp_name);
559 return NULL;
560 }
561 return _build_rval_index_tuple(rval, next_end);
562 }
563
564 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
565 "encode_basestring_ascii(string) -> string\n"
566 "\n"
567 "Return an ASCII-only JSON representation of a Python string"
568 );
569
570 static PyObject *
py_encode_basestring_ascii(PyObject * Py_UNUSED (self),PyObject * pystr)571 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
572 {
573 PyObject *rval;
574 /* Return an ASCII-only JSON representation of a Python string */
575 /* METH_O */
576 if (PyUnicode_Check(pystr)) {
577 rval = ascii_escape_unicode(pystr);
578 }
579 else {
580 PyErr_Format(PyExc_TypeError,
581 "first argument must be a string, not %.80s",
582 Py_TYPE(pystr)->tp_name);
583 return NULL;
584 }
585 return rval;
586 }
587
588
589 PyDoc_STRVAR(pydoc_encode_basestring,
590 "encode_basestring(string) -> string\n"
591 "\n"
592 "Return a JSON representation of a Python string"
593 );
594
595 static PyObject *
py_encode_basestring(PyObject * Py_UNUSED (self),PyObject * pystr)596 py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
597 {
598 PyObject *rval;
599 /* Return a JSON representation of a Python string */
600 /* METH_O */
601 if (PyUnicode_Check(pystr)) {
602 rval = escape_unicode(pystr);
603 }
604 else {
605 PyErr_Format(PyExc_TypeError,
606 "first argument must be a string, not %.80s",
607 Py_TYPE(pystr)->tp_name);
608 return NULL;
609 }
610 return rval;
611 }
612
613 static void
scanner_dealloc(PyObject * self)614 scanner_dealloc(PyObject *self)
615 {
616 PyTypeObject *tp = Py_TYPE(self);
617 /* bpo-31095: UnTrack is needed before calling any callbacks */
618 PyObject_GC_UnTrack(self);
619 scanner_clear((PyScannerObject *)self);
620 tp->tp_free(self);
621 Py_DECREF(tp);
622 }
623
624 static int
scanner_traverse(PyScannerObject * self,visitproc visit,void * arg)625 scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
626 {
627 Py_VISIT(Py_TYPE(self));
628 Py_VISIT(self->object_hook);
629 Py_VISIT(self->object_pairs_hook);
630 Py_VISIT(self->parse_float);
631 Py_VISIT(self->parse_int);
632 Py_VISIT(self->parse_constant);
633 return 0;
634 }
635
636 static int
scanner_clear(PyScannerObject * self)637 scanner_clear(PyScannerObject *self)
638 {
639 Py_CLEAR(self->object_hook);
640 Py_CLEAR(self->object_pairs_hook);
641 Py_CLEAR(self->parse_float);
642 Py_CLEAR(self->parse_int);
643 Py_CLEAR(self->parse_constant);
644 return 0;
645 }
646
647 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * memo,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)648 _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
649 {
650 /* Read a JSON object from PyUnicode pystr.
651 idx is the index of the first character after the opening curly brace.
652 *next_idx_ptr is a return-by-reference index to the first character after
653 the closing curly brace.
654
655 Returns a new PyObject (usually a dict, but object_hook can change that)
656 */
657 const void *str;
658 int kind;
659 Py_ssize_t end_idx;
660 PyObject *val = NULL;
661 PyObject *rval = NULL;
662 PyObject *key = NULL;
663 int has_pairs_hook = (s->object_pairs_hook != Py_None);
664 Py_ssize_t next_idx;
665 Py_ssize_t comma_idx;
666
667 str = PyUnicode_DATA(pystr);
668 kind = PyUnicode_KIND(pystr);
669 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
670
671 if (has_pairs_hook)
672 rval = PyList_New(0);
673 else
674 rval = PyDict_New();
675 if (rval == NULL)
676 return NULL;
677
678 /* skip whitespace after { */
679 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
680
681 /* only loop if the object is non-empty */
682 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
683 while (1) {
684 PyObject *memokey;
685
686 /* read key */
687 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
688 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
689 goto bail;
690 }
691 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
692 if (key == NULL)
693 goto bail;
694 if (PyDict_SetDefaultRef(memo, key, key, &memokey) < 0) {
695 goto bail;
696 }
697 Py_SETREF(key, memokey);
698 idx = next_idx;
699
700 /* skip whitespace between key and : delimiter, read :, skip whitespace */
701 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
702 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
703 raise_errmsg("Expecting ':' delimiter", pystr, idx);
704 goto bail;
705 }
706 idx++;
707 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
708
709 /* read any JSON term */
710 val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
711 if (val == NULL)
712 goto bail;
713
714 if (has_pairs_hook) {
715 PyObject *item = PyTuple_Pack(2, key, val);
716 if (item == NULL)
717 goto bail;
718 Py_CLEAR(key);
719 Py_CLEAR(val);
720 if (PyList_Append(rval, item) == -1) {
721 Py_DECREF(item);
722 goto bail;
723 }
724 Py_DECREF(item);
725 }
726 else {
727 if (PyDict_SetItem(rval, key, val) < 0)
728 goto bail;
729 Py_CLEAR(key);
730 Py_CLEAR(val);
731 }
732 idx = next_idx;
733
734 /* skip whitespace before } or , */
735 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
736
737 /* bail if the object is closed or we didn't get the , delimiter */
738 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
739 break;
740 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
741 raise_errmsg("Expecting ',' delimiter", pystr, idx);
742 goto bail;
743 }
744 comma_idx = idx;
745 idx++;
746
747 /* skip whitespace after , delimiter */
748 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
749
750 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}') {
751 raise_errmsg("Illegal trailing comma before end of object", pystr, comma_idx);
752 goto bail;
753 }
754 }
755 }
756
757 *next_idx_ptr = idx + 1;
758
759 if (has_pairs_hook) {
760 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
761 Py_DECREF(rval);
762 return val;
763 }
764
765 /* if object_hook is not None: rval = object_hook(rval) */
766 if (s->object_hook != Py_None) {
767 val = PyObject_CallOneArg(s->object_hook, rval);
768 Py_DECREF(rval);
769 return val;
770 }
771 return rval;
772 bail:
773 Py_XDECREF(key);
774 Py_XDECREF(val);
775 Py_XDECREF(rval);
776 return NULL;
777 }
778
779 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * memo,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)780 _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
781 /* Read a JSON array from PyUnicode pystr.
782 idx is the index of the first character after the opening brace.
783 *next_idx_ptr is a return-by-reference index to the first character after
784 the closing brace.
785
786 Returns a new PyList
787 */
788 const void *str;
789 int kind;
790 Py_ssize_t end_idx;
791 PyObject *val = NULL;
792 PyObject *rval;
793 Py_ssize_t next_idx;
794 Py_ssize_t comma_idx;
795
796 rval = PyList_New(0);
797 if (rval == NULL)
798 return NULL;
799
800 str = PyUnicode_DATA(pystr);
801 kind = PyUnicode_KIND(pystr);
802 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
803
804 /* skip whitespace after [ */
805 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
806
807 /* only loop if the array is non-empty */
808 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
809 while (1) {
810
811 /* read any JSON term */
812 val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
813 if (val == NULL)
814 goto bail;
815
816 if (PyList_Append(rval, val) == -1)
817 goto bail;
818
819 Py_CLEAR(val);
820 idx = next_idx;
821
822 /* skip whitespace between term and , */
823 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
824
825 /* bail if the array is closed or we didn't get the , delimiter */
826 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
827 break;
828 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
829 raise_errmsg("Expecting ',' delimiter", pystr, idx);
830 goto bail;
831 }
832 comma_idx = idx;
833 idx++;
834
835 /* skip whitespace after , */
836 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
837
838 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']') {
839 raise_errmsg("Illegal trailing comma before end of array", pystr, comma_idx);
840 goto bail;
841 }
842 }
843 }
844
845 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
846 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
847 raise_errmsg("Expecting value", pystr, end_idx);
848 goto bail;
849 }
850 *next_idx_ptr = idx + 1;
851 return rval;
852 bail:
853 Py_XDECREF(val);
854 Py_DECREF(rval);
855 return NULL;
856 }
857
858 static PyObject *
_parse_constant(PyScannerObject * s,const char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)859 _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
860 /* Read a JSON constant.
861 constant is the constant string that was found
862 ("NaN", "Infinity", "-Infinity").
863 idx is the index of the first character of the constant
864 *next_idx_ptr is a return-by-reference index to the first character after
865 the constant.
866
867 Returns the result of parse_constant
868 */
869 PyObject *cstr;
870 PyObject *rval;
871 /* constant is "NaN", "Infinity", or "-Infinity" */
872 cstr = PyUnicode_InternFromString(constant);
873 if (cstr == NULL)
874 return NULL;
875
876 /* rval = parse_constant(constant) */
877 rval = PyObject_CallOneArg(s->parse_constant, cstr);
878 idx += PyUnicode_GET_LENGTH(cstr);
879 Py_DECREF(cstr);
880 *next_idx_ptr = idx;
881 return rval;
882 }
883
884 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)885 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
886 /* Read a JSON number from PyUnicode pystr.
887 idx is the index of the first character of the number
888 *next_idx_ptr is a return-by-reference index to the first character after
889 the number.
890
891 Returns a new PyObject representation of that number:
892 PyLong, or PyFloat.
893 May return other types if parse_int or parse_float are set
894 */
895 const void *str;
896 int kind;
897 Py_ssize_t end_idx;
898 Py_ssize_t idx = start;
899 int is_float = 0;
900 PyObject *rval;
901 PyObject *numstr = NULL;
902 PyObject *custom_func;
903
904 str = PyUnicode_DATA(pystr);
905 kind = PyUnicode_KIND(pystr);
906 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
907
908 /* read a sign if it's there, make sure it's not the end of the string */
909 if (PyUnicode_READ(kind, str, idx) == '-') {
910 idx++;
911 if (idx > end_idx) {
912 raise_stop_iteration(start);
913 return NULL;
914 }
915 }
916
917 /* read as many integer digits as we find as long as it doesn't start with 0 */
918 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
919 idx++;
920 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
921 }
922 /* if it starts with 0 we only expect one integer digit */
923 else if (PyUnicode_READ(kind, str, idx) == '0') {
924 idx++;
925 }
926 /* no integer digits, error */
927 else {
928 raise_stop_iteration(start);
929 return NULL;
930 }
931
932 /* if the next char is '.' followed by a digit then read all float digits */
933 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
934 is_float = 1;
935 idx += 2;
936 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
937 }
938
939 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
940 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
941 Py_ssize_t e_start = idx;
942 idx++;
943
944 /* read an exponent sign if present */
945 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
946
947 /* read all digits */
948 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
949
950 /* if we got a digit, then parse as float. if not, backtrack */
951 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
952 is_float = 1;
953 }
954 else {
955 idx = e_start;
956 }
957 }
958
959 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
960 custom_func = s->parse_float;
961 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
962 custom_func = s->parse_int;
963 else
964 custom_func = NULL;
965
966 if (custom_func) {
967 /* copy the section we determined to be a number */
968 numstr = PyUnicode_FromKindAndData(kind,
969 (char*)str + kind * start,
970 idx - start);
971 if (numstr == NULL)
972 return NULL;
973 rval = PyObject_CallOneArg(custom_func, numstr);
974 }
975 else {
976 Py_ssize_t i, n;
977 char *buf;
978 /* Straight conversion to ASCII, to avoid costly conversion of
979 decimal unicode digits (which cannot appear here) */
980 n = idx - start;
981 numstr = PyBytes_FromStringAndSize(NULL, n);
982 if (numstr == NULL)
983 return NULL;
984 buf = PyBytes_AS_STRING(numstr);
985 for (i = 0; i < n; i++) {
986 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
987 }
988 if (is_float)
989 rval = PyFloat_FromString(numstr);
990 else
991 rval = PyLong_FromString(buf, NULL, 10);
992 }
993 Py_DECREF(numstr);
994 *next_idx_ptr = idx;
995 return rval;
996 }
997
998 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * memo,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)999 scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1000 {
1001 /* Read one JSON term (of any kind) from PyUnicode pystr.
1002 idx is the index of the first character of the term
1003 *next_idx_ptr is a return-by-reference index to the first character after
1004 the number.
1005
1006 Returns a new PyObject representation of the term.
1007 */
1008 PyObject *res;
1009 const void *str;
1010 int kind;
1011 Py_ssize_t length;
1012
1013 str = PyUnicode_DATA(pystr);
1014 kind = PyUnicode_KIND(pystr);
1015 length = PyUnicode_GET_LENGTH(pystr);
1016
1017 if (idx < 0) {
1018 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1019 return NULL;
1020 }
1021 if (idx >= length) {
1022 raise_stop_iteration(idx);
1023 return NULL;
1024 }
1025
1026 switch (PyUnicode_READ(kind, str, idx)) {
1027 case '"':
1028 /* string */
1029 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1030 case '{':
1031 /* object */
1032 if (_Py_EnterRecursiveCall(" while decoding a JSON object "
1033 "from a unicode string"))
1034 return NULL;
1035 res = _parse_object_unicode(s, memo, pystr, idx + 1, next_idx_ptr);
1036 _Py_LeaveRecursiveCall();
1037 return res;
1038 case '[':
1039 /* array */
1040 if (_Py_EnterRecursiveCall(" while decoding a JSON array "
1041 "from a unicode string"))
1042 return NULL;
1043 res = _parse_array_unicode(s, memo, pystr, idx + 1, next_idx_ptr);
1044 _Py_LeaveRecursiveCall();
1045 return res;
1046 case 'n':
1047 /* null */
1048 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1049 *next_idx_ptr = idx + 4;
1050 Py_RETURN_NONE;
1051 }
1052 break;
1053 case 't':
1054 /* true */
1055 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1056 *next_idx_ptr = idx + 4;
1057 Py_RETURN_TRUE;
1058 }
1059 break;
1060 case 'f':
1061 /* false */
1062 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1063 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1064 PyUnicode_READ(kind, str, idx + 3) == 's' &&
1065 PyUnicode_READ(kind, str, idx + 4) == 'e') {
1066 *next_idx_ptr = idx + 5;
1067 Py_RETURN_FALSE;
1068 }
1069 break;
1070 case 'N':
1071 /* NaN */
1072 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1073 PyUnicode_READ(kind, str, idx + 2) == 'N') {
1074 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1075 }
1076 break;
1077 case 'I':
1078 /* Infinity */
1079 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1080 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1081 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1082 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1083 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1084 PyUnicode_READ(kind, str, idx + 6) == 't' &&
1085 PyUnicode_READ(kind, str, idx + 7) == 'y') {
1086 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1087 }
1088 break;
1089 case '-':
1090 /* -Infinity */
1091 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1092 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1093 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1094 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1095 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1096 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1097 PyUnicode_READ(kind, str, idx + 7) == 't' &&
1098 PyUnicode_READ(kind, str, idx + 8) == 'y') {
1099 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1100 }
1101 break;
1102 }
1103 /* Didn't find a string, object, array, or named constant. Look for a number. */
1104 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1105 }
1106
1107 static PyObject *
scanner_call(PyScannerObject * self,PyObject * args,PyObject * kwds)1108 scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1109 {
1110 /* Python callable interface to scan_once_{str,unicode} */
1111 PyObject *pystr;
1112 PyObject *rval;
1113 Py_ssize_t idx;
1114 Py_ssize_t next_idx = -1;
1115 static char *kwlist[] = {"string", "idx", NULL};
1116 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1117 return NULL;
1118
1119 if (!PyUnicode_Check(pystr)) {
1120 PyErr_Format(PyExc_TypeError,
1121 "first argument must be a string, not %.80s",
1122 Py_TYPE(pystr)->tp_name);
1123 return NULL;
1124 }
1125
1126 PyObject *memo = PyDict_New();
1127 if (memo == NULL) {
1128 return NULL;
1129 }
1130 rval = scan_once_unicode(self, memo, pystr, idx, &next_idx);
1131 Py_DECREF(memo);
1132 if (rval == NULL)
1133 return NULL;
1134 return _build_rval_index_tuple(rval, next_idx);
1135 }
1136
1137 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1138 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1139 {
1140 PyScannerObject *s;
1141 PyObject *ctx;
1142 PyObject *strict;
1143 static char *kwlist[] = {"context", NULL};
1144
1145 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1146 return NULL;
1147
1148 s = (PyScannerObject *)type->tp_alloc(type, 0);
1149 if (s == NULL) {
1150 return NULL;
1151 }
1152
1153 /* All of these will fail "gracefully" so we don't need to verify them */
1154 strict = PyObject_GetAttrString(ctx, "strict");
1155 if (strict == NULL)
1156 goto bail;
1157 s->strict = PyObject_IsTrue(strict);
1158 Py_DECREF(strict);
1159 if (s->strict < 0)
1160 goto bail;
1161 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1162 if (s->object_hook == NULL)
1163 goto bail;
1164 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1165 if (s->object_pairs_hook == NULL)
1166 goto bail;
1167 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1168 if (s->parse_float == NULL)
1169 goto bail;
1170 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1171 if (s->parse_int == NULL)
1172 goto bail;
1173 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1174 if (s->parse_constant == NULL)
1175 goto bail;
1176
1177 return (PyObject *)s;
1178
1179 bail:
1180 Py_DECREF(s);
1181 return NULL;
1182 }
1183
1184 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1185
1186 static PyType_Slot PyScannerType_slots[] = {
1187 {Py_tp_doc, (void *)scanner_doc},
1188 {Py_tp_dealloc, scanner_dealloc},
1189 {Py_tp_call, scanner_call},
1190 {Py_tp_traverse, scanner_traverse},
1191 {Py_tp_clear, scanner_clear},
1192 {Py_tp_members, scanner_members},
1193 {Py_tp_new, scanner_new},
1194 {0, 0}
1195 };
1196
1197 static PyType_Spec PyScannerType_spec = {
1198 .name = "_json.Scanner",
1199 .basicsize = sizeof(PyScannerObject),
1200 .itemsize = 0,
1201 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1202 .slots = PyScannerType_slots,
1203 };
1204
1205 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1206 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1207 {
1208 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1209
1210 PyEncoderObject *s;
1211 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1212 PyObject *item_separator;
1213 int sort_keys, skipkeys, allow_nan;
1214
1215 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1216 &markers, &defaultfn, &encoder, &indent,
1217 &key_separator, &item_separator,
1218 &sort_keys, &skipkeys, &allow_nan))
1219 return NULL;
1220
1221 if (markers != Py_None && !PyDict_Check(markers)) {
1222 PyErr_Format(PyExc_TypeError,
1223 "make_encoder() argument 1 must be dict or None, "
1224 "not %.200s", Py_TYPE(markers)->tp_name);
1225 return NULL;
1226 }
1227
1228 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1229 if (s == NULL)
1230 return NULL;
1231
1232 s->markers = Py_NewRef(markers);
1233 s->defaultfn = Py_NewRef(defaultfn);
1234 s->encoder = Py_NewRef(encoder);
1235 s->indent = Py_NewRef(indent);
1236 s->key_separator = Py_NewRef(key_separator);
1237 s->item_separator = Py_NewRef(item_separator);
1238 s->sort_keys = sort_keys;
1239 s->skipkeys = skipkeys;
1240 s->allow_nan = allow_nan;
1241 s->fast_encode = NULL;
1242
1243 if (PyCFunction_Check(s->encoder)) {
1244 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1245 if (f == (PyCFunction)py_encode_basestring_ascii ||
1246 f == (PyCFunction)py_encode_basestring) {
1247 s->fast_encode = f;
1248 }
1249 }
1250
1251 return (PyObject *)s;
1252 }
1253
1254 static PyObject *
_create_newline_indent(PyObject * indent,Py_ssize_t indent_level)1255 _create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
1256 {
1257 PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
1258 if (newline_indent != NULL && indent_level) {
1259 PyUnicode_AppendAndDel(&newline_indent,
1260 PySequence_Repeat(indent, indent_level));
1261 }
1262 return newline_indent;
1263 }
1264
1265 static PyObject *
encoder_call(PyEncoderObject * self,PyObject * args,PyObject * kwds)1266 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1267 {
1268 /* Python callable interface to encode_listencode_obj */
1269 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1270 PyObject *obj, *result;
1271 Py_ssize_t indent_level;
1272 _PyUnicodeWriter writer;
1273
1274 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1275 &obj, &indent_level))
1276 return NULL;
1277
1278 _PyUnicodeWriter_Init(&writer);
1279 writer.overallocate = 1;
1280
1281 PyObject *newline_indent = NULL;
1282 if (self->indent != Py_None) {
1283 newline_indent = _create_newline_indent(self->indent, indent_level);
1284 if (newline_indent == NULL) {
1285 _PyUnicodeWriter_Dealloc(&writer);
1286 return NULL;
1287 }
1288 }
1289 if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
1290 _PyUnicodeWriter_Dealloc(&writer);
1291 Py_XDECREF(newline_indent);
1292 return NULL;
1293 }
1294 Py_XDECREF(newline_indent);
1295
1296 result = PyTuple_New(1);
1297 if (result == NULL ||
1298 PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
1299 Py_XDECREF(result);
1300 return NULL;
1301 }
1302 return result;
1303 }
1304
1305 static PyObject *
_encoded_const(PyObject * obj)1306 _encoded_const(PyObject *obj)
1307 {
1308 /* Return the JSON string representation of None, True, False */
1309 if (obj == Py_None) {
1310 return &_Py_ID(null);
1311 }
1312 else if (obj == Py_True) {
1313 return &_Py_ID(true);
1314 }
1315 else if (obj == Py_False) {
1316 return &_Py_ID(false);
1317 }
1318 else {
1319 PyErr_SetString(PyExc_ValueError, "not a const");
1320 return NULL;
1321 }
1322 }
1323
1324 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1325 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1326 {
1327 /* Return the JSON representation of a PyFloat. */
1328 double i = PyFloat_AS_DOUBLE(obj);
1329 if (!Py_IS_FINITE(i)) {
1330 if (!s->allow_nan) {
1331 PyErr_Format(
1332 PyExc_ValueError,
1333 "Out of range float values are not JSON compliant: %R",
1334 obj
1335 );
1336 return NULL;
1337 }
1338 if (i > 0) {
1339 return PyUnicode_FromString("Infinity");
1340 }
1341 else if (i < 0) {
1342 return PyUnicode_FromString("-Infinity");
1343 }
1344 else {
1345 return PyUnicode_FromString("NaN");
1346 }
1347 }
1348 return PyFloat_Type.tp_repr(obj);
1349 }
1350
1351 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1352 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1353 {
1354 /* Return the JSON representation of a string */
1355 PyObject *encoded;
1356
1357 if (s->fast_encode) {
1358 return s->fast_encode(NULL, obj);
1359 }
1360 encoded = PyObject_CallOneArg(s->encoder, obj);
1361 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1362 PyErr_Format(PyExc_TypeError,
1363 "encoder() must return a string, not %.80s",
1364 Py_TYPE(encoded)->tp_name);
1365 Py_DECREF(encoded);
1366 return NULL;
1367 }
1368 return encoded;
1369 }
1370
1371 static int
_steal_accumulate(_PyUnicodeWriter * writer,PyObject * stolen)1372 _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
1373 {
1374 /* Append stolen and then decrement its reference count */
1375 int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
1376 Py_DECREF(stolen);
1377 return rval;
1378 }
1379
1380 static int
encoder_listencode_obj(PyEncoderObject * s,_PyUnicodeWriter * writer,PyObject * obj,PyObject * newline_indent)1381 encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
1382 PyObject *obj, PyObject *newline_indent)
1383 {
1384 /* Encode Python object obj to a JSON term */
1385 PyObject *newobj;
1386 int rv;
1387
1388 if (obj == Py_None) {
1389 return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
1390 }
1391 else if (obj == Py_True) {
1392 return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
1393 }
1394 else if (obj == Py_False) {
1395 return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
1396 }
1397 else if (PyUnicode_Check(obj)) {
1398 PyObject *encoded = encoder_encode_string(s, obj);
1399 if (encoded == NULL)
1400 return -1;
1401 return _steal_accumulate(writer, encoded);
1402 }
1403 else if (PyLong_Check(obj)) {
1404 PyObject *encoded = PyLong_Type.tp_repr(obj);
1405 if (encoded == NULL)
1406 return -1;
1407 return _steal_accumulate(writer, encoded);
1408 }
1409 else if (PyFloat_Check(obj)) {
1410 PyObject *encoded = encoder_encode_float(s, obj);
1411 if (encoded == NULL)
1412 return -1;
1413 return _steal_accumulate(writer, encoded);
1414 }
1415 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1416 if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1417 return -1;
1418 rv = encoder_listencode_list(s, writer, obj, newline_indent);
1419 _Py_LeaveRecursiveCall();
1420 return rv;
1421 }
1422 else if (PyDict_Check(obj)) {
1423 if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1424 return -1;
1425 rv = encoder_listencode_dict(s, writer, obj, newline_indent);
1426 _Py_LeaveRecursiveCall();
1427 return rv;
1428 }
1429 else {
1430 PyObject *ident = NULL;
1431 if (s->markers != Py_None) {
1432 int has_key;
1433 ident = PyLong_FromVoidPtr(obj);
1434 if (ident == NULL)
1435 return -1;
1436 has_key = PyDict_Contains(s->markers, ident);
1437 if (has_key) {
1438 if (has_key != -1)
1439 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1440 Py_DECREF(ident);
1441 return -1;
1442 }
1443 if (PyDict_SetItem(s->markers, ident, obj)) {
1444 Py_DECREF(ident);
1445 return -1;
1446 }
1447 }
1448 newobj = PyObject_CallOneArg(s->defaultfn, obj);
1449 if (newobj == NULL) {
1450 Py_XDECREF(ident);
1451 return -1;
1452 }
1453
1454 if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
1455 Py_DECREF(newobj);
1456 Py_XDECREF(ident);
1457 return -1;
1458 }
1459 rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
1460 _Py_LeaveRecursiveCall();
1461
1462 Py_DECREF(newobj);
1463 if (rv) {
1464 Py_XDECREF(ident);
1465 return -1;
1466 }
1467 if (ident != NULL) {
1468 if (PyDict_DelItem(s->markers, ident)) {
1469 Py_XDECREF(ident);
1470 return -1;
1471 }
1472 Py_XDECREF(ident);
1473 }
1474 return rv;
1475 }
1476 }
1477
1478 static int
encoder_encode_key_value(PyEncoderObject * s,_PyUnicodeWriter * writer,bool * first,PyObject * key,PyObject * value,PyObject * newline_indent,PyObject * item_separator)1479 encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
1480 PyObject *key, PyObject *value,
1481 PyObject *newline_indent,
1482 PyObject *item_separator)
1483 {
1484 PyObject *keystr = NULL;
1485 PyObject *encoded;
1486
1487 if (PyUnicode_Check(key)) {
1488 keystr = Py_NewRef(key);
1489 }
1490 else if (PyFloat_Check(key)) {
1491 keystr = encoder_encode_float(s, key);
1492 }
1493 else if (key == Py_True || key == Py_False || key == Py_None) {
1494 /* This must come before the PyLong_Check because
1495 True and False are also 1 and 0.*/
1496 keystr = _encoded_const(key);
1497 }
1498 else if (PyLong_Check(key)) {
1499 keystr = PyLong_Type.tp_repr(key);
1500 }
1501 else if (s->skipkeys) {
1502 return 0;
1503 }
1504 else {
1505 PyErr_Format(PyExc_TypeError,
1506 "keys must be str, int, float, bool or None, "
1507 "not %.100s", Py_TYPE(key)->tp_name);
1508 return -1;
1509 }
1510
1511 if (keystr == NULL) {
1512 return -1;
1513 }
1514
1515 if (*first) {
1516 *first = false;
1517 }
1518 else {
1519 if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
1520 Py_DECREF(keystr);
1521 return -1;
1522 }
1523 }
1524
1525 encoded = encoder_encode_string(s, keystr);
1526 Py_DECREF(keystr);
1527 if (encoded == NULL) {
1528 return -1;
1529 }
1530
1531 if (_steal_accumulate(writer, encoded) < 0) {
1532 return -1;
1533 }
1534 if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
1535 return -1;
1536 }
1537 if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
1538 return -1;
1539 }
1540 return 0;
1541 }
1542
1543 static int
encoder_listencode_dict(PyEncoderObject * s,_PyUnicodeWriter * writer,PyObject * dct,PyObject * newline_indent)1544 encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
1545 PyObject *dct, PyObject *newline_indent)
1546 {
1547 /* Encode Python dict dct a JSON term */
1548 PyObject *ident = NULL;
1549 PyObject *items = NULL;
1550 PyObject *key, *value;
1551 bool first = true;
1552 PyObject *new_newline_indent = NULL;
1553 PyObject *separator_indent = NULL;
1554
1555 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
1556 return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
1557
1558 if (s->markers != Py_None) {
1559 int has_key;
1560 ident = PyLong_FromVoidPtr(dct);
1561 if (ident == NULL)
1562 goto bail;
1563 has_key = PyDict_Contains(s->markers, ident);
1564 if (has_key) {
1565 if (has_key != -1)
1566 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1567 goto bail;
1568 }
1569 if (PyDict_SetItem(s->markers, ident, dct)) {
1570 goto bail;
1571 }
1572 }
1573
1574 if (_PyUnicodeWriter_WriteChar(writer, '{'))
1575 goto bail;
1576
1577 PyObject *current_item_separator = s->item_separator; // borrowed reference
1578 if (s->indent != Py_None) {
1579 new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1580 if (new_newline_indent == NULL) {
1581 goto bail;
1582 }
1583 separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
1584 if (separator_indent == NULL) {
1585 goto bail;
1586 }
1587 // update item separator with a borrowed reference
1588 current_item_separator = separator_indent;
1589 if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1590 goto bail;
1591 }
1592 }
1593
1594 if (s->sort_keys || !PyDict_CheckExact(dct)) {
1595 items = PyMapping_Items(dct);
1596 if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
1597 goto bail;
1598
1599 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
1600 PyObject *item = PyList_GET_ITEM(items, i);
1601
1602 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1603 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1604 goto bail;
1605 }
1606
1607 key = PyTuple_GET_ITEM(item, 0);
1608 value = PyTuple_GET_ITEM(item, 1);
1609 if (encoder_encode_key_value(s, writer, &first, key, value,
1610 new_newline_indent,
1611 current_item_separator) < 0)
1612 goto bail;
1613 }
1614 Py_CLEAR(items);
1615
1616 } else {
1617 Py_ssize_t pos = 0;
1618 while (PyDict_Next(dct, &pos, &key, &value)) {
1619 if (encoder_encode_key_value(s, writer, &first, key, value,
1620 new_newline_indent,
1621 current_item_separator) < 0)
1622 goto bail;
1623 }
1624 }
1625
1626 if (ident != NULL) {
1627 if (PyDict_DelItem(s->markers, ident))
1628 goto bail;
1629 Py_CLEAR(ident);
1630 }
1631 if (s->indent != Py_None) {
1632 Py_CLEAR(new_newline_indent);
1633 Py_CLEAR(separator_indent);
1634
1635 if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1636 goto bail;
1637 }
1638 }
1639
1640 if (_PyUnicodeWriter_WriteChar(writer, '}'))
1641 goto bail;
1642 return 0;
1643
1644 bail:
1645 Py_XDECREF(items);
1646 Py_XDECREF(ident);
1647 Py_XDECREF(separator_indent);
1648 Py_XDECREF(new_newline_indent);
1649 return -1;
1650 }
1651
1652 static int
encoder_listencode_list(PyEncoderObject * s,_PyUnicodeWriter * writer,PyObject * seq,PyObject * newline_indent)1653 encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
1654 PyObject *seq, PyObject *newline_indent)
1655 {
1656 PyObject *ident = NULL;
1657 PyObject *s_fast = NULL;
1658 Py_ssize_t i;
1659 PyObject *new_newline_indent = NULL;
1660 PyObject *separator_indent = NULL;
1661
1662 ident = NULL;
1663 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1664 if (s_fast == NULL)
1665 return -1;
1666 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1667 Py_DECREF(s_fast);
1668 return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
1669 }
1670
1671 if (s->markers != Py_None) {
1672 int has_key;
1673 ident = PyLong_FromVoidPtr(seq);
1674 if (ident == NULL)
1675 goto bail;
1676 has_key = PyDict_Contains(s->markers, ident);
1677 if (has_key) {
1678 if (has_key != -1)
1679 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1680 goto bail;
1681 }
1682 if (PyDict_SetItem(s->markers, ident, seq)) {
1683 goto bail;
1684 }
1685 }
1686
1687 if (_PyUnicodeWriter_WriteChar(writer, '['))
1688 goto bail;
1689
1690 PyObject *separator = s->item_separator; // borrowed reference
1691 if (s->indent != Py_None) {
1692 new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1693 if (new_newline_indent == NULL) {
1694 goto bail;
1695 }
1696
1697 if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1698 goto bail;
1699 }
1700
1701 separator_indent = PyUnicode_Concat(separator, new_newline_indent);
1702 if (separator_indent == NULL) {
1703 goto bail;
1704 }
1705 separator = separator_indent; // assign separator with borrowed reference
1706 }
1707 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1708 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1709 if (i) {
1710 if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
1711 goto bail;
1712 }
1713 if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
1714 goto bail;
1715 }
1716 if (ident != NULL) {
1717 if (PyDict_DelItem(s->markers, ident))
1718 goto bail;
1719 Py_CLEAR(ident);
1720 }
1721
1722 if (s->indent != Py_None) {
1723 Py_CLEAR(new_newline_indent);
1724 Py_CLEAR(separator_indent);
1725 if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1726 goto bail;
1727 }
1728 }
1729
1730 if (_PyUnicodeWriter_WriteChar(writer, ']'))
1731 goto bail;
1732 Py_DECREF(s_fast);
1733 return 0;
1734
1735 bail:
1736 Py_XDECREF(ident);
1737 Py_DECREF(s_fast);
1738 Py_XDECREF(separator_indent);
1739 Py_XDECREF(new_newline_indent);
1740 return -1;
1741 }
1742
1743 static void
encoder_dealloc(PyObject * self)1744 encoder_dealloc(PyObject *self)
1745 {
1746 PyTypeObject *tp = Py_TYPE(self);
1747 /* bpo-31095: UnTrack is needed before calling any callbacks */
1748 PyObject_GC_UnTrack(self);
1749 encoder_clear((PyEncoderObject *)self);
1750 tp->tp_free(self);
1751 Py_DECREF(tp);
1752 }
1753
1754 static int
encoder_traverse(PyEncoderObject * self,visitproc visit,void * arg)1755 encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
1756 {
1757 Py_VISIT(Py_TYPE(self));
1758 Py_VISIT(self->markers);
1759 Py_VISIT(self->defaultfn);
1760 Py_VISIT(self->encoder);
1761 Py_VISIT(self->indent);
1762 Py_VISIT(self->key_separator);
1763 Py_VISIT(self->item_separator);
1764 return 0;
1765 }
1766
1767 static int
encoder_clear(PyEncoderObject * self)1768 encoder_clear(PyEncoderObject *self)
1769 {
1770 /* Deallocate Encoder */
1771 Py_CLEAR(self->markers);
1772 Py_CLEAR(self->defaultfn);
1773 Py_CLEAR(self->encoder);
1774 Py_CLEAR(self->indent);
1775 Py_CLEAR(self->key_separator);
1776 Py_CLEAR(self->item_separator);
1777 return 0;
1778 }
1779
1780 PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
1781
1782 static PyType_Slot PyEncoderType_slots[] = {
1783 {Py_tp_doc, (void *)encoder_doc},
1784 {Py_tp_dealloc, encoder_dealloc},
1785 {Py_tp_call, encoder_call},
1786 {Py_tp_traverse, encoder_traverse},
1787 {Py_tp_clear, encoder_clear},
1788 {Py_tp_members, encoder_members},
1789 {Py_tp_new, encoder_new},
1790 {0, 0}
1791 };
1792
1793 static PyType_Spec PyEncoderType_spec = {
1794 .name = "_json.Encoder",
1795 .basicsize = sizeof(PyEncoderObject),
1796 .itemsize = 0,
1797 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1798 .slots = PyEncoderType_slots
1799 };
1800
1801 static PyMethodDef speedups_methods[] = {
1802 {"encode_basestring_ascii",
1803 (PyCFunction)py_encode_basestring_ascii,
1804 METH_O,
1805 pydoc_encode_basestring_ascii},
1806 {"encode_basestring",
1807 (PyCFunction)py_encode_basestring,
1808 METH_O,
1809 pydoc_encode_basestring},
1810 {"scanstring",
1811 (PyCFunction)py_scanstring,
1812 METH_VARARGS,
1813 pydoc_scanstring},
1814 {NULL, NULL, 0, NULL}
1815 };
1816
1817 PyDoc_STRVAR(module_doc,
1818 "json speedups\n");
1819
1820 static int
_json_exec(PyObject * module)1821 _json_exec(PyObject *module)
1822 {
1823 PyObject *PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1824 if (PyModule_Add(module, "make_scanner", PyScannerType) < 0) {
1825 return -1;
1826 }
1827
1828 PyObject *PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1829 if (PyModule_Add(module, "make_encoder", PyEncoderType) < 0) {
1830 return -1;
1831 }
1832
1833 return 0;
1834 }
1835
1836 static PyModuleDef_Slot _json_slots[] = {
1837 {Py_mod_exec, _json_exec},
1838 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1839 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1840 {0, NULL}
1841 };
1842
1843 static struct PyModuleDef jsonmodule = {
1844 .m_base = PyModuleDef_HEAD_INIT,
1845 .m_name = "_json",
1846 .m_doc = module_doc,
1847 .m_methods = speedups_methods,
1848 .m_slots = _json_slots,
1849 };
1850
1851 PyMODINIT_FUNC
PyInit__json(void)1852 PyInit__json(void)
1853 {
1854 return PyModuleDef_Init(&jsonmodule);
1855 }
1856