/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 # error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10
11 #include "Python.h"
12 #include "structmember.h" // PyMemberDef
13 #include "pycore_accu.h"
14
/* Per-module state of the _json module, retrieved with get_json_state().
   NOTE(review): the two slots presumably hold the scanner and encoder type
   objects created during module initialisation -- the code that fills them
   is not visible here, confirm against the module exec function. */
typedef struct {
    PyObject *PyScannerType;
    PyObject *PyEncoderType;
} _jsonmodulestate;
19
20 static inline _jsonmodulestate*
get_json_state(PyObject * module)21 get_json_state(PyObject *module)
22 {
23 void *state = PyModule_GetState(module);
24 assert(state != NULL);
25 return (_jsonmodulestate *)state;
26 }
27
28
/* Instance layout of the scanner object. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    /* passed to scanstring_unicode(); when non-zero, literal control
       characters inside strings are rejected */
    signed char strict;
    /* called with the decoded dict unless it is None */
    PyObject *object_hook;
    /* when not None, objects are collected as a list of (key, value)
       tuples and handed to this callable instead */
    PyObject *object_pairs_hook;
    /* used for float literals unless it is exactly PyFloat_Type */
    PyObject *parse_float;
    /* used for integer literals unless it is exactly PyLong_Type */
    PyObject *parse_int;
    /* called with the name of the constant by _parse_constant() */
    PyObject *parse_constant;
    /* dict used with PyDict_SetDefault() so that equal object keys share
       one string object */
    PyObject *memo;
} PyScannerObject;
39
40 static PyMemberDef scanner_members[] = {
41 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
42 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48 };
49
/* Instance layout of the encoder object.
   NOTE(review): the functions that consume these fields are defined outside
   this excerpt; the per-field notes below are limited to what the member
   table and the declarations show. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;       /* exposed to Python under the name "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    char sort_keys;            /* exposed as a T_BOOL member */
    char skipkeys;             /* exposed as a T_BOOL member */
    int allow_nan;             /* not listed in encoder_members */
    /* NOTE(review): presumably a C-level shortcut for `encoder` -- confirm
       in encoder_new(), which is not visible here */
    PyCFunction fast_encode;
} PyEncoderObject;
63
/* Read-only attributes exposed on encoder instances.  Each entry is
   {name, type, offset, flags, doc}; the table ends with a {NULL} sentinel.
   Note that the C field `defaultfn` is published as "default", and that
   `allow_nan` and `fast_encode` have no entry. */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}
};
75
76 /* Forward decls */
77
78 static PyObject *
79 ascii_escape_unicode(PyObject *pystr);
80 static PyObject *
81 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
82 static PyObject *
83 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
84 static PyObject *
85 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
86 static PyObject *
87 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
88 static void
89 scanner_dealloc(PyObject *self);
90 static int
91 scanner_clear(PyScannerObject *self);
92 static PyObject *
93 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
94 static void
95 encoder_dealloc(PyObject *self);
96 static int
97 encoder_clear(PyEncoderObject *self);
98 static int
99 encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
100 static int
101 encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
102 static int
103 encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
104 static PyObject *
105 _encoded_const(PyObject *obj);
106 static void
107 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
108 static PyObject *
109 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
110 static PyObject *
111 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
112
/* True when code point c can be copied verbatim into an ASCII-only JSON
   string: printable ASCII (' ' through '~') except backslash and double
   quote.  Every use of the argument is parenthesised so that an argument
   containing operators (e.g. a conditional expression) expands correctly.
   The argument is still evaluated several times, so it must not have side
   effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for space, tab, line feed and carriage return.  The argument is
   evaluated up to four times. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
115
116 static Py_ssize_t
ascii_escape_unichar(Py_UCS4 c,unsigned char * output,Py_ssize_t chars)117 ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
118 {
119 /* Escape unicode code point c to ASCII escape sequences
120 in char *output. output must have at least 12 bytes unused to
121 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
122 output[chars++] = '\\';
123 switch (c) {
124 case '\\': output[chars++] = c; break;
125 case '"': output[chars++] = c; break;
126 case '\b': output[chars++] = 'b'; break;
127 case '\f': output[chars++] = 'f'; break;
128 case '\n': output[chars++] = 'n'; break;
129 case '\r': output[chars++] = 'r'; break;
130 case '\t': output[chars++] = 't'; break;
131 default:
132 if (c >= 0x10000) {
133 /* UTF-16 surrogate pair */
134 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
135 output[chars++] = 'u';
136 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
137 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
139 output[chars++] = Py_hexdigits[(v ) & 0xf];
140 c = Py_UNICODE_LOW_SURROGATE(c);
141 output[chars++] = '\\';
142 }
143 output[chars++] = 'u';
144 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
145 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
147 output[chars++] = Py_hexdigits[(c ) & 0xf];
148 }
149 return chars;
150 }
151
152 static PyObject *
ascii_escape_unicode(PyObject * pystr)153 ascii_escape_unicode(PyObject *pystr)
154 {
155 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
156 Py_ssize_t i;
157 Py_ssize_t input_chars;
158 Py_ssize_t output_size;
159 Py_ssize_t chars;
160 PyObject *rval;
161 const void *input;
162 Py_UCS1 *output;
163 int kind;
164
165 if (PyUnicode_READY(pystr) == -1)
166 return NULL;
167
168 input_chars = PyUnicode_GET_LENGTH(pystr);
169 input = PyUnicode_DATA(pystr);
170 kind = PyUnicode_KIND(pystr);
171
172 /* Compute the output size */
173 for (i = 0, output_size = 2; i < input_chars; i++) {
174 Py_UCS4 c = PyUnicode_READ(kind, input, i);
175 Py_ssize_t d;
176 if (S_CHAR(c)) {
177 d = 1;
178 }
179 else {
180 switch(c) {
181 case '\\': case '"': case '\b': case '\f':
182 case '\n': case '\r': case '\t':
183 d = 2; break;
184 default:
185 d = c >= 0x10000 ? 12 : 6;
186 }
187 }
188 if (output_size > PY_SSIZE_T_MAX - d) {
189 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
190 return NULL;
191 }
192 output_size += d;
193 }
194
195 rval = PyUnicode_New(output_size, 127);
196 if (rval == NULL) {
197 return NULL;
198 }
199 output = PyUnicode_1BYTE_DATA(rval);
200 chars = 0;
201 output[chars++] = '"';
202 for (i = 0; i < input_chars; i++) {
203 Py_UCS4 c = PyUnicode_READ(kind, input, i);
204 if (S_CHAR(c)) {
205 output[chars++] = c;
206 }
207 else {
208 chars = ascii_escape_unichar(c, output, chars);
209 }
210 }
211 output[chars++] = '"';
212 #ifdef Py_DEBUG
213 assert(_PyUnicode_CheckConsistency(rval, 1));
214 #endif
215 return rval;
216 }
217
/* Build a new str containing the JSON string literal for pystr without
   forcing the result to ASCII: the text is wrapped in double quotes;
   backslash, double quote, \b, \f, \n, \r and \t get their two-character
   escape; any other code point <= 0x1f becomes \u00XX; everything else is
   copied unchanged.

   Returns a new reference, or NULL with an exception set (OverflowError
   when the escaped length would exceed PY_SSIZE_T_MAX). */
static PyObject *
escape_unicode(PyObject *pystr)
{
    /* Take a PyUnicode pystr and return a new escaped PyUnicode */
    Py_ssize_t i;
    Py_ssize_t input_chars;
    Py_ssize_t output_size;
    Py_ssize_t chars;
    PyObject *rval;
    const void *input;
    int kind;
    Py_UCS4 maxchar;

    if (PyUnicode_READY(pystr) == -1)
        return NULL;

    maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
    input_chars = PyUnicode_GET_LENGTH(pystr);
    input = PyUnicode_DATA(pystr);
    kind = PyUnicode_KIND(pystr);

    /* Compute the output size; it starts at 2 for the surrounding quotes */
    for (i = 0, output_size = 2; i < input_chars; i++) {
        Py_UCS4 c = PyUnicode_READ(kind, input, i);
        Py_ssize_t d;
        switch (c) {
        case '\\': case '"': case '\b': case '\f':
        case '\n': case '\r': case '\t':
            d = 2;
            break;
        default:
            if (c <= 0x1f)
                d = 6;
            else
                d = 1;
        }
        /* refuse before the addition could overflow */
        if (output_size > PY_SSIZE_T_MAX - d) {
            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
            return NULL;
        }
        output_size += d;
    }

    /* the result is sized for the same maximum code point as the input */
    rval = PyUnicode_New(output_size, maxchar);
    if (rval == NULL)
        return NULL;

    /* From here on `kind` is the result's kind, and ENCODE_OUTPUT uses it to
       read `input` as well.
       NOTE(review): this relies on rval having the same kind as pystr, which
       should follow from rval being created with pystr's max char value --
       confirm. */
    kind = PyUnicode_KIND(rval);

/* Writes the quoted, escaped text through the `output` pointer that is
   declared in the enclosing scope; instantiated once per character width
   below. */
#define ENCODE_OUTPUT do { \
        chars = 0; \
        output[chars++] = '"'; \
        for (i = 0; i < input_chars; i++) { \
            Py_UCS4 c = PyUnicode_READ(kind, input, i); \
            switch (c) { \
            case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
            case '"':  output[chars++] = '\\'; output[chars++] = c; break; \
            case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
            case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
            case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
            case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
            case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
            default: \
                if (c <= 0x1f) { \
                    output[chars++] = '\\'; \
                    output[chars++] = 'u'; \
                    output[chars++] = '0'; \
                    output[chars++] = '0'; \
                    output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
                    output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
                } else { \
                    output[chars++] = c; \
                } \
            } \
        } \
        output[chars++] = '"'; \
    } while (0)

    /* one instantiation per storage width, each with a correctly typed
       `output` pointer */
    if (kind == PyUnicode_1BYTE_KIND) {
        Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
        ENCODE_OUTPUT;
    } else if (kind == PyUnicode_2BYTE_KIND) {
        Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
        ENCODE_OUTPUT;
    } else {
        Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
        assert(kind == PyUnicode_4BYTE_KIND);
        ENCODE_OUTPUT;
    }
#undef ENCODE_OUTPUT

#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(rval, 1));
#endif
    return rval;
}
314
315 static void
raise_errmsg(const char * msg,PyObject * s,Py_ssize_t end)316 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
317 {
318 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
319 _Py_static_string(PyId_decoder, "json.decoder");
320 PyObject *decoder = _PyImport_GetModuleId(&PyId_decoder);
321 if (decoder == NULL) {
322 return;
323 }
324
325 _Py_IDENTIFIER(JSONDecodeError);
326 PyObject *JSONDecodeError = _PyObject_GetAttrId(decoder, &PyId_JSONDecodeError);
327 Py_DECREF(decoder);
328 if (JSONDecodeError == NULL) {
329 return;
330 }
331
332 PyObject *exc;
333 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
334 Py_DECREF(JSONDecodeError);
335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
338 }
339 }
340
341 static void
raise_stop_iteration(Py_ssize_t idx)342 raise_stop_iteration(Py_ssize_t idx)
343 {
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349 }
350
351 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)352 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
360 return NULL;
361 }
362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
376 }
377
378 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)379 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
380 {
381 /* Read the JSON string from PyUnicode pystr.
382 end is the index of the first character after the quote.
383 if strict is zero then literal control characters are allowed
384 *next_end_ptr is a return-by-reference index of the character
385 after the end quote
386
387 Return value is a new PyUnicode
388 */
389 PyObject *rval = NULL;
390 Py_ssize_t len;
391 Py_ssize_t begin = end - 1;
392 Py_ssize_t next /* = begin */;
393 const void *buf;
394 int kind;
395
396 if (PyUnicode_READY(pystr) == -1)
397 return 0;
398
399 _PyUnicodeWriter writer;
400 _PyUnicodeWriter_Init(&writer);
401 writer.overallocate = 1;
402
403 len = PyUnicode_GET_LENGTH(pystr);
404 buf = PyUnicode_DATA(pystr);
405 kind = PyUnicode_KIND(pystr);
406
407 if (end < 0 || len < end) {
408 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
409 goto bail;
410 }
411 while (1) {
412 /* Find the end of the string or the next escape */
413 Py_UCS4 c;
414 {
415 // Use tight scope variable to help register allocation.
416 Py_UCS4 d = 0;
417 for (next = end; next < len; next++) {
418 d = PyUnicode_READ(kind, buf, next);
419 if (d == '"' || d == '\\') {
420 break;
421 }
422 if (d <= 0x1f && strict) {
423 raise_errmsg("Invalid control character at", pystr, next);
424 goto bail;
425 }
426 }
427 c = d;
428 }
429
430 if (c == '"') {
431 // Fast path for simple case.
432 if (writer.buffer == NULL) {
433 PyObject *ret = PyUnicode_Substring(pystr, end, next);
434 if (ret == NULL) {
435 goto bail;
436 }
437 *next_end_ptr = next + 1;;
438 return ret;
439 }
440 }
441 else if (c != '\\') {
442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
444 }
445
446 /* Pick up this chunk if it's not zero length */
447 if (next != end) {
448 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
449 goto bail;
450 }
451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
461 c = PyUnicode_READ(kind, buf, next);
462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
492 c <<= 4;
493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
496 c |= (digit - '0'); break;
497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
499 c |= (digit - 'a' + 10); break;
500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
502 c |= (digit - 'A' + 10); break;
503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
508 /* Surrogate pair */
509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
512 Py_UCS4 c2 = 0;
513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
517 c2 <<= 4;
518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
521 c2 |= (digit - '0'); break;
522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
524 c2 |= (digit - 'a' + 10); break;
525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
527 c2 |= (digit - 'A' + 10); break;
528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
537 }
538 }
539 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
540 goto bail;
541 }
542 }
543
544 rval = _PyUnicodeWriter_Finish(&writer);
545 *next_end_ptr = end;
546 return rval;
547
548 bail:
549 *next_end_ptr = -1;
550 _PyUnicodeWriter_Dealloc(&writer);
551 return NULL;
552 }
553
554 PyDoc_STRVAR(pydoc_scanstring,
555 "scanstring(string, end, strict=True) -> (string, end)\n"
556 "\n"
557 "Scan the string s for a JSON string. End is the index of the\n"
558 "character in s after the quote that started the JSON string.\n"
559 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
560 "on attempt to decode an invalid string. If strict is False then literal\n"
561 "control characters are allowed in the string.\n"
562 "\n"
563 "Returns a tuple of the decoded string and the index of the character in s\n"
564 "after the end quote."
565 );
566
567 static PyObject *
py_scanstring(PyObject * Py_UNUSED (self),PyObject * args)568 py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
569 {
570 PyObject *pystr;
571 PyObject *rval;
572 Py_ssize_t end;
573 Py_ssize_t next_end = -1;
574 int strict = 1;
575 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
576 return NULL;
577 }
578 if (PyUnicode_Check(pystr)) {
579 rval = scanstring_unicode(pystr, end, strict, &next_end);
580 }
581 else {
582 PyErr_Format(PyExc_TypeError,
583 "first argument must be a string, not %.80s",
584 Py_TYPE(pystr)->tp_name);
585 return NULL;
586 }
587 return _build_rval_index_tuple(rval, next_end);
588 }
589
590 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
591 "encode_basestring_ascii(string) -> string\n"
592 "\n"
593 "Return an ASCII-only JSON representation of a Python string"
594 );
595
596 static PyObject *
py_encode_basestring_ascii(PyObject * Py_UNUSED (self),PyObject * pystr)597 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
598 {
599 PyObject *rval;
600 /* Return an ASCII-only JSON representation of a Python string */
601 /* METH_O */
602 if (PyUnicode_Check(pystr)) {
603 rval = ascii_escape_unicode(pystr);
604 }
605 else {
606 PyErr_Format(PyExc_TypeError,
607 "first argument must be a string, not %.80s",
608 Py_TYPE(pystr)->tp_name);
609 return NULL;
610 }
611 return rval;
612 }
613
614
615 PyDoc_STRVAR(pydoc_encode_basestring,
616 "encode_basestring(string) -> string\n"
617 "\n"
618 "Return a JSON representation of a Python string"
619 );
620
621 static PyObject *
py_encode_basestring(PyObject * Py_UNUSED (self),PyObject * pystr)622 py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
623 {
624 PyObject *rval;
625 /* Return a JSON representation of a Python string */
626 /* METH_O */
627 if (PyUnicode_Check(pystr)) {
628 rval = escape_unicode(pystr);
629 }
630 else {
631 PyErr_Format(PyExc_TypeError,
632 "first argument must be a string, not %.80s",
633 Py_TYPE(pystr)->tp_name);
634 return NULL;
635 }
636 return rval;
637 }
638
639 static void
scanner_dealloc(PyObject * self)640 scanner_dealloc(PyObject *self)
641 {
642 PyTypeObject *tp = Py_TYPE(self);
643 /* bpo-31095: UnTrack is needed before calling any callbacks */
644 PyObject_GC_UnTrack(self);
645 scanner_clear((PyScannerObject *)self);
646 tp->tp_free(self);
647 Py_DECREF(tp);
648 }
649
650 static int
scanner_traverse(PyScannerObject * self,visitproc visit,void * arg)651 scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
652 {
653 Py_VISIT(Py_TYPE(self));
654 Py_VISIT(self->object_hook);
655 Py_VISIT(self->object_pairs_hook);
656 Py_VISIT(self->parse_float);
657 Py_VISIT(self->parse_int);
658 Py_VISIT(self->parse_constant);
659 Py_VISIT(self->memo);
660 return 0;
661 }
662
663 static int
scanner_clear(PyScannerObject * self)664 scanner_clear(PyScannerObject *self)
665 {
666 Py_CLEAR(self->object_hook);
667 Py_CLEAR(self->object_pairs_hook);
668 Py_CLEAR(self->parse_float);
669 Py_CLEAR(self->parse_int);
670 Py_CLEAR(self->parse_constant);
671 Py_CLEAR(self->memo);
672 return 0;
673 }
674
675 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)676 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
677 {
678 /* Read a JSON object from PyUnicode pystr.
679 idx is the index of the first character after the opening curly brace.
680 *next_idx_ptr is a return-by-reference index to the first character after
681 the closing curly brace.
682
683 Returns a new PyObject (usually a dict, but object_hook can change that)
684 */
685 const void *str;
686 int kind;
687 Py_ssize_t end_idx;
688 PyObject *val = NULL;
689 PyObject *rval = NULL;
690 PyObject *key = NULL;
691 int has_pairs_hook = (s->object_pairs_hook != Py_None);
692 Py_ssize_t next_idx;
693
694 if (PyUnicode_READY(pystr) == -1)
695 return NULL;
696
697 str = PyUnicode_DATA(pystr);
698 kind = PyUnicode_KIND(pystr);
699 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
700
701 if (has_pairs_hook)
702 rval = PyList_New(0);
703 else
704 rval = PyDict_New();
705 if (rval == NULL)
706 return NULL;
707
708 /* skip whitespace after { */
709 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
710
711 /* only loop if the object is non-empty */
712 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
713 while (1) {
714 PyObject *memokey;
715
716 /* read key */
717 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
718 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
719 goto bail;
720 }
721 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
722 if (key == NULL)
723 goto bail;
724 memokey = PyDict_SetDefault(s->memo, key, key);
725 if (memokey == NULL) {
726 goto bail;
727 }
728 Py_INCREF(memokey);
729 Py_DECREF(key);
730 key = memokey;
731 idx = next_idx;
732
733 /* skip whitespace between key and : delimiter, read :, skip whitespace */
734 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
735 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
736 raise_errmsg("Expecting ':' delimiter", pystr, idx);
737 goto bail;
738 }
739 idx++;
740 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
741
742 /* read any JSON term */
743 val = scan_once_unicode(s, pystr, idx, &next_idx);
744 if (val == NULL)
745 goto bail;
746
747 if (has_pairs_hook) {
748 PyObject *item = PyTuple_Pack(2, key, val);
749 if (item == NULL)
750 goto bail;
751 Py_CLEAR(key);
752 Py_CLEAR(val);
753 if (PyList_Append(rval, item) == -1) {
754 Py_DECREF(item);
755 goto bail;
756 }
757 Py_DECREF(item);
758 }
759 else {
760 if (PyDict_SetItem(rval, key, val) < 0)
761 goto bail;
762 Py_CLEAR(key);
763 Py_CLEAR(val);
764 }
765 idx = next_idx;
766
767 /* skip whitespace before } or , */
768 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
769
770 /* bail if the object is closed or we didn't get the , delimiter */
771 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
772 break;
773 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
774 raise_errmsg("Expecting ',' delimiter", pystr, idx);
775 goto bail;
776 }
777 idx++;
778
779 /* skip whitespace after , delimiter */
780 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
781 }
782 }
783
784 *next_idx_ptr = idx + 1;
785
786 if (has_pairs_hook) {
787 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
788 Py_DECREF(rval);
789 return val;
790 }
791
792 /* if object_hook is not None: rval = object_hook(rval) */
793 if (s->object_hook != Py_None) {
794 val = PyObject_CallOneArg(s->object_hook, rval);
795 Py_DECREF(rval);
796 return val;
797 }
798 return rval;
799 bail:
800 Py_XDECREF(key);
801 Py_XDECREF(val);
802 Py_XDECREF(rval);
803 return NULL;
804 }
805
806 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)807 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
808 /* Read a JSON array from PyUnicode pystr.
809 idx is the index of the first character after the opening brace.
810 *next_idx_ptr is a return-by-reference index to the first character after
811 the closing brace.
812
813 Returns a new PyList
814 */
815 const void *str;
816 int kind;
817 Py_ssize_t end_idx;
818 PyObject *val = NULL;
819 PyObject *rval;
820 Py_ssize_t next_idx;
821
822 if (PyUnicode_READY(pystr) == -1)
823 return NULL;
824
825 rval = PyList_New(0);
826 if (rval == NULL)
827 return NULL;
828
829 str = PyUnicode_DATA(pystr);
830 kind = PyUnicode_KIND(pystr);
831 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
832
833 /* skip whitespace after [ */
834 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
835
836 /* only loop if the array is non-empty */
837 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
838 while (1) {
839
840 /* read any JSON term */
841 val = scan_once_unicode(s, pystr, idx, &next_idx);
842 if (val == NULL)
843 goto bail;
844
845 if (PyList_Append(rval, val) == -1)
846 goto bail;
847
848 Py_CLEAR(val);
849 idx = next_idx;
850
851 /* skip whitespace between term and , */
852 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
853
854 /* bail if the array is closed or we didn't get the , delimiter */
855 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
856 break;
857 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
858 raise_errmsg("Expecting ',' delimiter", pystr, idx);
859 goto bail;
860 }
861 idx++;
862
863 /* skip whitespace after , */
864 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
865 }
866 }
867
868 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
869 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
870 raise_errmsg("Expecting value", pystr, end_idx);
871 goto bail;
872 }
873 *next_idx_ptr = idx + 1;
874 return rval;
875 bail:
876 Py_XDECREF(val);
877 Py_DECREF(rval);
878 return NULL;
879 }
880
881 static PyObject *
_parse_constant(PyScannerObject * s,const char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)882 _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
883 /* Read a JSON constant.
884 constant is the constant string that was found
885 ("NaN", "Infinity", "-Infinity").
886 idx is the index of the first character of the constant
887 *next_idx_ptr is a return-by-reference index to the first character after
888 the constant.
889
890 Returns the result of parse_constant
891 */
892 PyObject *cstr;
893 PyObject *rval;
894 /* constant is "NaN", "Infinity", or "-Infinity" */
895 cstr = PyUnicode_InternFromString(constant);
896 if (cstr == NULL)
897 return NULL;
898
899 /* rval = parse_constant(constant) */
900 rval = PyObject_CallOneArg(s->parse_constant, cstr);
901 idx += PyUnicode_GET_LENGTH(cstr);
902 Py_DECREF(cstr);
903 *next_idx_ptr = idx;
904 return rval;
905 }
906
907 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)908 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
909 /* Read a JSON number from PyUnicode pystr.
910 idx is the index of the first character of the number
911 *next_idx_ptr is a return-by-reference index to the first character after
912 the number.
913
914 Returns a new PyObject representation of that number:
915 PyLong, or PyFloat.
916 May return other types if parse_int or parse_float are set
917 */
918 const void *str;
919 int kind;
920 Py_ssize_t end_idx;
921 Py_ssize_t idx = start;
922 int is_float = 0;
923 PyObject *rval;
924 PyObject *numstr = NULL;
925 PyObject *custom_func;
926
927 if (PyUnicode_READY(pystr) == -1)
928 return NULL;
929
930 str = PyUnicode_DATA(pystr);
931 kind = PyUnicode_KIND(pystr);
932 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
933
934 /* read a sign if it's there, make sure it's not the end of the string */
935 if (PyUnicode_READ(kind, str, idx) == '-') {
936 idx++;
937 if (idx > end_idx) {
938 raise_stop_iteration(start);
939 return NULL;
940 }
941 }
942
943 /* read as many integer digits as we find as long as it doesn't start with 0 */
944 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
945 idx++;
946 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
947 }
948 /* if it starts with 0 we only expect one integer digit */
949 else if (PyUnicode_READ(kind, str, idx) == '0') {
950 idx++;
951 }
952 /* no integer digits, error */
953 else {
954 raise_stop_iteration(start);
955 return NULL;
956 }
957
958 /* if the next char is '.' followed by a digit then read all float digits */
959 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
960 is_float = 1;
961 idx += 2;
962 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
963 }
964
965 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
966 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
967 Py_ssize_t e_start = idx;
968 idx++;
969
970 /* read an exponent sign if present */
971 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
972
973 /* read all digits */
974 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
975
976 /* if we got a digit, then parse as float. if not, backtrack */
977 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
978 is_float = 1;
979 }
980 else {
981 idx = e_start;
982 }
983 }
984
985 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
986 custom_func = s->parse_float;
987 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
988 custom_func = s->parse_int;
989 else
990 custom_func = NULL;
991
992 if (custom_func) {
993 /* copy the section we determined to be a number */
994 numstr = PyUnicode_FromKindAndData(kind,
995 (char*)str + kind * start,
996 idx - start);
997 if (numstr == NULL)
998 return NULL;
999 rval = PyObject_CallOneArg(custom_func, numstr);
1000 }
1001 else {
1002 Py_ssize_t i, n;
1003 char *buf;
1004 /* Straight conversion to ASCII, to avoid costly conversion of
1005 decimal unicode digits (which cannot appear here) */
1006 n = idx - start;
1007 numstr = PyBytes_FromStringAndSize(NULL, n);
1008 if (numstr == NULL)
1009 return NULL;
1010 buf = PyBytes_AS_STRING(numstr);
1011 for (i = 0; i < n; i++) {
1012 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
1013 }
1014 if (is_float)
1015 rval = PyFloat_FromString(numstr);
1016 else
1017 rval = PyLong_FromString(buf, NULL, 10);
1018 }
1019 Py_DECREF(numstr);
1020 *next_idx_ptr = idx;
1021 return rval;
1022 }
1023
1024 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1025 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1026 {
1027 /* Read one JSON term (of any kind) from PyUnicode pystr.
1028 idx is the index of the first character of the term
1029 *next_idx_ptr is a return-by-reference index to the first character after
1030 the number.
1031
1032 Returns a new PyObject representation of the term.
1033 */
1034 PyObject *res;
1035 const void *str;
1036 int kind;
1037 Py_ssize_t length;
1038
1039 if (PyUnicode_READY(pystr) == -1)
1040 return NULL;
1041
1042 str = PyUnicode_DATA(pystr);
1043 kind = PyUnicode_KIND(pystr);
1044 length = PyUnicode_GET_LENGTH(pystr);
1045
1046 if (idx < 0) {
1047 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1048 return NULL;
1049 }
1050 if (idx >= length) {
1051 raise_stop_iteration(idx);
1052 return NULL;
1053 }
1054
1055 switch (PyUnicode_READ(kind, str, idx)) {
1056 case '"':
1057 /* string */
1058 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1059 case '{':
1060 /* object */
1061 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1062 "from a unicode string"))
1063 return NULL;
1064 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1065 Py_LeaveRecursiveCall();
1066 return res;
1067 case '[':
1068 /* array */
1069 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1070 "from a unicode string"))
1071 return NULL;
1072 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1073 Py_LeaveRecursiveCall();
1074 return res;
1075 case 'n':
1076 /* null */
1077 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1078 *next_idx_ptr = idx + 4;
1079 Py_RETURN_NONE;
1080 }
1081 break;
1082 case 't':
1083 /* true */
1084 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1085 *next_idx_ptr = idx + 4;
1086 Py_RETURN_TRUE;
1087 }
1088 break;
1089 case 'f':
1090 /* false */
1091 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1092 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1093 PyUnicode_READ(kind, str, idx + 3) == 's' &&
1094 PyUnicode_READ(kind, str, idx + 4) == 'e') {
1095 *next_idx_ptr = idx + 5;
1096 Py_RETURN_FALSE;
1097 }
1098 break;
1099 case 'N':
1100 /* NaN */
1101 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1102 PyUnicode_READ(kind, str, idx + 2) == 'N') {
1103 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1104 }
1105 break;
1106 case 'I':
1107 /* Infinity */
1108 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1109 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1110 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1111 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1112 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1113 PyUnicode_READ(kind, str, idx + 6) == 't' &&
1114 PyUnicode_READ(kind, str, idx + 7) == 'y') {
1115 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1116 }
1117 break;
1118 case '-':
1119 /* -Infinity */
1120 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1121 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1122 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1123 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1124 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1125 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1126 PyUnicode_READ(kind, str, idx + 7) == 't' &&
1127 PyUnicode_READ(kind, str, idx + 8) == 'y') {
1128 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1129 }
1130 break;
1131 }
1132 /* Didn't find a string, object, array, or named constant. Look for a number. */
1133 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1134 }
1135
1136 static PyObject *
scanner_call(PyScannerObject * self,PyObject * args,PyObject * kwds)1137 scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1138 {
1139 /* Python callable interface to scan_once_{str,unicode} */
1140 PyObject *pystr;
1141 PyObject *rval;
1142 Py_ssize_t idx;
1143 Py_ssize_t next_idx = -1;
1144 static char *kwlist[] = {"string", "idx", NULL};
1145 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1146 return NULL;
1147
1148 if (PyUnicode_Check(pystr)) {
1149 rval = scan_once_unicode(self, pystr, idx, &next_idx);
1150 }
1151 else {
1152 PyErr_Format(PyExc_TypeError,
1153 "first argument must be a string, not %.80s",
1154 Py_TYPE(pystr)->tp_name);
1155 return NULL;
1156 }
1157 PyDict_Clear(self->memo);
1158 if (rval == NULL)
1159 return NULL;
1160 return _build_rval_index_tuple(rval, next_idx);
1161 }
1162
1163 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1164 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1165 {
1166 PyScannerObject *s;
1167 PyObject *ctx;
1168 PyObject *strict;
1169 static char *kwlist[] = {"context", NULL};
1170
1171 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1172 return NULL;
1173
1174 s = (PyScannerObject *)type->tp_alloc(type, 0);
1175 if (s == NULL) {
1176 return NULL;
1177 }
1178
1179 s->memo = PyDict_New();
1180 if (s->memo == NULL)
1181 goto bail;
1182
1183 /* All of these will fail "gracefully" so we don't need to verify them */
1184 strict = PyObject_GetAttrString(ctx, "strict");
1185 if (strict == NULL)
1186 goto bail;
1187 s->strict = PyObject_IsTrue(strict);
1188 Py_DECREF(strict);
1189 if (s->strict < 0)
1190 goto bail;
1191 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1192 if (s->object_hook == NULL)
1193 goto bail;
1194 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1195 if (s->object_pairs_hook == NULL)
1196 goto bail;
1197 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1198 if (s->parse_float == NULL)
1199 goto bail;
1200 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1201 if (s->parse_int == NULL)
1202 goto bail;
1203 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1204 if (s->parse_constant == NULL)
1205 goto bail;
1206
1207 return (PyObject *)s;
1208
1209 bail:
1210 Py_DECREF(s);
1211 return NULL;
1212 }
1213
/* Docstring of the scanner type, installed through the Py_tp_doc slot. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Slot table for the scanner heap type: lifecycle (new/dealloc), GC support
   (traverse/clear), the call interface and the read-only members. */
static PyType_Slot PyScannerType_slots[] = {
    {Py_tp_doc, (void *)scanner_doc},
    {Py_tp_dealloc, scanner_dealloc},
    {Py_tp_call, scanner_call},
    {Py_tp_traverse, scanner_traverse},
    {Py_tp_clear, scanner_clear},
    {Py_tp_members, scanner_members},
    {Py_tp_new, scanner_new},
    {0, 0}  /* sentinel */
};

/* Spec passed to PyType_FromSpec() in _json_exec(); the resulting type is
   added to the module under the name "make_scanner". */
static PyType_Spec PyScannerType_spec = {
    .name = "_json.Scanner",
    .basicsize = sizeof(PyScannerObject),
    .itemsize = 0,
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    .slots = PyScannerType_slots,
};
1234
1235 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1236 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1237 {
1238 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1239
1240 PyEncoderObject *s;
1241 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1242 PyObject *item_separator;
1243 int sort_keys, skipkeys, allow_nan;
1244
1245 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1246 &markers, &defaultfn, &encoder, &indent,
1247 &key_separator, &item_separator,
1248 &sort_keys, &skipkeys, &allow_nan))
1249 return NULL;
1250
1251 if (markers != Py_None && !PyDict_Check(markers)) {
1252 PyErr_Format(PyExc_TypeError,
1253 "make_encoder() argument 1 must be dict or None, "
1254 "not %.200s", Py_TYPE(markers)->tp_name);
1255 return NULL;
1256 }
1257
1258 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1259 if (s == NULL)
1260 return NULL;
1261
1262 s->markers = markers;
1263 s->defaultfn = defaultfn;
1264 s->encoder = encoder;
1265 s->indent = indent;
1266 s->key_separator = key_separator;
1267 s->item_separator = item_separator;
1268 s->sort_keys = sort_keys;
1269 s->skipkeys = skipkeys;
1270 s->allow_nan = allow_nan;
1271 s->fast_encode = NULL;
1272 if (PyCFunction_Check(s->encoder)) {
1273 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1274 if (f == (PyCFunction)py_encode_basestring_ascii ||
1275 f == (PyCFunction)py_encode_basestring) {
1276 s->fast_encode = f;
1277 }
1278 }
1279
1280 Py_INCREF(s->markers);
1281 Py_INCREF(s->defaultfn);
1282 Py_INCREF(s->encoder);
1283 Py_INCREF(s->indent);
1284 Py_INCREF(s->key_separator);
1285 Py_INCREF(s->item_separator);
1286 return (PyObject *)s;
1287 }
1288
1289 static PyObject *
encoder_call(PyEncoderObject * self,PyObject * args,PyObject * kwds)1290 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1291 {
1292 /* Python callable interface to encode_listencode_obj */
1293 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1294 PyObject *obj;
1295 Py_ssize_t indent_level;
1296 _PyAccu acc;
1297 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1298 &obj, &indent_level))
1299 return NULL;
1300 if (_PyAccu_Init(&acc))
1301 return NULL;
1302 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
1303 _PyAccu_Destroy(&acc);
1304 return NULL;
1305 }
1306 return _PyAccu_FinishAsList(&acc);
1307 }
1308
1309 static PyObject *
_encoded_const(PyObject * obj)1310 _encoded_const(PyObject *obj)
1311 {
1312 /* Return the JSON string representation of None, True, False */
1313 if (obj == Py_None) {
1314 _Py_static_string(PyId_null, "null");
1315 PyObject *s_null = _PyUnicode_FromId(&PyId_null);
1316 if (s_null == NULL) {
1317 return NULL;
1318 }
1319 return Py_NewRef(s_null);
1320 }
1321 else if (obj == Py_True) {
1322 _Py_static_string(PyId_true, "true");
1323 PyObject *s_true = _PyUnicode_FromId(&PyId_true);
1324 if (s_true == NULL) {
1325 return NULL;
1326 }
1327 return Py_NewRef(s_true);
1328 }
1329 else if (obj == Py_False) {
1330 _Py_static_string(PyId_false, "false");
1331 PyObject *s_false = _PyUnicode_FromId(&PyId_false);
1332 if (s_false == NULL) {
1333 return NULL;
1334 }
1335 return Py_NewRef(s_false);
1336 }
1337 else {
1338 PyErr_SetString(PyExc_ValueError, "not a const");
1339 return NULL;
1340 }
1341 }
1342
1343 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1344 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1345 {
1346 /* Return the JSON representation of a PyFloat. */
1347 double i = PyFloat_AS_DOUBLE(obj);
1348 if (!Py_IS_FINITE(i)) {
1349 if (!s->allow_nan) {
1350 PyErr_SetString(
1351 PyExc_ValueError,
1352 "Out of range float values are not JSON compliant"
1353 );
1354 return NULL;
1355 }
1356 if (i > 0) {
1357 return PyUnicode_FromString("Infinity");
1358 }
1359 else if (i < 0) {
1360 return PyUnicode_FromString("-Infinity");
1361 }
1362 else {
1363 return PyUnicode_FromString("NaN");
1364 }
1365 }
1366 return PyFloat_Type.tp_repr(obj);
1367 }
1368
1369 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1370 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1371 {
1372 /* Return the JSON representation of a string */
1373 PyObject *encoded;
1374
1375 if (s->fast_encode) {
1376 return s->fast_encode(NULL, obj);
1377 }
1378 encoded = PyObject_CallOneArg(s->encoder, obj);
1379 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1380 PyErr_Format(PyExc_TypeError,
1381 "encoder() must return a string, not %.80s",
1382 Py_TYPE(encoded)->tp_name);
1383 Py_DECREF(encoded);
1384 return NULL;
1385 }
1386 return encoded;
1387 }
1388
1389 static int
_steal_accumulate(_PyAccu * acc,PyObject * stolen)1390 _steal_accumulate(_PyAccu *acc, PyObject *stolen)
1391 {
1392 /* Append stolen and then decrement its reference count */
1393 int rval = _PyAccu_Accumulate(acc, stolen);
1394 Py_DECREF(stolen);
1395 return rval;
1396 }
1397
1398 static int
encoder_listencode_obj(PyEncoderObject * s,_PyAccu * acc,PyObject * obj,Py_ssize_t indent_level)1399 encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
1400 PyObject *obj, Py_ssize_t indent_level)
1401 {
1402 /* Encode Python object obj to a JSON term */
1403 PyObject *newobj;
1404 int rv;
1405
1406 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1407 PyObject *cstr = _encoded_const(obj);
1408 if (cstr == NULL)
1409 return -1;
1410 return _steal_accumulate(acc, cstr);
1411 }
1412 else if (PyUnicode_Check(obj))
1413 {
1414 PyObject *encoded = encoder_encode_string(s, obj);
1415 if (encoded == NULL)
1416 return -1;
1417 return _steal_accumulate(acc, encoded);
1418 }
1419 else if (PyLong_Check(obj)) {
1420 PyObject *encoded = PyLong_Type.tp_repr(obj);
1421 if (encoded == NULL)
1422 return -1;
1423 return _steal_accumulate(acc, encoded);
1424 }
1425 else if (PyFloat_Check(obj)) {
1426 PyObject *encoded = encoder_encode_float(s, obj);
1427 if (encoded == NULL)
1428 return -1;
1429 return _steal_accumulate(acc, encoded);
1430 }
1431 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1432 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1433 return -1;
1434 rv = encoder_listencode_list(s, acc, obj, indent_level);
1435 Py_LeaveRecursiveCall();
1436 return rv;
1437 }
1438 else if (PyDict_Check(obj)) {
1439 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1440 return -1;
1441 rv = encoder_listencode_dict(s, acc, obj, indent_level);
1442 Py_LeaveRecursiveCall();
1443 return rv;
1444 }
1445 else {
1446 PyObject *ident = NULL;
1447 if (s->markers != Py_None) {
1448 int has_key;
1449 ident = PyLong_FromVoidPtr(obj);
1450 if (ident == NULL)
1451 return -1;
1452 has_key = PyDict_Contains(s->markers, ident);
1453 if (has_key) {
1454 if (has_key != -1)
1455 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1456 Py_DECREF(ident);
1457 return -1;
1458 }
1459 if (PyDict_SetItem(s->markers, ident, obj)) {
1460 Py_DECREF(ident);
1461 return -1;
1462 }
1463 }
1464 newobj = PyObject_CallOneArg(s->defaultfn, obj);
1465 if (newobj == NULL) {
1466 Py_XDECREF(ident);
1467 return -1;
1468 }
1469
1470 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1471 Py_DECREF(newobj);
1472 Py_XDECREF(ident);
1473 return -1;
1474 }
1475 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
1476 Py_LeaveRecursiveCall();
1477
1478 Py_DECREF(newobj);
1479 if (rv) {
1480 Py_XDECREF(ident);
1481 return -1;
1482 }
1483 if (ident != NULL) {
1484 if (PyDict_DelItem(s->markers, ident)) {
1485 Py_XDECREF(ident);
1486 return -1;
1487 }
1488 Py_XDECREF(ident);
1489 }
1490 return rv;
1491 }
1492 }
1493
static int
encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
                        PyObject *dct, Py_ssize_t indent_level)
{
    /* Encode Python dict dct to a JSON term, appending the pieces to acc.

       Keys that are str are used as they are; float, True/False/None and
       int keys are first converted to their JSON text; any other key is
       skipped when s->skipkeys is set and raises TypeError otherwise.  The
       resulting key string is then passed through encoder_encode_string().
       Items are sorted first when s->sort_keys is set.

       Returns 0 on success and -1 on failure. */
    _Py_static_string(PyId_open_dict, "{");
    _Py_static_string(PyId_close_dict, "}");
    _Py_static_string(PyId_empty_dict, "{}");
    PyObject *open_dict = _PyUnicode_FromId(&PyId_open_dict); // borrowed ref
    PyObject *close_dict = _PyUnicode_FromId(&PyId_close_dict); // borrowed ref
    PyObject *empty_dict = _PyUnicode_FromId(&PyId_empty_dict); // borrowed ref
    PyObject *kstr = NULL;   /* key converted to a string, owned while set */
    PyObject *ident = NULL;  /* marker key (address of dct), owned while set */
    PyObject *it = NULL;     /* iterator over the items list */
    PyObject *items;
    PyObject *item = NULL;   /* current (key, value) item, owned while set */
    Py_ssize_t idx;          /* number of items emitted so far */

    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
        return -1;
    }
    if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
        return _PyAccu_Accumulate(acc, empty_dict);

    if (s->markers != Py_None) {
        /* Register dct, keyed by its address, while it is being encoded so
           that a reference cycle is detected. */
        int has_key;
        ident = PyLong_FromVoidPtr(dct);
        if (ident == NULL)
            goto bail;
        has_key = PyDict_Contains(s->markers, ident);
        if (has_key) {
            /* -1 means the lookup itself failed and already set an error. */
            if (has_key != -1)
                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
            goto bail;
        }
        if (PyDict_SetItem(s->markers, ident, dct)) {
            goto bail;
        }
    }

    if (_PyAccu_Accumulate(acc, open_dict))
        goto bail;

    if (s->indent != Py_None) {
        /* TODO: indentation is not implemented here; only the level is
           bumped.  The intended logic, in Python pseudo-code: */
        indent_level += 1;
        /*
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        */
    }

    items = PyMapping_Items(dct);
    if (items == NULL)
        goto bail;
    if (s->sort_keys && PyList_Sort(items) < 0) {
        Py_DECREF(items);
        goto bail;
    }
    it = PyObject_GetIter(items);
    /* The local reference to the list is not needed once the iterator
       has been requested. */
    Py_DECREF(items);
    if (it == NULL)
        goto bail;
    idx = 0;
    while ((item = PyIter_Next(it)) != NULL) {
        PyObject *encoded, *key, *value;
        if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
            goto bail;
        }
        key = PyTuple_GET_ITEM(item, 0);
        if (PyUnicode_Check(key)) {
            Py_INCREF(key);
            kstr = key;
        }
        else if (PyFloat_Check(key)) {
            kstr = encoder_encode_float(s, key);
            if (kstr == NULL)
                goto bail;
        }
        else if (key == Py_True || key == Py_False || key == Py_None) {
            /* This must come before the PyLong_Check because
               True and False are also 1 and 0.*/
            kstr = _encoded_const(key);
            if (kstr == NULL)
                goto bail;
        }
        else if (PyLong_Check(key)) {
            kstr = PyLong_Type.tp_repr(key);
            if (kstr == NULL) {
                goto bail;
            }
        }
        else if (s->skipkeys) {
            /* Unsupported key type: drop the whole item silently. */
            Py_DECREF(item);
            continue;
        }
        else {
            PyErr_Format(PyExc_TypeError,
                         "keys must be str, int, float, bool or None, "
                         "not %.100s", Py_TYPE(key)->tp_name);
            goto bail;
        }

        /* The separator goes before every emitted item except the first;
           skipped items do not advance idx. */
        if (idx) {
            if (_PyAccu_Accumulate(acc, s->item_separator))
                goto bail;
        }

        encoded = encoder_encode_string(s, kstr);
        Py_CLEAR(kstr);
        if (encoded == NULL)
            goto bail;
        if (_PyAccu_Accumulate(acc, encoded)) {
            Py_DECREF(encoded);
            goto bail;
        }
        Py_DECREF(encoded);
        if (_PyAccu_Accumulate(acc, s->key_separator))
            goto bail;

        value = PyTuple_GET_ITEM(item, 1);
        if (encoder_listencode_obj(s, acc, value, indent_level))
            goto bail;
        idx += 1;
        Py_DECREF(item);
    }
    /* PyIter_Next() returns NULL both at exhaustion and on error. */
    if (PyErr_Occurred())
        goto bail;
    Py_CLEAR(it);

    if (ident != NULL) {
        /* Encoding finished: dct is no longer in progress. */
        if (PyDict_DelItem(s->markers, ident))
            goto bail;
        Py_CLEAR(ident);
    }
    /* TODO: not implemented; the closing indentation would be, in Python
       pseudo-code:
        if (s->indent != Py_None) {
            indent_level -= 1;

            yield '\n' + (' ' * (_indent * _current_indent_level))
        }*/
    if (_PyAccu_Accumulate(acc, close_dict))
        goto bail;
    return 0;

bail:
    /* Release whatever is still owned at the point of failure. */
    Py_XDECREF(it);
    Py_XDECREF(item);
    Py_XDECREF(kstr);
    Py_XDECREF(ident);
    return -1;
}
1648
1649
1650 static int
encoder_listencode_list(PyEncoderObject * s,_PyAccu * acc,PyObject * seq,Py_ssize_t indent_level)1651 encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
1652 PyObject *seq, Py_ssize_t indent_level)
1653 {
1654 /* Encode Python list seq to a JSON term */
1655 _Py_static_string(PyId_open_array, "[");
1656 _Py_static_string(PyId_close_array, "]");
1657 _Py_static_string(PyId_empty_array, "[]");
1658 PyObject *open_array = _PyUnicode_FromId(&PyId_open_array); // borrowed ref
1659 PyObject *close_array = _PyUnicode_FromId(&PyId_close_array); // borrowed ref
1660 PyObject *empty_array = _PyUnicode_FromId(&PyId_empty_array); // borrowed ref
1661 PyObject *ident = NULL;
1662 PyObject *s_fast = NULL;
1663 Py_ssize_t i;
1664
1665 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1666 return -1;
1667 }
1668 ident = NULL;
1669 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1670 if (s_fast == NULL)
1671 return -1;
1672 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1673 Py_DECREF(s_fast);
1674 return _PyAccu_Accumulate(acc, empty_array);
1675 }
1676
1677 if (s->markers != Py_None) {
1678 int has_key;
1679 ident = PyLong_FromVoidPtr(seq);
1680 if (ident == NULL)
1681 goto bail;
1682 has_key = PyDict_Contains(s->markers, ident);
1683 if (has_key) {
1684 if (has_key != -1)
1685 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1686 goto bail;
1687 }
1688 if (PyDict_SetItem(s->markers, ident, seq)) {
1689 goto bail;
1690 }
1691 }
1692
1693 if (_PyAccu_Accumulate(acc, open_array))
1694 goto bail;
1695 if (s->indent != Py_None) {
1696 /* TODO: DOES NOT RUN */
1697 indent_level += 1;
1698 /*
1699 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1700 separator = _item_separator + newline_indent
1701 buf += newline_indent
1702 */
1703 }
1704 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1705 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1706 if (i) {
1707 if (_PyAccu_Accumulate(acc, s->item_separator))
1708 goto bail;
1709 }
1710 if (encoder_listencode_obj(s, acc, obj, indent_level))
1711 goto bail;
1712 }
1713 if (ident != NULL) {
1714 if (PyDict_DelItem(s->markers, ident))
1715 goto bail;
1716 Py_CLEAR(ident);
1717 }
1718
1719 /* TODO: DOES NOT RUN
1720 if (s->indent != Py_None) {
1721 indent_level -= 1;
1722
1723 yield '\n' + (' ' * (_indent * _current_indent_level))
1724 }*/
1725 if (_PyAccu_Accumulate(acc, close_array))
1726 goto bail;
1727 Py_DECREF(s_fast);
1728 return 0;
1729
1730 bail:
1731 Py_XDECREF(ident);
1732 Py_DECREF(s_fast);
1733 return -1;
1734 }
1735
1736 static void
encoder_dealloc(PyObject * self)1737 encoder_dealloc(PyObject *self)
1738 {
1739 PyTypeObject *tp = Py_TYPE(self);
1740 /* bpo-31095: UnTrack is needed before calling any callbacks */
1741 PyObject_GC_UnTrack(self);
1742 encoder_clear((PyEncoderObject *)self);
1743 tp->tp_free(self);
1744 Py_DECREF(tp);
1745 }
1746
/* tp_traverse: report to the garbage collector the type and every object
   reference held by the encoder.  Returns 0 unless a Py_VISIT() call makes
   it return early with the visitor's result. */
static int
encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->markers);
    Py_VISIT(self->defaultfn);
    Py_VISIT(self->encoder);
    Py_VISIT(self->indent);
    Py_VISIT(self->key_separator);
    Py_VISIT(self->item_separator);
    return 0;
}
1759
/* tp_clear: drop every object reference held by the encoder and reset the
   fields to NULL.  Also called from encoder_dealloc().  Always returns 0. */
static int
encoder_clear(PyEncoderObject *self)
{
    /* Release the references; the object itself is not freed here */
    Py_CLEAR(self->markers);
    Py_CLEAR(self->defaultfn);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->indent);
    Py_CLEAR(self->key_separator);
    Py_CLEAR(self->item_separator);
    return 0;
}
1772
/* Docstring of the encoder type, installed through the Py_tp_doc slot. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Slot table for the encoder heap type: lifecycle (new/dealloc), GC support
   (traverse/clear), the call interface and the members. */
static PyType_Slot PyEncoderType_slots[] = {
    {Py_tp_doc, (void *)encoder_doc},
    {Py_tp_dealloc, encoder_dealloc},
    {Py_tp_call, encoder_call},
    {Py_tp_traverse, encoder_traverse},
    {Py_tp_clear, encoder_clear},
    {Py_tp_members, encoder_members},
    {Py_tp_new, encoder_new},
    {0, 0}  /* sentinel */
};

/* Spec passed to PyType_FromSpec() in _json_exec(); the resulting type is
   added to the module under the name "make_encoder". */
static PyType_Spec PyEncoderType_spec = {
    .name = "_json.Encoder",
    .basicsize = sizeof(PyEncoderObject),
    .itemsize = 0,
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    .slots = PyEncoderType_slots
};
1793
/* Module-level functions exported by _json.  The two encode_basestring*
   functions take a single object (METH_O); scanstring takes positional
   arguments (METH_VARARGS). */
static PyMethodDef speedups_methods[] = {
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    {"encode_basestring",
        (PyCFunction)py_encode_basestring,
        METH_O,
        pydoc_encode_basestring},
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}  /* sentinel */
};

/* Docstring of the _json module. */
PyDoc_STRVAR(module_doc,
"json speedups\n");
1812
1813 static int
_json_exec(PyObject * module)1814 _json_exec(PyObject *module)
1815 {
1816 _jsonmodulestate *state = get_json_state(module);
1817
1818 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1819 if (state->PyScannerType == NULL) {
1820 return -1;
1821 }
1822 Py_INCREF(state->PyScannerType);
1823 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
1824 Py_DECREF(state->PyScannerType);
1825 return -1;
1826 }
1827
1828 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1829 if (state->PyEncoderType == NULL) {
1830 return -1;
1831 }
1832 Py_INCREF(state->PyEncoderType);
1833 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
1834 Py_DECREF(state->PyEncoderType);
1835 return -1;
1836 }
1837
1838 return 0;
1839 }
1840
/* m_traverse: report the two type objects kept in the module state to the
   garbage collector.  Returns 0 unless a Py_VISIT() call makes it return
   early with the visitor's result. */
static int
_jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
{
    _jsonmodulestate *state = get_json_state(module);
    Py_VISIT(state->PyScannerType);
    Py_VISIT(state->PyEncoderType);
    return 0;
}
1849
/* m_clear: drop the module state's references to the scanner and encoder
   types and reset them to NULL.  Also called from _jsonmodule_free().
   Always returns 0. */
static int
_jsonmodule_clear(PyObject *module)
{
    _jsonmodulestate *state = get_json_state(module);
    Py_CLEAR(state->PyScannerType);
    Py_CLEAR(state->PyEncoderType);
    return 0;
}
1858
1859 static void
_jsonmodule_free(void * module)1860 _jsonmodule_free(void *module)
1861 {
1862 _jsonmodule_clear((PyObject *)module);
1863 }
1864
/* Module slots: _json_exec() runs as the module's Py_mod_exec step. */
static PyModuleDef_Slot _json_slots[] = {
    {Py_mod_exec, _json_exec},
    {0, NULL}  /* sentinel */
};
1869
1870 static struct PyModuleDef jsonmodule = {
1871 PyModuleDef_HEAD_INIT,
1872 "_json",
1873 module_doc,
1874 sizeof(_jsonmodulestate),
1875 speedups_methods,
1876 _json_slots,
1877 _jsonmodule_traverse,
1878 _jsonmodule_clear,
1879 _jsonmodule_free,
1880 };
1881
/* Module initialization entry point: hands the module definition to
   PyModuleDef_Init() and returns its result; the types are created later
   by _json_exec() through the Py_mod_exec slot. */
PyMODINIT_FUNC
PyInit__json(void)
{
    return PyModuleDef_Init(&jsonmodule);
}
1887