/* JSON accelerator C extension: the _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 # error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10
11 #include "Python.h"
12 #include "structmember.h" // PyMemberDef
13 #include "pycore_accu.h"
14
/* Per-module state block.  get_json_state() obtains it by casting the
   pointer returned by PyModule_GetState(). */
typedef struct {
    /* presumably the scanner type object; it is assigned outside the
       visible part of the file -- TODO confirm */
    PyObject *PyScannerType;
    /* presumably the encoder type object -- TODO confirm */
    PyObject *PyEncoderType;
} _jsonmodulestate;
19
20 static inline _jsonmodulestate*
get_json_state(PyObject * module)21 get_json_state(PyObject *module)
22 {
23 void *state = PyModule_GetState(module);
24 assert(state != NULL);
25 return (_jsonmodulestate *)state;
26 }
27
28
/* Instance layout of the scanner (decoder) object. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    /* Passed to scanstring_unicode() as its `strict` flag; when non-zero,
       characters <= 0x1f inside a JSON string raise an error. */
    signed char strict;
    /* Called with the finished dict of a JSON object unless it is Py_None. */
    PyObject *object_hook;
    /* When not Py_None, JSON objects are collected as a list of
       (key, value) tuples and this callable receives that list. */
    PyObject *object_pairs_hook;
    /* Called with the number text for floats unless it is &PyFloat_Type. */
    PyObject *parse_float;
    /* Called with the number text for integers unless it is &PyLong_Type. */
    PyObject *parse_int;
    /* Called by _parse_constant() with the constant's name as a str. */
    PyObject *parse_constant;
    /* Dict used with PyDict_SetDefault() so that equal object keys share
       a single string object. */
    PyObject *memo;
} PyScannerObject;
39
40 static PyMemberDef scanner_members[] = {
41 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
42 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48 };
49
/* Instance layout of the encoder object.  The code that uses these fields
   lies outside the visible part of the file, so the per-field notes below
   only restate what the member table shows. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;          /* exposed read-only as "markers" */
    PyObject *defaultfn;        /* exposed read-only as "default" */
    PyObject *encoder;          /* exposed read-only as "encoder" */
    PyObject *indent;           /* exposed read-only as "indent" */
    PyObject *key_separator;    /* exposed read-only as "key_separator" */
    PyObject *item_separator;   /* exposed read-only as "item_separator" */
    char sort_keys;             /* exposed as a T_BOOL member */
    char skipkeys;              /* exposed as a T_BOOL member */
    int allow_nan;              /* not exposed through encoder_members */
    /* not exposed through encoder_members; presumably an optional C fast
       path for string encoding -- TODO confirm against encoder_new() */
    PyCFunction fast_encode;
} PyEncoderObject;
63
/* Read-only attributes exposed on encoder objects.  The C field `defaultfn`
   is published under the Python-level name "default"; `allow_nan` and
   `fast_encode` are not published. */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}  /* sentinel */
};
75
/* Forward decls */

/* String escaping (encoder side). */
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
/* NOTE(review): no definition of init_json is visible in this part of the
   file -- confirm that this declaration is still needed. */
void init_json(void);
/* Scanner (decoder side). */
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
static PyObject *
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static void
scanner_dealloc(PyObject *self);
static int
scanner_clear(PyScannerObject *self);
/* Encoder object. */
static PyObject *
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static void
encoder_dealloc(PyObject *self);
static int
encoder_clear(PyEncoderObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
static int
encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
static int
encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);
/* Error reporting helper shared by the scanner functions. */
static void
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
113
/* True when code point c can be copied verbatim into an ASCII-only JSON
   string: printable ASCII (' ' .. '~') other than backslash and double
   quote.  The argument is parenthesized so that expression arguments
   (e.g. `x | y`) are compared as a whole; it is still evaluated more than
   once, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters the scanner skips between JSON
   tokens: space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
116
117 static Py_ssize_t
ascii_escape_unichar(Py_UCS4 c,unsigned char * output,Py_ssize_t chars)118 ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
119 {
120 /* Escape unicode code point c to ASCII escape sequences
121 in char *output. output must have at least 12 bytes unused to
122 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
123 output[chars++] = '\\';
124 switch (c) {
125 case '\\': output[chars++] = c; break;
126 case '"': output[chars++] = c; break;
127 case '\b': output[chars++] = 'b'; break;
128 case '\f': output[chars++] = 'f'; break;
129 case '\n': output[chars++] = 'n'; break;
130 case '\r': output[chars++] = 'r'; break;
131 case '\t': output[chars++] = 't'; break;
132 default:
133 if (c >= 0x10000) {
134 /* UTF-16 surrogate pair */
135 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
136 output[chars++] = 'u';
137 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
139 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
140 output[chars++] = Py_hexdigits[(v ) & 0xf];
141 c = Py_UNICODE_LOW_SURROGATE(c);
142 output[chars++] = '\\';
143 }
144 output[chars++] = 'u';
145 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
147 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
148 output[chars++] = Py_hexdigits[(c ) & 0xf];
149 }
150 return chars;
151 }
152
153 static PyObject *
ascii_escape_unicode(PyObject * pystr)154 ascii_escape_unicode(PyObject *pystr)
155 {
156 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
157 Py_ssize_t i;
158 Py_ssize_t input_chars;
159 Py_ssize_t output_size;
160 Py_ssize_t chars;
161 PyObject *rval;
162 const void *input;
163 Py_UCS1 *output;
164 int kind;
165
166 if (PyUnicode_READY(pystr) == -1)
167 return NULL;
168
169 input_chars = PyUnicode_GET_LENGTH(pystr);
170 input = PyUnicode_DATA(pystr);
171 kind = PyUnicode_KIND(pystr);
172
173 /* Compute the output size */
174 for (i = 0, output_size = 2; i < input_chars; i++) {
175 Py_UCS4 c = PyUnicode_READ(kind, input, i);
176 Py_ssize_t d;
177 if (S_CHAR(c)) {
178 d = 1;
179 }
180 else {
181 switch(c) {
182 case '\\': case '"': case '\b': case '\f':
183 case '\n': case '\r': case '\t':
184 d = 2; break;
185 default:
186 d = c >= 0x10000 ? 12 : 6;
187 }
188 }
189 if (output_size > PY_SSIZE_T_MAX - d) {
190 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
191 return NULL;
192 }
193 output_size += d;
194 }
195
196 rval = PyUnicode_New(output_size, 127);
197 if (rval == NULL) {
198 return NULL;
199 }
200 output = PyUnicode_1BYTE_DATA(rval);
201 chars = 0;
202 output[chars++] = '"';
203 for (i = 0; i < input_chars; i++) {
204 Py_UCS4 c = PyUnicode_READ(kind, input, i);
205 if (S_CHAR(c)) {
206 output[chars++] = c;
207 }
208 else {
209 chars = ascii_escape_unichar(c, output, chars);
210 }
211 }
212 output[chars++] = '"';
213 #ifdef Py_DEBUG
214 assert(_PyUnicode_CheckConsistency(rval, 1));
215 #endif
216 return rval;
217 }
218
219 static PyObject *
escape_unicode(PyObject * pystr)220 escape_unicode(PyObject *pystr)
221 {
222 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
223 Py_ssize_t i;
224 Py_ssize_t input_chars;
225 Py_ssize_t output_size;
226 Py_ssize_t chars;
227 PyObject *rval;
228 const void *input;
229 int kind;
230 Py_UCS4 maxchar;
231
232 if (PyUnicode_READY(pystr) == -1)
233 return NULL;
234
235 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
236 input_chars = PyUnicode_GET_LENGTH(pystr);
237 input = PyUnicode_DATA(pystr);
238 kind = PyUnicode_KIND(pystr);
239
240 /* Compute the output size */
241 for (i = 0, output_size = 2; i < input_chars; i++) {
242 Py_UCS4 c = PyUnicode_READ(kind, input, i);
243 Py_ssize_t d;
244 switch (c) {
245 case '\\': case '"': case '\b': case '\f':
246 case '\n': case '\r': case '\t':
247 d = 2;
248 break;
249 default:
250 if (c <= 0x1f)
251 d = 6;
252 else
253 d = 1;
254 }
255 if (output_size > PY_SSIZE_T_MAX - d) {
256 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
257 return NULL;
258 }
259 output_size += d;
260 }
261
262 rval = PyUnicode_New(output_size, maxchar);
263 if (rval == NULL)
264 return NULL;
265
266 kind = PyUnicode_KIND(rval);
267
268 #define ENCODE_OUTPUT do { \
269 chars = 0; \
270 output[chars++] = '"'; \
271 for (i = 0; i < input_chars; i++) { \
272 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
273 switch (c) { \
274 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
275 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
276 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
277 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
278 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
279 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
280 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
281 default: \
282 if (c <= 0x1f) { \
283 output[chars++] = '\\'; \
284 output[chars++] = 'u'; \
285 output[chars++] = '0'; \
286 output[chars++] = '0'; \
287 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
288 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
289 } else { \
290 output[chars++] = c; \
291 } \
292 } \
293 } \
294 output[chars++] = '"'; \
295 } while (0)
296
297 if (kind == PyUnicode_1BYTE_KIND) {
298 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
299 ENCODE_OUTPUT;
300 } else if (kind == PyUnicode_2BYTE_KIND) {
301 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else {
304 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
305 assert(kind == PyUnicode_4BYTE_KIND);
306 ENCODE_OUTPUT;
307 }
308 #undef ENCODE_OUTPUT
309
310 #ifdef Py_DEBUG
311 assert(_PyUnicode_CheckConsistency(rval, 1));
312 #endif
313 return rval;
314 }
315
316 static void
raise_errmsg(const char * msg,PyObject * s,Py_ssize_t end)317 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
318 {
319 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
320 static PyObject *JSONDecodeError = NULL;
321 PyObject *exc;
322 if (JSONDecodeError == NULL) {
323 PyObject *decoder = PyImport_ImportModule("json.decoder");
324 if (decoder == NULL)
325 return;
326 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
327 Py_DECREF(decoder);
328 if (JSONDecodeError == NULL)
329 return;
330 }
331 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
332 if (exc) {
333 PyErr_SetObject(JSONDecodeError, exc);
334 Py_DECREF(exc);
335 }
336 }
337
338 static void
raise_stop_iteration(Py_ssize_t idx)339 raise_stop_iteration(Py_ssize_t idx)
340 {
341 PyObject *value = PyLong_FromSsize_t(idx);
342 if (value != NULL) {
343 PyErr_SetObject(PyExc_StopIteration, value);
344 Py_DECREF(value);
345 }
346 }
347
348 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)349 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
350 /* return (rval, idx) tuple, stealing reference to rval */
351 PyObject *tpl;
352 PyObject *pyidx;
353 /*
354 steal a reference to rval, returns (rval, idx)
355 */
356 if (rval == NULL) {
357 return NULL;
358 }
359 pyidx = PyLong_FromSsize_t(idx);
360 if (pyidx == NULL) {
361 Py_DECREF(rval);
362 return NULL;
363 }
364 tpl = PyTuple_New(2);
365 if (tpl == NULL) {
366 Py_DECREF(pyidx);
367 Py_DECREF(rval);
368 return NULL;
369 }
370 PyTuple_SET_ITEM(tpl, 0, rval);
371 PyTuple_SET_ITEM(tpl, 1, pyidx);
372 return tpl;
373 }
374
375 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)376 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
377 {
378 /* Read the JSON string from PyUnicode pystr.
379 end is the index of the first character after the quote.
380 if strict is zero then literal control characters are allowed
381 *next_end_ptr is a return-by-reference index of the character
382 after the end quote
383
384 Return value is a new PyUnicode
385 */
386 PyObject *rval = NULL;
387 Py_ssize_t len;
388 Py_ssize_t begin = end - 1;
389 Py_ssize_t next /* = begin */;
390 const void *buf;
391 int kind;
392
393 if (PyUnicode_READY(pystr) == -1)
394 return 0;
395
396 _PyUnicodeWriter writer;
397 _PyUnicodeWriter_Init(&writer);
398 writer.overallocate = 1;
399
400 len = PyUnicode_GET_LENGTH(pystr);
401 buf = PyUnicode_DATA(pystr);
402 kind = PyUnicode_KIND(pystr);
403
404 if (end < 0 || len < end) {
405 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
406 goto bail;
407 }
408 while (1) {
409 /* Find the end of the string or the next escape */
410 Py_UCS4 c;
411 {
412 // Use tight scope variable to help register allocation.
413 Py_UCS4 d = 0;
414 for (next = end; next < len; next++) {
415 d = PyUnicode_READ(kind, buf, next);
416 if (d == '"' || d == '\\') {
417 break;
418 }
419 if (d <= 0x1f && strict) {
420 raise_errmsg("Invalid control character at", pystr, next);
421 goto bail;
422 }
423 }
424 c = d;
425 }
426
427 if (c == '"') {
428 // Fast path for simple case.
429 if (writer.buffer == NULL) {
430 PyObject *ret = PyUnicode_Substring(pystr, end, next);
431 if (ret == NULL) {
432 goto bail;
433 }
434 *next_end_ptr = next + 1;;
435 return ret;
436 }
437 }
438 else if (c != '\\') {
439 raise_errmsg("Unterminated string starting at", pystr, begin);
440 goto bail;
441 }
442
443 /* Pick up this chunk if it's not zero length */
444 if (next != end) {
445 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
446 goto bail;
447 }
448 }
449 next++;
450 if (c == '"') {
451 end = next;
452 break;
453 }
454 if (next == len) {
455 raise_errmsg("Unterminated string starting at", pystr, begin);
456 goto bail;
457 }
458 c = PyUnicode_READ(kind, buf, next);
459 if (c != 'u') {
460 /* Non-unicode backslash escapes */
461 end = next + 1;
462 switch (c) {
463 case '"': break;
464 case '\\': break;
465 case '/': break;
466 case 'b': c = '\b'; break;
467 case 'f': c = '\f'; break;
468 case 'n': c = '\n'; break;
469 case 'r': c = '\r'; break;
470 case 't': c = '\t'; break;
471 default: c = 0;
472 }
473 if (c == 0) {
474 raise_errmsg("Invalid \\escape", pystr, end - 2);
475 goto bail;
476 }
477 }
478 else {
479 c = 0;
480 next++;
481 end = next + 4;
482 if (end >= len) {
483 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
484 goto bail;
485 }
486 /* Decode 4 hex digits */
487 for (; next < end; next++) {
488 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
489 c <<= 4;
490 switch (digit) {
491 case '0': case '1': case '2': case '3': case '4':
492 case '5': case '6': case '7': case '8': case '9':
493 c |= (digit - '0'); break;
494 case 'a': case 'b': case 'c': case 'd': case 'e':
495 case 'f':
496 c |= (digit - 'a' + 10); break;
497 case 'A': case 'B': case 'C': case 'D': case 'E':
498 case 'F':
499 c |= (digit - 'A' + 10); break;
500 default:
501 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
502 goto bail;
503 }
504 }
505 /* Surrogate pair */
506 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
507 PyUnicode_READ(kind, buf, next++) == '\\' &&
508 PyUnicode_READ(kind, buf, next++) == 'u') {
509 Py_UCS4 c2 = 0;
510 end += 6;
511 /* Decode 4 hex digits */
512 for (; next < end; next++) {
513 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
514 c2 <<= 4;
515 switch (digit) {
516 case '0': case '1': case '2': case '3': case '4':
517 case '5': case '6': case '7': case '8': case '9':
518 c2 |= (digit - '0'); break;
519 case 'a': case 'b': case 'c': case 'd': case 'e':
520 case 'f':
521 c2 |= (digit - 'a' + 10); break;
522 case 'A': case 'B': case 'C': case 'D': case 'E':
523 case 'F':
524 c2 |= (digit - 'A' + 10); break;
525 default:
526 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
527 goto bail;
528 }
529 }
530 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
531 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
532 else
533 end -= 6;
534 }
535 }
536 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
537 goto bail;
538 }
539 }
540
541 rval = _PyUnicodeWriter_Finish(&writer);
542 *next_end_ptr = end;
543 return rval;
544
545 bail:
546 *next_end_ptr = -1;
547 _PyUnicodeWriter_Dealloc(&writer);
548 return NULL;
549 }
550
551 PyDoc_STRVAR(pydoc_scanstring,
552 "scanstring(string, end, strict=True) -> (string, end)\n"
553 "\n"
554 "Scan the string s for a JSON string. End is the index of the\n"
555 "character in s after the quote that started the JSON string.\n"
556 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
557 "on attempt to decode an invalid string. If strict is False then literal\n"
558 "control characters are allowed in the string.\n"
559 "\n"
560 "Returns a tuple of the decoded string and the index of the character in s\n"
561 "after the end quote."
562 );
563
564 static PyObject *
py_scanstring(PyObject * Py_UNUSED (self),PyObject * args)565 py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
566 {
567 PyObject *pystr;
568 PyObject *rval;
569 Py_ssize_t end;
570 Py_ssize_t next_end = -1;
571 int strict = 1;
572 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
573 return NULL;
574 }
575 if (PyUnicode_Check(pystr)) {
576 rval = scanstring_unicode(pystr, end, strict, &next_end);
577 }
578 else {
579 PyErr_Format(PyExc_TypeError,
580 "first argument must be a string, not %.80s",
581 Py_TYPE(pystr)->tp_name);
582 return NULL;
583 }
584 return _build_rval_index_tuple(rval, next_end);
585 }
586
587 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
588 "encode_basestring_ascii(string) -> string\n"
589 "\n"
590 "Return an ASCII-only JSON representation of a Python string"
591 );
592
593 static PyObject *
py_encode_basestring_ascii(PyObject * Py_UNUSED (self),PyObject * pystr)594 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
595 {
596 PyObject *rval;
597 /* Return an ASCII-only JSON representation of a Python string */
598 /* METH_O */
599 if (PyUnicode_Check(pystr)) {
600 rval = ascii_escape_unicode(pystr);
601 }
602 else {
603 PyErr_Format(PyExc_TypeError,
604 "first argument must be a string, not %.80s",
605 Py_TYPE(pystr)->tp_name);
606 return NULL;
607 }
608 return rval;
609 }
610
611
612 PyDoc_STRVAR(pydoc_encode_basestring,
613 "encode_basestring(string) -> string\n"
614 "\n"
615 "Return a JSON representation of a Python string"
616 );
617
618 static PyObject *
py_encode_basestring(PyObject * Py_UNUSED (self),PyObject * pystr)619 py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
620 {
621 PyObject *rval;
622 /* Return a JSON representation of a Python string */
623 /* METH_O */
624 if (PyUnicode_Check(pystr)) {
625 rval = escape_unicode(pystr);
626 }
627 else {
628 PyErr_Format(PyExc_TypeError,
629 "first argument must be a string, not %.80s",
630 Py_TYPE(pystr)->tp_name);
631 return NULL;
632 }
633 return rval;
634 }
635
636 static void
scanner_dealloc(PyObject * self)637 scanner_dealloc(PyObject *self)
638 {
639 PyTypeObject *tp = Py_TYPE(self);
640 /* bpo-31095: UnTrack is needed before calling any callbacks */
641 PyObject_GC_UnTrack(self);
642 scanner_clear((PyScannerObject *)self);
643 tp->tp_free(self);
644 Py_DECREF(tp);
645 }
646
/* GC traversal for scanner objects: visits the instance's type and every
   PyObject* field of PyScannerObject.  Py_VISIT returns early from this
   function if the visit callback reports a non-zero result. */
static int
scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->object_hook);
    Py_VISIT(self->object_pairs_hook);
    Py_VISIT(self->parse_float);
    Py_VISIT(self->parse_int);
    Py_VISIT(self->parse_constant);
    Py_VISIT(self->memo);
    return 0;
}
659
/* Drop every object reference held by the scanner and reset the fields to
   NULL.  Also called from scanner_dealloc().  Always returns 0. */
static int
scanner_clear(PyScannerObject *self)
{
    Py_CLEAR(self->object_hook);
    Py_CLEAR(self->object_pairs_hook);
    Py_CLEAR(self->parse_float);
    Py_CLEAR(self->parse_int);
    Py_CLEAR(self->parse_constant);
    Py_CLEAR(self->memo);
    return 0;
}
671
672 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)673 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
674 {
675 /* Read a JSON object from PyUnicode pystr.
676 idx is the index of the first character after the opening curly brace.
677 *next_idx_ptr is a return-by-reference index to the first character after
678 the closing curly brace.
679
680 Returns a new PyObject (usually a dict, but object_hook can change that)
681 */
682 const void *str;
683 int kind;
684 Py_ssize_t end_idx;
685 PyObject *val = NULL;
686 PyObject *rval = NULL;
687 PyObject *key = NULL;
688 int has_pairs_hook = (s->object_pairs_hook != Py_None);
689 Py_ssize_t next_idx;
690
691 if (PyUnicode_READY(pystr) == -1)
692 return NULL;
693
694 str = PyUnicode_DATA(pystr);
695 kind = PyUnicode_KIND(pystr);
696 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
697
698 if (has_pairs_hook)
699 rval = PyList_New(0);
700 else
701 rval = PyDict_New();
702 if (rval == NULL)
703 return NULL;
704
705 /* skip whitespace after { */
706 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
707
708 /* only loop if the object is non-empty */
709 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
710 while (1) {
711 PyObject *memokey;
712
713 /* read key */
714 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
715 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
716 goto bail;
717 }
718 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
719 if (key == NULL)
720 goto bail;
721 memokey = PyDict_SetDefault(s->memo, key, key);
722 if (memokey == NULL) {
723 goto bail;
724 }
725 Py_INCREF(memokey);
726 Py_DECREF(key);
727 key = memokey;
728 idx = next_idx;
729
730 /* skip whitespace between key and : delimiter, read :, skip whitespace */
731 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
732 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
733 raise_errmsg("Expecting ':' delimiter", pystr, idx);
734 goto bail;
735 }
736 idx++;
737 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
738
739 /* read any JSON term */
740 val = scan_once_unicode(s, pystr, idx, &next_idx);
741 if (val == NULL)
742 goto bail;
743
744 if (has_pairs_hook) {
745 PyObject *item = PyTuple_Pack(2, key, val);
746 if (item == NULL)
747 goto bail;
748 Py_CLEAR(key);
749 Py_CLEAR(val);
750 if (PyList_Append(rval, item) == -1) {
751 Py_DECREF(item);
752 goto bail;
753 }
754 Py_DECREF(item);
755 }
756 else {
757 if (PyDict_SetItem(rval, key, val) < 0)
758 goto bail;
759 Py_CLEAR(key);
760 Py_CLEAR(val);
761 }
762 idx = next_idx;
763
764 /* skip whitespace before } or , */
765 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
766
767 /* bail if the object is closed or we didn't get the , delimiter */
768 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
769 break;
770 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
771 raise_errmsg("Expecting ',' delimiter", pystr, idx);
772 goto bail;
773 }
774 idx++;
775
776 /* skip whitespace after , delimiter */
777 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
778 }
779 }
780
781 *next_idx_ptr = idx + 1;
782
783 if (has_pairs_hook) {
784 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
785 Py_DECREF(rval);
786 return val;
787 }
788
789 /* if object_hook is not None: rval = object_hook(rval) */
790 if (s->object_hook != Py_None) {
791 val = PyObject_CallOneArg(s->object_hook, rval);
792 Py_DECREF(rval);
793 return val;
794 }
795 return rval;
796 bail:
797 Py_XDECREF(key);
798 Py_XDECREF(val);
799 Py_XDECREF(rval);
800 return NULL;
801 }
802
803 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)804 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
805 /* Read a JSON array from PyUnicode pystr.
806 idx is the index of the first character after the opening brace.
807 *next_idx_ptr is a return-by-reference index to the first character after
808 the closing brace.
809
810 Returns a new PyList
811 */
812 const void *str;
813 int kind;
814 Py_ssize_t end_idx;
815 PyObject *val = NULL;
816 PyObject *rval;
817 Py_ssize_t next_idx;
818
819 if (PyUnicode_READY(pystr) == -1)
820 return NULL;
821
822 rval = PyList_New(0);
823 if (rval == NULL)
824 return NULL;
825
826 str = PyUnicode_DATA(pystr);
827 kind = PyUnicode_KIND(pystr);
828 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
829
830 /* skip whitespace after [ */
831 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
832
833 /* only loop if the array is non-empty */
834 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
835 while (1) {
836
837 /* read any JSON term */
838 val = scan_once_unicode(s, pystr, idx, &next_idx);
839 if (val == NULL)
840 goto bail;
841
842 if (PyList_Append(rval, val) == -1)
843 goto bail;
844
845 Py_CLEAR(val);
846 idx = next_idx;
847
848 /* skip whitespace between term and , */
849 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
850
851 /* bail if the array is closed or we didn't get the , delimiter */
852 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
853 break;
854 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
855 raise_errmsg("Expecting ',' delimiter", pystr, idx);
856 goto bail;
857 }
858 idx++;
859
860 /* skip whitespace after , */
861 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
862 }
863 }
864
865 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
866 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
867 raise_errmsg("Expecting value", pystr, end_idx);
868 goto bail;
869 }
870 *next_idx_ptr = idx + 1;
871 return rval;
872 bail:
873 Py_XDECREF(val);
874 Py_DECREF(rval);
875 return NULL;
876 }
877
878 static PyObject *
_parse_constant(PyScannerObject * s,const char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)879 _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
880 /* Read a JSON constant.
881 constant is the constant string that was found
882 ("NaN", "Infinity", "-Infinity").
883 idx is the index of the first character of the constant
884 *next_idx_ptr is a return-by-reference index to the first character after
885 the constant.
886
887 Returns the result of parse_constant
888 */
889 PyObject *cstr;
890 PyObject *rval;
891 /* constant is "NaN", "Infinity", or "-Infinity" */
892 cstr = PyUnicode_InternFromString(constant);
893 if (cstr == NULL)
894 return NULL;
895
896 /* rval = parse_constant(constant) */
897 rval = PyObject_CallOneArg(s->parse_constant, cstr);
898 idx += PyUnicode_GET_LENGTH(cstr);
899 Py_DECREF(cstr);
900 *next_idx_ptr = idx;
901 return rval;
902 }
903
904 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)905 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
906 /* Read a JSON number from PyUnicode pystr.
907 idx is the index of the first character of the number
908 *next_idx_ptr is a return-by-reference index to the first character after
909 the number.
910
911 Returns a new PyObject representation of that number:
912 PyLong, or PyFloat.
913 May return other types if parse_int or parse_float are set
914 */
915 const void *str;
916 int kind;
917 Py_ssize_t end_idx;
918 Py_ssize_t idx = start;
919 int is_float = 0;
920 PyObject *rval;
921 PyObject *numstr = NULL;
922 PyObject *custom_func;
923
924 if (PyUnicode_READY(pystr) == -1)
925 return NULL;
926
927 str = PyUnicode_DATA(pystr);
928 kind = PyUnicode_KIND(pystr);
929 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
930
931 /* read a sign if it's there, make sure it's not the end of the string */
932 if (PyUnicode_READ(kind, str, idx) == '-') {
933 idx++;
934 if (idx > end_idx) {
935 raise_stop_iteration(start);
936 return NULL;
937 }
938 }
939
940 /* read as many integer digits as we find as long as it doesn't start with 0 */
941 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
942 idx++;
943 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
944 }
945 /* if it starts with 0 we only expect one integer digit */
946 else if (PyUnicode_READ(kind, str, idx) == '0') {
947 idx++;
948 }
949 /* no integer digits, error */
950 else {
951 raise_stop_iteration(start);
952 return NULL;
953 }
954
955 /* if the next char is '.' followed by a digit then read all float digits */
956 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
957 is_float = 1;
958 idx += 2;
959 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
960 }
961
962 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
963 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
964 Py_ssize_t e_start = idx;
965 idx++;
966
967 /* read an exponent sign if present */
968 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
969
970 /* read all digits */
971 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
972
973 /* if we got a digit, then parse as float. if not, backtrack */
974 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
975 is_float = 1;
976 }
977 else {
978 idx = e_start;
979 }
980 }
981
982 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
983 custom_func = s->parse_float;
984 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
985 custom_func = s->parse_int;
986 else
987 custom_func = NULL;
988
989 if (custom_func) {
990 /* copy the section we determined to be a number */
991 numstr = PyUnicode_FromKindAndData(kind,
992 (char*)str + kind * start,
993 idx - start);
994 if (numstr == NULL)
995 return NULL;
996 rval = PyObject_CallOneArg(custom_func, numstr);
997 }
998 else {
999 Py_ssize_t i, n;
1000 char *buf;
1001 /* Straight conversion to ASCII, to avoid costly conversion of
1002 decimal unicode digits (which cannot appear here) */
1003 n = idx - start;
1004 numstr = PyBytes_FromStringAndSize(NULL, n);
1005 if (numstr == NULL)
1006 return NULL;
1007 buf = PyBytes_AS_STRING(numstr);
1008 for (i = 0; i < n; i++) {
1009 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
1010 }
1011 if (is_float)
1012 rval = PyFloat_FromString(numstr);
1013 else
1014 rval = PyLong_FromString(buf, NULL, 10);
1015 }
1016 Py_DECREF(numstr);
1017 *next_idx_ptr = idx;
1018 return rval;
1019 }
1020
1021 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1022 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1023 {
1024 /* Read one JSON term (of any kind) from PyUnicode pystr.
1025 idx is the index of the first character of the term
1026 *next_idx_ptr is a return-by-reference index to the first character after
1027 the number.
1028
1029 Returns a new PyObject representation of the term.
1030 */
1031 PyObject *res;
1032 const void *str;
1033 int kind;
1034 Py_ssize_t length;
1035
1036 if (PyUnicode_READY(pystr) == -1)
1037 return NULL;
1038
1039 str = PyUnicode_DATA(pystr);
1040 kind = PyUnicode_KIND(pystr);
1041 length = PyUnicode_GET_LENGTH(pystr);
1042
1043 if (idx < 0) {
1044 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1045 return NULL;
1046 }
1047 if (idx >= length) {
1048 raise_stop_iteration(idx);
1049 return NULL;
1050 }
1051
1052 switch (PyUnicode_READ(kind, str, idx)) {
1053 case '"':
1054 /* string */
1055 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1056 case '{':
1057 /* object */
1058 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1059 "from a unicode string"))
1060 return NULL;
1061 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1062 Py_LeaveRecursiveCall();
1063 return res;
1064 case '[':
1065 /* array */
1066 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1067 "from a unicode string"))
1068 return NULL;
1069 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1070 Py_LeaveRecursiveCall();
1071 return res;
1072 case 'n':
1073 /* null */
1074 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1075 *next_idx_ptr = idx + 4;
1076 Py_RETURN_NONE;
1077 }
1078 break;
1079 case 't':
1080 /* true */
1081 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1082 *next_idx_ptr = idx + 4;
1083 Py_RETURN_TRUE;
1084 }
1085 break;
1086 case 'f':
1087 /* false */
1088 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1089 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1090 PyUnicode_READ(kind, str, idx + 3) == 's' &&
1091 PyUnicode_READ(kind, str, idx + 4) == 'e') {
1092 *next_idx_ptr = idx + 5;
1093 Py_RETURN_FALSE;
1094 }
1095 break;
1096 case 'N':
1097 /* NaN */
1098 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1099 PyUnicode_READ(kind, str, idx + 2) == 'N') {
1100 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1101 }
1102 break;
1103 case 'I':
1104 /* Infinity */
1105 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1106 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1107 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1108 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1109 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1110 PyUnicode_READ(kind, str, idx + 6) == 't' &&
1111 PyUnicode_READ(kind, str, idx + 7) == 'y') {
1112 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1113 }
1114 break;
1115 case '-':
1116 /* -Infinity */
1117 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1118 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1119 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1120 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1121 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1122 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1123 PyUnicode_READ(kind, str, idx + 7) == 't' &&
1124 PyUnicode_READ(kind, str, idx + 8) == 'y') {
1125 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1126 }
1127 break;
1128 }
1129 /* Didn't find a string, object, array, or named constant. Look for a number. */
1130 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1131 }
1132
1133 static PyObject *
scanner_call(PyScannerObject * self,PyObject * args,PyObject * kwds)1134 scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1135 {
1136 /* Python callable interface to scan_once_{str,unicode} */
1137 PyObject *pystr;
1138 PyObject *rval;
1139 Py_ssize_t idx;
1140 Py_ssize_t next_idx = -1;
1141 static char *kwlist[] = {"string", "idx", NULL};
1142 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1143 return NULL;
1144
1145 if (PyUnicode_Check(pystr)) {
1146 rval = scan_once_unicode(self, pystr, idx, &next_idx);
1147 }
1148 else {
1149 PyErr_Format(PyExc_TypeError,
1150 "first argument must be a string, not %.80s",
1151 Py_TYPE(pystr)->tp_name);
1152 return NULL;
1153 }
1154 PyDict_Clear(self->memo);
1155 if (rval == NULL)
1156 return NULL;
1157 return _build_rval_index_tuple(rval, next_idx);
1158 }
1159
1160 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1161 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1162 {
1163 PyScannerObject *s;
1164 PyObject *ctx;
1165 PyObject *strict;
1166 static char *kwlist[] = {"context", NULL};
1167
1168 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1169 return NULL;
1170
1171 s = (PyScannerObject *)type->tp_alloc(type, 0);
1172 if (s == NULL) {
1173 return NULL;
1174 }
1175
1176 s->memo = PyDict_New();
1177 if (s->memo == NULL)
1178 goto bail;
1179
1180 /* All of these will fail "gracefully" so we don't need to verify them */
1181 strict = PyObject_GetAttrString(ctx, "strict");
1182 if (strict == NULL)
1183 goto bail;
1184 s->strict = PyObject_IsTrue(strict);
1185 Py_DECREF(strict);
1186 if (s->strict < 0)
1187 goto bail;
1188 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1189 if (s->object_hook == NULL)
1190 goto bail;
1191 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1192 if (s->object_pairs_hook == NULL)
1193 goto bail;
1194 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1195 if (s->parse_float == NULL)
1196 goto bail;
1197 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1198 if (s->parse_int == NULL)
1199 goto bail;
1200 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1201 if (s->parse_constant == NULL)
1202 goto bail;
1203
1204 return (PyObject *)s;
1205
1206 bail:
1207 Py_DECREF(s);
1208 return NULL;
1209 }
1210
1211 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1212
1213 static PyType_Slot PyScannerType_slots[] = {
1214 {Py_tp_doc, (void *)scanner_doc},
1215 {Py_tp_dealloc, scanner_dealloc},
1216 {Py_tp_call, scanner_call},
1217 {Py_tp_traverse, scanner_traverse},
1218 {Py_tp_clear, scanner_clear},
1219 {Py_tp_members, scanner_members},
1220 {Py_tp_new, scanner_new},
1221 {0, 0}
1222 };
1223
1224 static PyType_Spec PyScannerType_spec = {
1225 .name = "_json.Scanner",
1226 .basicsize = sizeof(PyScannerObject),
1227 .itemsize = 0,
1228 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1229 .slots = PyScannerType_slots,
1230 };
1231
1232 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1233 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1234 {
1235 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1236
1237 PyEncoderObject *s;
1238 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1239 PyObject *item_separator;
1240 int sort_keys, skipkeys, allow_nan;
1241
1242 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1243 &markers, &defaultfn, &encoder, &indent,
1244 &key_separator, &item_separator,
1245 &sort_keys, &skipkeys, &allow_nan))
1246 return NULL;
1247
1248 if (markers != Py_None && !PyDict_Check(markers)) {
1249 PyErr_Format(PyExc_TypeError,
1250 "make_encoder() argument 1 must be dict or None, "
1251 "not %.200s", Py_TYPE(markers)->tp_name);
1252 return NULL;
1253 }
1254
1255 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1256 if (s == NULL)
1257 return NULL;
1258
1259 s->markers = markers;
1260 s->defaultfn = defaultfn;
1261 s->encoder = encoder;
1262 s->indent = indent;
1263 s->key_separator = key_separator;
1264 s->item_separator = item_separator;
1265 s->sort_keys = sort_keys;
1266 s->skipkeys = skipkeys;
1267 s->allow_nan = allow_nan;
1268 s->fast_encode = NULL;
1269 if (PyCFunction_Check(s->encoder)) {
1270 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1271 if (f == (PyCFunction)py_encode_basestring_ascii ||
1272 f == (PyCFunction)py_encode_basestring) {
1273 s->fast_encode = f;
1274 }
1275 }
1276
1277 Py_INCREF(s->markers);
1278 Py_INCREF(s->defaultfn);
1279 Py_INCREF(s->encoder);
1280 Py_INCREF(s->indent);
1281 Py_INCREF(s->key_separator);
1282 Py_INCREF(s->item_separator);
1283 return (PyObject *)s;
1284 }
1285
1286 static PyObject *
encoder_call(PyEncoderObject * self,PyObject * args,PyObject * kwds)1287 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1288 {
1289 /* Python callable interface to encode_listencode_obj */
1290 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1291 PyObject *obj;
1292 Py_ssize_t indent_level;
1293 _PyAccu acc;
1294 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1295 &obj, &indent_level))
1296 return NULL;
1297 if (_PyAccu_Init(&acc))
1298 return NULL;
1299 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
1300 _PyAccu_Destroy(&acc);
1301 return NULL;
1302 }
1303 return _PyAccu_FinishAsList(&acc);
1304 }
1305
1306 static PyObject *
_encoded_const(PyObject * obj)1307 _encoded_const(PyObject *obj)
1308 {
1309 /* Return the JSON string representation of None, True, False */
1310 if (obj == Py_None) {
1311 static PyObject *s_null = NULL;
1312 if (s_null == NULL) {
1313 s_null = PyUnicode_InternFromString("null");
1314 }
1315 Py_XINCREF(s_null);
1316 return s_null;
1317 }
1318 else if (obj == Py_True) {
1319 static PyObject *s_true = NULL;
1320 if (s_true == NULL) {
1321 s_true = PyUnicode_InternFromString("true");
1322 }
1323 Py_XINCREF(s_true);
1324 return s_true;
1325 }
1326 else if (obj == Py_False) {
1327 static PyObject *s_false = NULL;
1328 if (s_false == NULL) {
1329 s_false = PyUnicode_InternFromString("false");
1330 }
1331 Py_XINCREF(s_false);
1332 return s_false;
1333 }
1334 else {
1335 PyErr_SetString(PyExc_ValueError, "not a const");
1336 return NULL;
1337 }
1338 }
1339
1340 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1341 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1342 {
1343 /* Return the JSON representation of a PyFloat. */
1344 double i = PyFloat_AS_DOUBLE(obj);
1345 if (!Py_IS_FINITE(i)) {
1346 if (!s->allow_nan) {
1347 PyErr_SetString(
1348 PyExc_ValueError,
1349 "Out of range float values are not JSON compliant"
1350 );
1351 return NULL;
1352 }
1353 if (i > 0) {
1354 return PyUnicode_FromString("Infinity");
1355 }
1356 else if (i < 0) {
1357 return PyUnicode_FromString("-Infinity");
1358 }
1359 else {
1360 return PyUnicode_FromString("NaN");
1361 }
1362 }
1363 return PyFloat_Type.tp_repr(obj);
1364 }
1365
1366 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1367 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1368 {
1369 /* Return the JSON representation of a string */
1370 PyObject *encoded;
1371
1372 if (s->fast_encode) {
1373 return s->fast_encode(NULL, obj);
1374 }
1375 encoded = PyObject_CallOneArg(s->encoder, obj);
1376 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1377 PyErr_Format(PyExc_TypeError,
1378 "encoder() must return a string, not %.80s",
1379 Py_TYPE(encoded)->tp_name);
1380 Py_DECREF(encoded);
1381 return NULL;
1382 }
1383 return encoded;
1384 }
1385
1386 static int
_steal_accumulate(_PyAccu * acc,PyObject * stolen)1387 _steal_accumulate(_PyAccu *acc, PyObject *stolen)
1388 {
1389 /* Append stolen and then decrement its reference count */
1390 int rval = _PyAccu_Accumulate(acc, stolen);
1391 Py_DECREF(stolen);
1392 return rval;
1393 }
1394
1395 static int
encoder_listencode_obj(PyEncoderObject * s,_PyAccu * acc,PyObject * obj,Py_ssize_t indent_level)1396 encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
1397 PyObject *obj, Py_ssize_t indent_level)
1398 {
1399 /* Encode Python object obj to a JSON term */
1400 PyObject *newobj;
1401 int rv;
1402
1403 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1404 PyObject *cstr = _encoded_const(obj);
1405 if (cstr == NULL)
1406 return -1;
1407 return _steal_accumulate(acc, cstr);
1408 }
1409 else if (PyUnicode_Check(obj))
1410 {
1411 PyObject *encoded = encoder_encode_string(s, obj);
1412 if (encoded == NULL)
1413 return -1;
1414 return _steal_accumulate(acc, encoded);
1415 }
1416 else if (PyLong_Check(obj)) {
1417 PyObject *encoded = PyLong_Type.tp_repr(obj);
1418 if (encoded == NULL)
1419 return -1;
1420 return _steal_accumulate(acc, encoded);
1421 }
1422 else if (PyFloat_Check(obj)) {
1423 PyObject *encoded = encoder_encode_float(s, obj);
1424 if (encoded == NULL)
1425 return -1;
1426 return _steal_accumulate(acc, encoded);
1427 }
1428 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1429 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1430 return -1;
1431 rv = encoder_listencode_list(s, acc, obj, indent_level);
1432 Py_LeaveRecursiveCall();
1433 return rv;
1434 }
1435 else if (PyDict_Check(obj)) {
1436 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1437 return -1;
1438 rv = encoder_listencode_dict(s, acc, obj, indent_level);
1439 Py_LeaveRecursiveCall();
1440 return rv;
1441 }
1442 else {
1443 PyObject *ident = NULL;
1444 if (s->markers != Py_None) {
1445 int has_key;
1446 ident = PyLong_FromVoidPtr(obj);
1447 if (ident == NULL)
1448 return -1;
1449 has_key = PyDict_Contains(s->markers, ident);
1450 if (has_key) {
1451 if (has_key != -1)
1452 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1453 Py_DECREF(ident);
1454 return -1;
1455 }
1456 if (PyDict_SetItem(s->markers, ident, obj)) {
1457 Py_DECREF(ident);
1458 return -1;
1459 }
1460 }
1461 newobj = PyObject_CallOneArg(s->defaultfn, obj);
1462 if (newobj == NULL) {
1463 Py_XDECREF(ident);
1464 return -1;
1465 }
1466
1467 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1468 Py_DECREF(newobj);
1469 Py_XDECREF(ident);
1470 return -1;
1471 }
1472 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
1473 Py_LeaveRecursiveCall();
1474
1475 Py_DECREF(newobj);
1476 if (rv) {
1477 Py_XDECREF(ident);
1478 return -1;
1479 }
1480 if (ident != NULL) {
1481 if (PyDict_DelItem(s->markers, ident)) {
1482 Py_XDECREF(ident);
1483 return -1;
1484 }
1485 Py_XDECREF(ident);
1486 }
1487 return rv;
1488 }
1489 }
1490
1491 static int
encoder_listencode_dict(PyEncoderObject * s,_PyAccu * acc,PyObject * dct,Py_ssize_t indent_level)1492 encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
1493 PyObject *dct, Py_ssize_t indent_level)
1494 {
1495 /* Encode Python dict dct a JSON term */
1496 static PyObject *open_dict = NULL;
1497 static PyObject *close_dict = NULL;
1498 static PyObject *empty_dict = NULL;
1499 PyObject *kstr = NULL;
1500 PyObject *ident = NULL;
1501 PyObject *it = NULL;
1502 PyObject *items;
1503 PyObject *item = NULL;
1504 Py_ssize_t idx;
1505
1506 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1507 open_dict = PyUnicode_InternFromString("{");
1508 close_dict = PyUnicode_InternFromString("}");
1509 empty_dict = PyUnicode_InternFromString("{}");
1510 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1511 return -1;
1512 }
1513 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
1514 return _PyAccu_Accumulate(acc, empty_dict);
1515
1516 if (s->markers != Py_None) {
1517 int has_key;
1518 ident = PyLong_FromVoidPtr(dct);
1519 if (ident == NULL)
1520 goto bail;
1521 has_key = PyDict_Contains(s->markers, ident);
1522 if (has_key) {
1523 if (has_key != -1)
1524 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1525 goto bail;
1526 }
1527 if (PyDict_SetItem(s->markers, ident, dct)) {
1528 goto bail;
1529 }
1530 }
1531
1532 if (_PyAccu_Accumulate(acc, open_dict))
1533 goto bail;
1534
1535 if (s->indent != Py_None) {
1536 /* TODO: DOES NOT RUN */
1537 indent_level += 1;
1538 /*
1539 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1540 separator = _item_separator + newline_indent
1541 buf += newline_indent
1542 */
1543 }
1544
1545 items = PyMapping_Items(dct);
1546 if (items == NULL)
1547 goto bail;
1548 if (s->sort_keys && PyList_Sort(items) < 0) {
1549 Py_DECREF(items);
1550 goto bail;
1551 }
1552 it = PyObject_GetIter(items);
1553 Py_DECREF(items);
1554 if (it == NULL)
1555 goto bail;
1556 idx = 0;
1557 while ((item = PyIter_Next(it)) != NULL) {
1558 PyObject *encoded, *key, *value;
1559 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1560 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1561 goto bail;
1562 }
1563 key = PyTuple_GET_ITEM(item, 0);
1564 if (PyUnicode_Check(key)) {
1565 Py_INCREF(key);
1566 kstr = key;
1567 }
1568 else if (PyFloat_Check(key)) {
1569 kstr = encoder_encode_float(s, key);
1570 if (kstr == NULL)
1571 goto bail;
1572 }
1573 else if (key == Py_True || key == Py_False || key == Py_None) {
1574 /* This must come before the PyLong_Check because
1575 True and False are also 1 and 0.*/
1576 kstr = _encoded_const(key);
1577 if (kstr == NULL)
1578 goto bail;
1579 }
1580 else if (PyLong_Check(key)) {
1581 kstr = PyLong_Type.tp_repr(key);
1582 if (kstr == NULL) {
1583 goto bail;
1584 }
1585 }
1586 else if (s->skipkeys) {
1587 Py_DECREF(item);
1588 continue;
1589 }
1590 else {
1591 PyErr_Format(PyExc_TypeError,
1592 "keys must be str, int, float, bool or None, "
1593 "not %.100s", Py_TYPE(key)->tp_name);
1594 goto bail;
1595 }
1596
1597 if (idx) {
1598 if (_PyAccu_Accumulate(acc, s->item_separator))
1599 goto bail;
1600 }
1601
1602 encoded = encoder_encode_string(s, kstr);
1603 Py_CLEAR(kstr);
1604 if (encoded == NULL)
1605 goto bail;
1606 if (_PyAccu_Accumulate(acc, encoded)) {
1607 Py_DECREF(encoded);
1608 goto bail;
1609 }
1610 Py_DECREF(encoded);
1611 if (_PyAccu_Accumulate(acc, s->key_separator))
1612 goto bail;
1613
1614 value = PyTuple_GET_ITEM(item, 1);
1615 if (encoder_listencode_obj(s, acc, value, indent_level))
1616 goto bail;
1617 idx += 1;
1618 Py_DECREF(item);
1619 }
1620 if (PyErr_Occurred())
1621 goto bail;
1622 Py_CLEAR(it);
1623
1624 if (ident != NULL) {
1625 if (PyDict_DelItem(s->markers, ident))
1626 goto bail;
1627 Py_CLEAR(ident);
1628 }
1629 /* TODO DOES NOT RUN; dead code
1630 if (s->indent != Py_None) {
1631 indent_level -= 1;
1632
1633 yield '\n' + (' ' * (_indent * _current_indent_level))
1634 }*/
1635 if (_PyAccu_Accumulate(acc, close_dict))
1636 goto bail;
1637 return 0;
1638
1639 bail:
1640 Py_XDECREF(it);
1641 Py_XDECREF(item);
1642 Py_XDECREF(kstr);
1643 Py_XDECREF(ident);
1644 return -1;
1645 }
1646
1647
1648 static int
encoder_listencode_list(PyEncoderObject * s,_PyAccu * acc,PyObject * seq,Py_ssize_t indent_level)1649 encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
1650 PyObject *seq, Py_ssize_t indent_level)
1651 {
1652 /* Encode Python list seq to a JSON term */
1653 static PyObject *open_array = NULL;
1654 static PyObject *close_array = NULL;
1655 static PyObject *empty_array = NULL;
1656 PyObject *ident = NULL;
1657 PyObject *s_fast = NULL;
1658 Py_ssize_t i;
1659
1660 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1661 open_array = PyUnicode_InternFromString("[");
1662 close_array = PyUnicode_InternFromString("]");
1663 empty_array = PyUnicode_InternFromString("[]");
1664 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1665 return -1;
1666 }
1667 ident = NULL;
1668 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1669 if (s_fast == NULL)
1670 return -1;
1671 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1672 Py_DECREF(s_fast);
1673 return _PyAccu_Accumulate(acc, empty_array);
1674 }
1675
1676 if (s->markers != Py_None) {
1677 int has_key;
1678 ident = PyLong_FromVoidPtr(seq);
1679 if (ident == NULL)
1680 goto bail;
1681 has_key = PyDict_Contains(s->markers, ident);
1682 if (has_key) {
1683 if (has_key != -1)
1684 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1685 goto bail;
1686 }
1687 if (PyDict_SetItem(s->markers, ident, seq)) {
1688 goto bail;
1689 }
1690 }
1691
1692 if (_PyAccu_Accumulate(acc, open_array))
1693 goto bail;
1694 if (s->indent != Py_None) {
1695 /* TODO: DOES NOT RUN */
1696 indent_level += 1;
1697 /*
1698 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1699 separator = _item_separator + newline_indent
1700 buf += newline_indent
1701 */
1702 }
1703 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1704 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1705 if (i) {
1706 if (_PyAccu_Accumulate(acc, s->item_separator))
1707 goto bail;
1708 }
1709 if (encoder_listencode_obj(s, acc, obj, indent_level))
1710 goto bail;
1711 }
1712 if (ident != NULL) {
1713 if (PyDict_DelItem(s->markers, ident))
1714 goto bail;
1715 Py_CLEAR(ident);
1716 }
1717
1718 /* TODO: DOES NOT RUN
1719 if (s->indent != Py_None) {
1720 indent_level -= 1;
1721
1722 yield '\n' + (' ' * (_indent * _current_indent_level))
1723 }*/
1724 if (_PyAccu_Accumulate(acc, close_array))
1725 goto bail;
1726 Py_DECREF(s_fast);
1727 return 0;
1728
1729 bail:
1730 Py_XDECREF(ident);
1731 Py_DECREF(s_fast);
1732 return -1;
1733 }
1734
1735 static void
encoder_dealloc(PyObject * self)1736 encoder_dealloc(PyObject *self)
1737 {
1738 PyTypeObject *tp = Py_TYPE(self);
1739 /* bpo-31095: UnTrack is needed before calling any callbacks */
1740 PyObject_GC_UnTrack(self);
1741 encoder_clear((PyEncoderObject *)self);
1742 tp->tp_free(self);
1743 Py_DECREF(tp);
1744 }
1745
1746 static int
encoder_traverse(PyEncoderObject * self,visitproc visit,void * arg)1747 encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
1748 {
1749 Py_VISIT(Py_TYPE(self));
1750 Py_VISIT(self->markers);
1751 Py_VISIT(self->defaultfn);
1752 Py_VISIT(self->encoder);
1753 Py_VISIT(self->indent);
1754 Py_VISIT(self->key_separator);
1755 Py_VISIT(self->item_separator);
1756 return 0;
1757 }
1758
1759 static int
encoder_clear(PyEncoderObject * self)1760 encoder_clear(PyEncoderObject *self)
1761 {
1762 /* Deallocate Encoder */
1763 Py_CLEAR(self->markers);
1764 Py_CLEAR(self->defaultfn);
1765 Py_CLEAR(self->encoder);
1766 Py_CLEAR(self->indent);
1767 Py_CLEAR(self->key_separator);
1768 Py_CLEAR(self->item_separator);
1769 return 0;
1770 }
1771
1772 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1773
1774 static PyType_Slot PyEncoderType_slots[] = {
1775 {Py_tp_doc, (void *)encoder_doc},
1776 {Py_tp_dealloc, encoder_dealloc},
1777 {Py_tp_call, encoder_call},
1778 {Py_tp_traverse, encoder_traverse},
1779 {Py_tp_clear, encoder_clear},
1780 {Py_tp_members, encoder_members},
1781 {Py_tp_new, encoder_new},
1782 {0, 0}
1783 };
1784
1785 static PyType_Spec PyEncoderType_spec = {
1786 .name = "_json.Encoder",
1787 .basicsize = sizeof(PyEncoderObject),
1788 .itemsize = 0,
1789 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1790 .slots = PyEncoderType_slots
1791 };
1792
1793 static PyMethodDef speedups_methods[] = {
1794 {"encode_basestring_ascii",
1795 (PyCFunction)py_encode_basestring_ascii,
1796 METH_O,
1797 pydoc_encode_basestring_ascii},
1798 {"encode_basestring",
1799 (PyCFunction)py_encode_basestring,
1800 METH_O,
1801 pydoc_encode_basestring},
1802 {"scanstring",
1803 (PyCFunction)py_scanstring,
1804 METH_VARARGS,
1805 pydoc_scanstring},
1806 {NULL, NULL, 0, NULL}
1807 };
1808
1809 PyDoc_STRVAR(module_doc,
1810 "json speedups\n");
1811
1812 static int
_json_exec(PyObject * module)1813 _json_exec(PyObject *module)
1814 {
1815 _jsonmodulestate *state = get_json_state(module);
1816
1817 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1818 if (state->PyScannerType == NULL) {
1819 return -1;
1820 }
1821 Py_INCREF(state->PyScannerType);
1822 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
1823 Py_DECREF(state->PyScannerType);
1824 return -1;
1825 }
1826
1827 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1828 if (state->PyEncoderType == NULL) {
1829 return -1;
1830 }
1831 Py_INCREF(state->PyEncoderType);
1832 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
1833 Py_DECREF(state->PyEncoderType);
1834 return -1;
1835 }
1836
1837 return 0;
1838 }
1839
1840 static int
_jsonmodule_traverse(PyObject * module,visitproc visit,void * arg)1841 _jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1842 {
1843 _jsonmodulestate *state = get_json_state(module);
1844 Py_VISIT(state->PyScannerType);
1845 Py_VISIT(state->PyEncoderType);
1846 return 0;
1847 }
1848
1849 static int
_jsonmodule_clear(PyObject * module)1850 _jsonmodule_clear(PyObject *module)
1851 {
1852 _jsonmodulestate *state = get_json_state(module);
1853 Py_CLEAR(state->PyScannerType);
1854 Py_CLEAR(state->PyEncoderType);
1855 return 0;
1856 }
1857
1858 static void
_jsonmodule_free(void * module)1859 _jsonmodule_free(void *module)
1860 {
1861 _jsonmodule_clear((PyObject *)module);
1862 }
1863
1864 static PyModuleDef_Slot _json_slots[] = {
1865 {Py_mod_exec, _json_exec},
1866 {0, NULL}
1867 };
1868
1869 static struct PyModuleDef jsonmodule = {
1870 PyModuleDef_HEAD_INIT,
1871 "_json",
1872 module_doc,
1873 sizeof(_jsonmodulestate),
1874 speedups_methods,
1875 _json_slots,
1876 _jsonmodule_traverse,
1877 _jsonmodule_clear,
1878 _jsonmodule_free,
1879 };
1880
1881 PyMODINIT_FUNC
PyInit__json(void)1882 PyInit__json(void)
1883 {
1884 return PyModuleDef_Init(&jsonmodule);
1885 }
1886