1 #include "Python.h"
2 #include "structmember.h"
/* Compatibility shims so this file builds against older Python 2 headers
   and with non-GNU compilers. */

/* Py_TYPE(): supplied here only for headers older than 2.6 that lack it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#  define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Py_ssize_t and friends: for headers older than 2.5 that do not define
   PY_SSIZE_T_MIN, fall back to plain int and the long-based PyInt helpers. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#  define PY_SSIZE_T_MAX INT_MAX
#  define PY_SSIZE_T_MIN INT_MIN
#  define PyInt_FromSsize_t PyInt_FromLong
#  define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Py_IS_FINITE(X): true when X is neither infinite nor NaN. */
#ifndef Py_IS_FINITE
#  define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED: tags a parameter as intentionally unused (GCC attribute);
   expands to nothing on other compilers. */
#ifdef __GNUC__
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
22
23 #define DEFAULT_ENCODING "utf-8"
24
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
32
33 typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
38 PyObject *pairs_hook;
39 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42 } PyScannerObject;
43
44 static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53 };
54
55 typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67 } PyEncoderObject;
68
69 static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79 };
80
81 static Py_ssize_t
82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83 static PyObject *
84 ascii_escape_unicode(PyObject *pystr);
85 static PyObject *
86 ascii_escape_str(PyObject *pystr);
87 static PyObject *
88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89 void init_json(void);
90 static PyObject *
91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94 static PyObject *
95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96 static PyObject *
97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98 static int
99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100 static void
101 scanner_dealloc(PyObject *self);
102 static int
103 scanner_clear(PyObject *self);
104 static PyObject *
105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106 static int
107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108 static void
109 encoder_dealloc(PyObject *self);
110 static int
111 encoder_clear(PyObject *self);
112 static int
113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116 static int
117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118 static PyObject *
119 _encoded_const(PyObject *obj);
120 static void
121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122 static PyObject *
123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124 static int
125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126 static PyObject *
127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128 static PyObject *
129 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c can be copied verbatim into a JSON string, i.e.
   printable ASCII (' ' .. '~') other than backslash and double quote.
   The argument is parenthesized so that expression arguments such as
   `a | b` are evaluated as a unit; note it is still evaluated several
   times, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for the four JSON whitespace characters. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Worst-case number of output bytes produced for one input character:
   "\uXXXX" is 6 bytes; on wide-unicode builds a code point above the BMP
   becomes a surrogate pair "\uXXXX\uXXXX", i.e. twice that. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
141 static int
_convertPyInt_AsSsize_t(PyObject * o,Py_ssize_t * size_ptr)142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143 {
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
149 }
150
151 static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t * size_ptr)152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153 {
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156 }
157
158 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160 {
161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174 #ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187 #endif
188 output[chars++] = 'u';
189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
193 }
194 return chars;
195 }
196
197 static PyObject *
ascii_escape_unicode(PyObject * pystr)198 ascii_escape_unicode(PyObject *pystr)
199 {
200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
204 Py_ssize_t max_output_size;
205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
212
213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
216 rval = PyString_FromStringAndSize(NULL, output_size);
217 if (rval == NULL) {
218 return NULL;
219 }
220 output = PyString_AS_STRING(rval);
221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
228 else {
229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
233 Py_ssize_t new_output_size = output_size * 2;
234 /* This is an upper bound */
235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
237 }
238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
245 }
246 }
247 }
248 output[chars++] = '"';
249 if (_PyString_Resize(&rval, chars) == -1) {
250 return NULL;
251 }
252 return rval;
253 }
254
255 static PyObject *
ascii_escape_str(PyObject * pystr)256 ascii_escape_str(PyObject *pystr)
257 {
258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
302 rval = PyString_FromStringAndSize(NULL, output_size);
303 if (rval == NULL) {
304 return NULL;
305 }
306 output = PyString_AS_STRING(rval);
307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
318 else {
319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
328 if (_PyString_Resize(&rval, output_size) == -1) {
329 return NULL;
330 }
331 output = PyString_AS_STRING(rval);
332 }
333 }
334 output[chars++] = '"';
335 if (_PyString_Resize(&rval, chars) == -1) {
336 return NULL;
337 }
338 return rval;
339 }
340
341 static void
raise_errmsg(char * msg,PyObject * s,Py_ssize_t end)342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343 {
344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
353 Py_DECREF(decoder);
354 if (errmsg_fn == NULL)
355 return;
356 }
357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
362 }
363
364 static PyObject *
join_list_unicode(PyObject * lst)365 join_list_unicode(PyObject *lst)
366 {
367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
378 }
379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
380 }
381
382 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)383 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384 /* return (rval, idx) tuple, stealing reference to rval */
385 PyObject *tpl;
386 PyObject *pyidx;
387 /*
388 steal a reference to rval, returns (rval, idx)
389 */
390 if (rval == NULL) {
391 return NULL;
392 }
393 pyidx = PyInt_FromSsize_t(idx);
394 if (pyidx == NULL) {
395 Py_DECREF(rval);
396 return NULL;
397 }
398 tpl = PyTuple_New(2);
399 if (tpl == NULL) {
400 Py_DECREF(pyidx);
401 Py_DECREF(rval);
402 return NULL;
403 }
404 PyTuple_SET_ITEM(tpl, 0, rval);
405 PyTuple_SET_ITEM(tpl, 1, pyidx);
406 return tpl;
407 }
408
409 static PyObject *
scanstring_str(PyObject * pystr,Py_ssize_t end,char * encoding,int strict,Py_ssize_t * next_end_ptr)410 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411 {
412 /* Read the JSON string from PyString pystr.
413 end is the index of the first character after the quote.
414 encoding is the encoding of pystr (must be an ASCII superset)
415 if strict is zero then literal control characters are allowed
416 *next_end_ptr is a return-by-reference index of the character
417 after the end quote
418
419 Return value is a new PyString (if ASCII-only) or PyUnicode
420 */
421 PyObject *rval;
422 Py_ssize_t len = PyString_GET_SIZE(pystr);
423 Py_ssize_t begin = end - 1;
424 Py_ssize_t next;
425 char *buf = PyString_AS_STRING(pystr);
426 PyObject *chunks = PyList_New(0);
427 if (chunks == NULL) {
428 goto bail;
429 }
430 if (end < 0 || len <= end) {
431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
434 while (1) {
435 /* Find the end of the string or the next escape */
436 Py_UNICODE c = 0;
437 PyObject *chunk = NULL;
438 for (next = end; next < len; next++) {
439 c = (unsigned char)buf[next];
440 if (c == '"' || c == '\\') {
441 break;
442 }
443 else if (strict && c <= 0x1f) {
444 raise_errmsg("Invalid control character at", pystr, next);
445 goto bail;
446 }
447 }
448 if (!(c == '"' || c == '\\')) {
449 raise_errmsg("Unterminated string starting at", pystr, begin);
450 goto bail;
451 }
452 /* Pick up this chunk if it's not zero length */
453 if (next != end) {
454 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
455 if (strchunk == NULL) {
456 goto bail;
457 }
458 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459 Py_DECREF(strchunk);
460 if (chunk == NULL) {
461 goto bail;
462 }
463 if (PyList_Append(chunks, chunk)) {
464 Py_DECREF(chunk);
465 goto bail;
466 }
467 Py_DECREF(chunk);
468 }
469 next++;
470 if (c == '"') {
471 end = next;
472 break;
473 }
474 if (next == len) {
475 raise_errmsg("Unterminated string starting at", pystr, begin);
476 goto bail;
477 }
478 c = buf[next];
479 if (c != 'u') {
480 /* Non-unicode backslash escapes */
481 end = next + 1;
482 switch (c) {
483 case '"': break;
484 case '\\': break;
485 case '/': break;
486 case 'b': c = '\b'; break;
487 case 'f': c = '\f'; break;
488 case 'n': c = '\n'; break;
489 case 'r': c = '\r'; break;
490 case 't': c = '\t'; break;
491 default: c = 0;
492 }
493 if (c == 0) {
494 raise_errmsg("Invalid \\escape", pystr, end - 2);
495 goto bail;
496 }
497 }
498 else {
499 c = 0;
500 next++;
501 end = next + 4;
502 if (end >= len) {
503 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504 goto bail;
505 }
506 /* Decode 4 hex digits */
507 for (; next < end; next++) {
508 Py_UNICODE digit = buf[next];
509 c <<= 4;
510 switch (digit) {
511 case '0': case '1': case '2': case '3': case '4':
512 case '5': case '6': case '7': case '8': case '9':
513 c |= (digit - '0'); break;
514 case 'a': case 'b': case 'c': case 'd': case 'e':
515 case 'f':
516 c |= (digit - 'a' + 10); break;
517 case 'A': case 'B': case 'C': case 'D': case 'E':
518 case 'F':
519 c |= (digit - 'A' + 10); break;
520 default:
521 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522 goto bail;
523 }
524 }
525 #ifdef Py_UNICODE_WIDE
526 /* Surrogate pair */
527 if ((c & 0xfc00) == 0xd800) {
528 Py_UNICODE c2 = 0;
529 if (end + 6 >= len) {
530 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
531 goto bail;
532 }
533 if (buf[next++] != '\\' || buf[next++] != 'u') {
534 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
535 goto bail;
536 }
537 end += 6;
538 /* Decode 4 hex digits */
539 for (; next < end; next++) {
540 Py_UNICODE digit = buf[next];
541 c2 <<= 4;
542 switch (digit) {
543 case '0': case '1': case '2': case '3': case '4':
544 case '5': case '6': case '7': case '8': case '9':
545 c2 |= (digit - '0'); break;
546 case 'a': case 'b': case 'c': case 'd': case 'e':
547 case 'f':
548 c2 |= (digit - 'a' + 10); break;
549 case 'A': case 'B': case 'C': case 'D': case 'E':
550 case 'F':
551 c2 |= (digit - 'A' + 10); break;
552 default:
553 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
554 goto bail;
555 }
556 }
557 if ((c2 & 0xfc00) != 0xdc00) {
558 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
559 goto bail;
560 }
561 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
562 }
563 else if ((c & 0xfc00) == 0xdc00) {
564 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
565 goto bail;
566 }
567 #endif
568 }
569 chunk = PyUnicode_FromUnicode(&c, 1);
570 if (chunk == NULL) {
571 goto bail;
572 }
573 if (PyList_Append(chunks, chunk)) {
574 Py_DECREF(chunk);
575 goto bail;
576 }
577 Py_DECREF(chunk);
578 }
579
580 rval = join_list_unicode(chunks);
581 if (rval == NULL) {
582 goto bail;
583 }
584 Py_CLEAR(chunks);
585 *next_end_ptr = end;
586 return rval;
587 bail:
588 *next_end_ptr = -1;
589 Py_XDECREF(chunks);
590 return NULL;
591 }
592
593
594 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)595 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
596 {
597 /* Read the JSON string from PyUnicode pystr.
598 end is the index of the first character after the quote.
599 if strict is zero then literal control characters are allowed
600 *next_end_ptr is a return-by-reference index of the character
601 after the end quote
602
603 Return value is a new PyUnicode
604 */
605 PyObject *rval;
606 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
607 Py_ssize_t begin = end - 1;
608 Py_ssize_t next;
609 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
610 PyObject *chunks = PyList_New(0);
611 if (chunks == NULL) {
612 goto bail;
613 }
614 if (end < 0 || len <= end) {
615 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
616 goto bail;
617 }
618 while (1) {
619 /* Find the end of the string or the next escape */
620 Py_UNICODE c = 0;
621 PyObject *chunk = NULL;
622 for (next = end; next < len; next++) {
623 c = buf[next];
624 if (c == '"' || c == '\\') {
625 break;
626 }
627 else if (strict && c <= 0x1f) {
628 raise_errmsg("Invalid control character at", pystr, next);
629 goto bail;
630 }
631 }
632 if (!(c == '"' || c == '\\')) {
633 raise_errmsg("Unterminated string starting at", pystr, begin);
634 goto bail;
635 }
636 /* Pick up this chunk if it's not zero length */
637 if (next != end) {
638 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
639 if (chunk == NULL) {
640 goto bail;
641 }
642 if (PyList_Append(chunks, chunk)) {
643 Py_DECREF(chunk);
644 goto bail;
645 }
646 Py_DECREF(chunk);
647 }
648 next++;
649 if (c == '"') {
650 end = next;
651 break;
652 }
653 if (next == len) {
654 raise_errmsg("Unterminated string starting at", pystr, begin);
655 goto bail;
656 }
657 c = buf[next];
658 if (c != 'u') {
659 /* Non-unicode backslash escapes */
660 end = next + 1;
661 switch (c) {
662 case '"': break;
663 case '\\': break;
664 case '/': break;
665 case 'b': c = '\b'; break;
666 case 'f': c = '\f'; break;
667 case 'n': c = '\n'; break;
668 case 'r': c = '\r'; break;
669 case 't': c = '\t'; break;
670 default: c = 0;
671 }
672 if (c == 0) {
673 raise_errmsg("Invalid \\escape", pystr, end - 2);
674 goto bail;
675 }
676 }
677 else {
678 c = 0;
679 next++;
680 end = next + 4;
681 if (end >= len) {
682 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
683 goto bail;
684 }
685 /* Decode 4 hex digits */
686 for (; next < end; next++) {
687 Py_UNICODE digit = buf[next];
688 c <<= 4;
689 switch (digit) {
690 case '0': case '1': case '2': case '3': case '4':
691 case '5': case '6': case '7': case '8': case '9':
692 c |= (digit - '0'); break;
693 case 'a': case 'b': case 'c': case 'd': case 'e':
694 case 'f':
695 c |= (digit - 'a' + 10); break;
696 case 'A': case 'B': case 'C': case 'D': case 'E':
697 case 'F':
698 c |= (digit - 'A' + 10); break;
699 default:
700 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
701 goto bail;
702 }
703 }
704 #ifdef Py_UNICODE_WIDE
705 /* Surrogate pair */
706 if ((c & 0xfc00) == 0xd800) {
707 Py_UNICODE c2 = 0;
708 if (end + 6 >= len) {
709 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
710 goto bail;
711 }
712 if (buf[next++] != '\\' || buf[next++] != 'u') {
713 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
714 goto bail;
715 }
716 end += 6;
717 /* Decode 4 hex digits */
718 for (; next < end; next++) {
719 Py_UNICODE digit = buf[next];
720 c2 <<= 4;
721 switch (digit) {
722 case '0': case '1': case '2': case '3': case '4':
723 case '5': case '6': case '7': case '8': case '9':
724 c2 |= (digit - '0'); break;
725 case 'a': case 'b': case 'c': case 'd': case 'e':
726 case 'f':
727 c2 |= (digit - 'a' + 10); break;
728 case 'A': case 'B': case 'C': case 'D': case 'E':
729 case 'F':
730 c2 |= (digit - 'A' + 10); break;
731 default:
732 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
733 goto bail;
734 }
735 }
736 if ((c2 & 0xfc00) != 0xdc00) {
737 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
738 goto bail;
739 }
740 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
741 }
742 else if ((c & 0xfc00) == 0xdc00) {
743 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
744 goto bail;
745 }
746 #endif
747 }
748 chunk = PyUnicode_FromUnicode(&c, 1);
749 if (chunk == NULL) {
750 goto bail;
751 }
752 if (PyList_Append(chunks, chunk)) {
753 Py_DECREF(chunk);
754 goto bail;
755 }
756 Py_DECREF(chunk);
757 }
758
759 rval = join_list_unicode(chunks);
760 if (rval == NULL) {
761 goto bail;
762 }
763 Py_DECREF(chunks);
764 *next_end_ptr = end;
765 return rval;
766 bail:
767 *next_end_ptr = -1;
768 Py_XDECREF(chunks);
769 return NULL;
770 }
771
772 PyDoc_STRVAR(pydoc_scanstring,
773 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774 "\n"
775 "Scan the string s for a JSON string. End is the index of the\n"
776 "character in s after the quote that started the JSON string.\n"
777 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778 "on attempt to decode an invalid string. If strict is False then literal\n"
779 "control characters are allowed in the string.\n"
780 "\n"
781 "Returns a tuple of the decoded string and the index of the character in s\n"
782 "after the end quote."
783 );
784
785 static PyObject *
py_scanstring(PyObject * self UNUSED,PyObject * args)786 py_scanstring(PyObject* self UNUSED, PyObject *args)
787 {
788 PyObject *pystr;
789 PyObject *rval;
790 Py_ssize_t end;
791 Py_ssize_t next_end = -1;
792 char *encoding = NULL;
793 int strict = 1;
794 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
795 return NULL;
796 }
797 if (encoding == NULL) {
798 encoding = DEFAULT_ENCODING;
799 }
800 if (PyString_Check(pystr)) {
801 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
802 }
803 else if (PyUnicode_Check(pystr)) {
804 rval = scanstring_unicode(pystr, end, strict, &next_end);
805 }
806 else {
807 PyErr_Format(PyExc_TypeError,
808 "first argument must be a string, not %.80s",
809 Py_TYPE(pystr)->tp_name);
810 return NULL;
811 }
812 return _build_rval_index_tuple(rval, next_end);
813 }
814
815 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
816 "encode_basestring_ascii(basestring) -> str\n"
817 "\n"
818 "Return an ASCII-only JSON representation of a Python string"
819 );
820
821 static PyObject *
py_encode_basestring_ascii(PyObject * self UNUSED,PyObject * pystr)822 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
823 {
824 /* Return an ASCII-only JSON representation of a Python string */
825 /* METH_O */
826 if (PyString_Check(pystr)) {
827 return ascii_escape_str(pystr);
828 }
829 else if (PyUnicode_Check(pystr)) {
830 return ascii_escape_unicode(pystr);
831 }
832 else {
833 PyErr_Format(PyExc_TypeError,
834 "first argument must be a string, not %.80s",
835 Py_TYPE(pystr)->tp_name);
836 return NULL;
837 }
838 }
839
840 static void
scanner_dealloc(PyObject * self)841 scanner_dealloc(PyObject *self)
842 {
843 /* Deallocate scanner object */
844 scanner_clear(self);
845 Py_TYPE(self)->tp_free(self);
846 }
847
848 static int
scanner_traverse(PyObject * self,visitproc visit,void * arg)849 scanner_traverse(PyObject *self, visitproc visit, void *arg)
850 {
851 PyScannerObject *s;
852 assert(PyScanner_Check(self));
853 s = (PyScannerObject *)self;
854 Py_VISIT(s->encoding);
855 Py_VISIT(s->strict);
856 Py_VISIT(s->object_hook);
857 Py_VISIT(s->pairs_hook);
858 Py_VISIT(s->parse_float);
859 Py_VISIT(s->parse_int);
860 Py_VISIT(s->parse_constant);
861 return 0;
862 }
863
864 static int
scanner_clear(PyObject * self)865 scanner_clear(PyObject *self)
866 {
867 PyScannerObject *s;
868 assert(PyScanner_Check(self));
869 s = (PyScannerObject *)self;
870 Py_CLEAR(s->encoding);
871 Py_CLEAR(s->strict);
872 Py_CLEAR(s->object_hook);
873 Py_CLEAR(s->pairs_hook);
874 Py_CLEAR(s->parse_float);
875 Py_CLEAR(s->parse_int);
876 Py_CLEAR(s->parse_constant);
877 return 0;
878 }
879
880 static PyObject *
_parse_object_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)881 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
882 /* Read a JSON object from PyString pystr.
883 idx is the index of the first character after the opening curly brace.
884 *next_idx_ptr is a return-by-reference index to the first character after
885 the closing curly brace.
886
887 Returns a new PyObject (usually a dict, but object_hook can change that)
888 */
889 char *str = PyString_AS_STRING(pystr);
890 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
891 PyObject *rval;
892 PyObject *pairs;
893 PyObject *item;
894 PyObject *key = NULL;
895 PyObject *val = NULL;
896 char *encoding = PyString_AS_STRING(s->encoding);
897 int strict = PyObject_IsTrue(s->strict);
898 Py_ssize_t next_idx;
899
900 pairs = PyList_New(0);
901 if (pairs == NULL)
902 return NULL;
903
904 /* skip whitespace after { */
905 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
906
907 /* only loop if the object is non-empty */
908 if (idx <= end_idx && str[idx] != '}') {
909 while (idx <= end_idx) {
910 /* read key */
911 if (str[idx] != '"') {
912 raise_errmsg("Expecting property name", pystr, idx);
913 goto bail;
914 }
915 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
916 if (key == NULL)
917 goto bail;
918 idx = next_idx;
919
920 /* skip whitespace between key and : delimiter, read :, skip whitespace */
921 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
922 if (idx > end_idx || str[idx] != ':') {
923 raise_errmsg("Expecting : delimiter", pystr, idx);
924 goto bail;
925 }
926 idx++;
927 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
928
929 /* read any JSON data type */
930 val = scan_once_str(s, pystr, idx, &next_idx);
931 if (val == NULL)
932 goto bail;
933
934 item = PyTuple_Pack(2, key, val);
935 if (item == NULL)
936 goto bail;
937 Py_CLEAR(key);
938 Py_CLEAR(val);
939 if (PyList_Append(pairs, item) == -1) {
940 Py_DECREF(item);
941 goto bail;
942 }
943 Py_DECREF(item);
944 idx = next_idx;
945
946 /* skip whitespace before } or , */
947 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
948
949 /* bail if the object is closed or we didn't get the , delimiter */
950 if (idx > end_idx) break;
951 if (str[idx] == '}') {
952 break;
953 }
954 else if (str[idx] != ',') {
955 raise_errmsg("Expecting , delimiter", pystr, idx);
956 goto bail;
957 }
958 idx++;
959
960 /* skip whitespace after , delimiter */
961 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
962 }
963 }
964 /* verify that idx < end_idx, str[idx] should be '}' */
965 if (idx > end_idx || str[idx] != '}') {
966 raise_errmsg("Expecting object", pystr, end_idx);
967 goto bail;
968 }
969
970 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
971 if (s->pairs_hook != Py_None) {
972 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
973 if (val == NULL)
974 goto bail;
975 Py_DECREF(pairs);
976 *next_idx_ptr = idx + 1;
977 return val;
978 }
979
980 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
981 pairs, NULL);
982 if (rval == NULL)
983 goto bail;
984 Py_CLEAR(pairs);
985
986 /* if object_hook is not None: rval = object_hook(rval) */
987 if (s->object_hook != Py_None) {
988 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
989 if (val == NULL)
990 goto bail;
991 Py_DECREF(rval);
992 rval = val;
993 val = NULL;
994 }
995 *next_idx_ptr = idx + 1;
996 return rval;
997 bail:
998 Py_XDECREF(key);
999 Py_XDECREF(val);
1000 Py_XDECREF(pairs);
1001 return NULL;
1002 }
1003
1004 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1005 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1006 /* Read a JSON object from PyUnicode pystr.
1007 idx is the index of the first character after the opening curly brace.
1008 *next_idx_ptr is a return-by-reference index to the first character after
1009 the closing curly brace.
1010
1011 Returns a new PyObject (usually a dict, but object_hook can change that)
1012 */
1013 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1014 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1015 PyObject *rval;
1016 PyObject *pairs;
1017 PyObject *item;
1018 PyObject *key = NULL;
1019 PyObject *val = NULL;
1020 int strict = PyObject_IsTrue(s->strict);
1021 Py_ssize_t next_idx;
1022
1023 pairs = PyList_New(0);
1024 if (pairs == NULL)
1025 return NULL;
1026
1027 /* skip whitespace after { */
1028 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1029
1030 /* only loop if the object is non-empty */
1031 if (idx <= end_idx && str[idx] != '}') {
1032 while (idx <= end_idx) {
1033 /* read key */
1034 if (str[idx] != '"') {
1035 raise_errmsg("Expecting property name", pystr, idx);
1036 goto bail;
1037 }
1038 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1039 if (key == NULL)
1040 goto bail;
1041 idx = next_idx;
1042
1043 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1044 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1045 if (idx > end_idx || str[idx] != ':') {
1046 raise_errmsg("Expecting : delimiter", pystr, idx);
1047 goto bail;
1048 }
1049 idx++;
1050 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1051
1052 /* read any JSON term */
1053 val = scan_once_unicode(s, pystr, idx, &next_idx);
1054 if (val == NULL)
1055 goto bail;
1056
1057 item = PyTuple_Pack(2, key, val);
1058 if (item == NULL)
1059 goto bail;
1060 Py_CLEAR(key);
1061 Py_CLEAR(val);
1062 if (PyList_Append(pairs, item) == -1) {
1063 Py_DECREF(item);
1064 goto bail;
1065 }
1066 Py_DECREF(item);
1067 idx = next_idx;
1068
1069 /* skip whitespace before } or , */
1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1071
1072 /* bail if the object is closed or we didn't get the , delimiter */
1073 if (idx > end_idx) break;
1074 if (str[idx] == '}') {
1075 break;
1076 }
1077 else if (str[idx] != ',') {
1078 raise_errmsg("Expecting , delimiter", pystr, idx);
1079 goto bail;
1080 }
1081 idx++;
1082
1083 /* skip whitespace after , delimiter */
1084 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1085 }
1086 }
1087
1088 /* verify that idx < end_idx, str[idx] should be '}' */
1089 if (idx > end_idx || str[idx] != '}') {
1090 raise_errmsg("Expecting object", pystr, end_idx);
1091 goto bail;
1092 }
1093
1094 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1095 if (s->pairs_hook != Py_None) {
1096 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1097 if (val == NULL)
1098 goto bail;
1099 Py_DECREF(pairs);
1100 *next_idx_ptr = idx + 1;
1101 return val;
1102 }
1103
1104 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1105 pairs, NULL);
1106 if (rval == NULL)
1107 goto bail;
1108 Py_CLEAR(pairs);
1109
1110 /* if object_hook is not None: rval = object_hook(rval) */
1111 if (s->object_hook != Py_None) {
1112 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1113 if (val == NULL)
1114 goto bail;
1115 Py_DECREF(rval);
1116 rval = val;
1117 val = NULL;
1118 }
1119 *next_idx_ptr = idx + 1;
1120 return rval;
1121 bail:
1122 Py_XDECREF(key);
1123 Py_XDECREF(val);
1124 Py_XDECREF(pairs);
1125 return NULL;
1126 }
1127
1128 static PyObject *
_parse_array_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1129 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1130 /* Read a JSON array from PyString pystr.
1131 idx is the index of the first character after the opening brace.
1132 *next_idx_ptr is a return-by-reference index to the first character after
1133 the closing brace.
1134
1135 Returns a new PyList
1136 */
1137 char *str = PyString_AS_STRING(pystr);
1138 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1139 PyObject *val = NULL;
1140 PyObject *rval = PyList_New(0);
1141 Py_ssize_t next_idx;
1142 if (rval == NULL)
1143 return NULL;
1144
1145 /* skip whitespace after [ */
1146 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1147
1148 /* only loop if the array is non-empty */
1149 if (idx <= end_idx && str[idx] != ']') {
1150 while (idx <= end_idx) {
1151
1152 /* read any JSON term and de-tuplefy the (rval, idx) */
1153 val = scan_once_str(s, pystr, idx, &next_idx);
1154 if (val == NULL)
1155 goto bail;
1156
1157 if (PyList_Append(rval, val) == -1)
1158 goto bail;
1159
1160 Py_CLEAR(val);
1161 idx = next_idx;
1162
1163 /* skip whitespace between term and , */
1164 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1165
1166 /* bail if the array is closed or we didn't get the , delimiter */
1167 if (idx > end_idx) break;
1168 if (str[idx] == ']') {
1169 break;
1170 }
1171 else if (str[idx] != ',') {
1172 raise_errmsg("Expecting , delimiter", pystr, idx);
1173 goto bail;
1174 }
1175 idx++;
1176
1177 /* skip whitespace after , */
1178 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1179 }
1180 }
1181
1182 /* verify that idx < end_idx, str[idx] should be ']' */
1183 if (idx > end_idx || str[idx] != ']') {
1184 raise_errmsg("Expecting object", pystr, end_idx);
1185 goto bail;
1186 }
1187 *next_idx_ptr = idx + 1;
1188 return rval;
1189 bail:
1190 Py_XDECREF(val);
1191 Py_DECREF(rval);
1192 return NULL;
1193 }
1194
1195 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1196 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1197 /* Read a JSON array from PyString pystr.
1198 idx is the index of the first character after the opening brace.
1199 *next_idx_ptr is a return-by-reference index to the first character after
1200 the closing brace.
1201
1202 Returns a new PyList
1203 */
1204 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1205 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1206 PyObject *val = NULL;
1207 PyObject *rval = PyList_New(0);
1208 Py_ssize_t next_idx;
1209 if (rval == NULL)
1210 return NULL;
1211
1212 /* skip whitespace after [ */
1213 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1214
1215 /* only loop if the array is non-empty */
1216 if (idx <= end_idx && str[idx] != ']') {
1217 while (idx <= end_idx) {
1218
1219 /* read any JSON term */
1220 val = scan_once_unicode(s, pystr, idx, &next_idx);
1221 if (val == NULL)
1222 goto bail;
1223
1224 if (PyList_Append(rval, val) == -1)
1225 goto bail;
1226
1227 Py_CLEAR(val);
1228 idx = next_idx;
1229
1230 /* skip whitespace between term and , */
1231 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232
1233 /* bail if the array is closed or we didn't get the , delimiter */
1234 if (idx > end_idx) break;
1235 if (str[idx] == ']') {
1236 break;
1237 }
1238 else if (str[idx] != ',') {
1239 raise_errmsg("Expecting , delimiter", pystr, idx);
1240 goto bail;
1241 }
1242 idx++;
1243
1244 /* skip whitespace after , */
1245 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1246 }
1247 }
1248
1249 /* verify that idx < end_idx, str[idx] should be ']' */
1250 if (idx > end_idx || str[idx] != ']') {
1251 raise_errmsg("Expecting object", pystr, end_idx);
1252 goto bail;
1253 }
1254 *next_idx_ptr = idx + 1;
1255 return rval;
1256 bail:
1257 Py_XDECREF(val);
1258 Py_DECREF(rval);
1259 return NULL;
1260 }
1261
1262 static PyObject *
_parse_constant(PyScannerObject * s,char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1263 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1264 /* Read a JSON constant from PyString pystr.
1265 constant is the constant string that was found
1266 ("NaN", "Infinity", "-Infinity").
1267 idx is the index of the first character of the constant
1268 *next_idx_ptr is a return-by-reference index to the first character after
1269 the constant.
1270
1271 Returns the result of parse_constant
1272 */
1273 PyObject *cstr;
1274 PyObject *rval;
1275 /* constant is "NaN", "Infinity", or "-Infinity" */
1276 cstr = PyString_InternFromString(constant);
1277 if (cstr == NULL)
1278 return NULL;
1279
1280 /* rval = parse_constant(constant) */
1281 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1282 idx += PyString_GET_SIZE(cstr);
1283 Py_DECREF(cstr);
1284 *next_idx_ptr = idx;
1285 return rval;
1286 }
1287
1288 static PyObject *
_match_number_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1289 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1290 /* Read a JSON number from PyString pystr.
1291 idx is the index of the first character of the number
1292 *next_idx_ptr is a return-by-reference index to the first character after
1293 the number.
1294
1295 Returns a new PyObject representation of that number:
1296 PyInt, PyLong, or PyFloat.
1297 May return other types if parse_int or parse_float are set
1298 */
1299 char *str = PyString_AS_STRING(pystr);
1300 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1301 Py_ssize_t idx = start;
1302 int is_float = 0;
1303 PyObject *rval;
1304 PyObject *numstr;
1305
1306 /* read a sign if it's there, make sure it's not the end of the string */
1307 if (str[idx] == '-') {
1308 idx++;
1309 if (idx > end_idx) {
1310 PyErr_SetNone(PyExc_StopIteration);
1311 return NULL;
1312 }
1313 }
1314
1315 /* read as many integer digits as we find as long as it doesn't start with 0 */
1316 if (str[idx] >= '1' && str[idx] <= '9') {
1317 idx++;
1318 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1319 }
1320 /* if it starts with 0 we only expect one integer digit */
1321 else if (str[idx] == '0') {
1322 idx++;
1323 }
1324 /* no integer digits, error */
1325 else {
1326 PyErr_SetNone(PyExc_StopIteration);
1327 return NULL;
1328 }
1329
1330 /* if the next char is '.' followed by a digit then read all float digits */
1331 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1332 is_float = 1;
1333 idx += 2;
1334 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1335 }
1336
1337 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1338 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1339
1340 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1341 Py_ssize_t e_start = idx;
1342 idx++;
1343
1344 /* read an exponent sign if present */
1345 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1346
1347 /* read all digits */
1348 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1349
1350 /* if we got a digit, then parse as float. if not, backtrack */
1351 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1352 is_float = 1;
1353 }
1354 else {
1355 idx = e_start;
1356 }
1357 }
1358
1359 /* copy the section we determined to be a number */
1360 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1361 if (numstr == NULL)
1362 return NULL;
1363 if (is_float) {
1364 /* parse as a float using a fast path if available, otherwise call user defined method */
1365 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1366 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1367 }
1368 else {
1369 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1370 NULL, NULL);
1371 if (d == -1.0 && PyErr_Occurred())
1372 return NULL;
1373 rval = PyFloat_FromDouble(d);
1374 }
1375 }
1376 else {
1377 /* parse as an int using a fast path if available, otherwise call user defined method */
1378 if (s->parse_int != (PyObject *)&PyInt_Type) {
1379 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1380 }
1381 else {
1382 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1383 }
1384 }
1385 Py_DECREF(numstr);
1386 *next_idx_ptr = idx;
1387 return rval;
1388 }
1389
1390 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1391 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1392 /* Read a JSON number from PyUnicode pystr.
1393 idx is the index of the first character of the number
1394 *next_idx_ptr is a return-by-reference index to the first character after
1395 the number.
1396
1397 Returns a new PyObject representation of that number:
1398 PyInt, PyLong, or PyFloat.
1399 May return other types if parse_int or parse_float are set
1400 */
1401 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1402 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1403 Py_ssize_t idx = start;
1404 int is_float = 0;
1405 PyObject *rval;
1406 PyObject *numstr;
1407
1408 /* read a sign if it's there, make sure it's not the end of the string */
1409 if (str[idx] == '-') {
1410 idx++;
1411 if (idx > end_idx) {
1412 PyErr_SetNone(PyExc_StopIteration);
1413 return NULL;
1414 }
1415 }
1416
1417 /* read as many integer digits as we find as long as it doesn't start with 0 */
1418 if (str[idx] >= '1' && str[idx] <= '9') {
1419 idx++;
1420 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1421 }
1422 /* if it starts with 0 we only expect one integer digit */
1423 else if (str[idx] == '0') {
1424 idx++;
1425 }
1426 /* no integer digits, error */
1427 else {
1428 PyErr_SetNone(PyExc_StopIteration);
1429 return NULL;
1430 }
1431
1432 /* if the next char is '.' followed by a digit then read all float digits */
1433 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1434 is_float = 1;
1435 idx += 2;
1436 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1437 }
1438
1439 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1440 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1441 Py_ssize_t e_start = idx;
1442 idx++;
1443
1444 /* read an exponent sign if present */
1445 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1446
1447 /* read all digits */
1448 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1449
1450 /* if we got a digit, then parse as float. if not, backtrack */
1451 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1452 is_float = 1;
1453 }
1454 else {
1455 idx = e_start;
1456 }
1457 }
1458
1459 /* copy the section we determined to be a number */
1460 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1461 if (numstr == NULL)
1462 return NULL;
1463 if (is_float) {
1464 /* parse as a float using a fast path if available, otherwise call user defined method */
1465 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1466 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1467 }
1468 else {
1469 rval = PyFloat_FromString(numstr, NULL);
1470 }
1471 }
1472 else {
1473 /* no fast path for unicode -> int, just call */
1474 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1475 }
1476 Py_DECREF(numstr);
1477 *next_idx_ptr = idx;
1478 return rval;
1479 }
1480
1481 static PyObject *
scan_once_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1482 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1483 {
1484 /* Read one JSON term (of any kind) from PyString pystr.
1485 idx is the index of the first character of the term
1486 *next_idx_ptr is a return-by-reference index to the first character after
1487 the number.
1488
1489 Returns a new PyObject representation of the term.
1490 */
1491 PyObject *res;
1492 char *str = PyString_AS_STRING(pystr);
1493 Py_ssize_t length = PyString_GET_SIZE(pystr);
1494 if (idx >= length) {
1495 PyErr_SetNone(PyExc_StopIteration);
1496 return NULL;
1497 }
1498 switch (str[idx]) {
1499 case '"':
1500 /* string */
1501 return scanstring_str(pystr, idx + 1,
1502 PyString_AS_STRING(s->encoding),
1503 PyObject_IsTrue(s->strict),
1504 next_idx_ptr);
1505 case '{':
1506 /* object */
1507 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1508 "from a byte string"))
1509 return NULL;
1510 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1511 Py_LeaveRecursiveCall();
1512 return res;
1513 case '[':
1514 /* array */
1515 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1516 "from a byte string"))
1517 return NULL;
1518 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1519 Py_LeaveRecursiveCall();
1520 return res;
1521 case 'n':
1522 /* null */
1523 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1524 Py_INCREF(Py_None);
1525 *next_idx_ptr = idx + 4;
1526 return Py_None;
1527 }
1528 break;
1529 case 't':
1530 /* true */
1531 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1532 Py_INCREF(Py_True);
1533 *next_idx_ptr = idx + 4;
1534 return Py_True;
1535 }
1536 break;
1537 case 'f':
1538 /* false */
1539 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1540 Py_INCREF(Py_False);
1541 *next_idx_ptr = idx + 5;
1542 return Py_False;
1543 }
1544 break;
1545 case 'N':
1546 /* NaN */
1547 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1548 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1549 }
1550 break;
1551 case 'I':
1552 /* Infinity */
1553 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1554 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1555 }
1556 break;
1557 case '-':
1558 /* -Infinity */
1559 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1560 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1561 }
1562 break;
1563 }
1564 /* Didn't find a string, object, array, or named constant. Look for a number. */
1565 return _match_number_str(s, pystr, idx, next_idx_ptr);
1566 }
1567
1568 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1569 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1570 {
1571 /* Read one JSON term (of any kind) from PyUnicode pystr.
1572 idx is the index of the first character of the term
1573 *next_idx_ptr is a return-by-reference index to the first character after
1574 the number.
1575
1576 Returns a new PyObject representation of the term.
1577 */
1578 PyObject *res;
1579 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1580 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1581 if (idx >= length) {
1582 PyErr_SetNone(PyExc_StopIteration);
1583 return NULL;
1584 }
1585 switch (str[idx]) {
1586 case '"':
1587 /* string */
1588 return scanstring_unicode(pystr, idx + 1,
1589 PyObject_IsTrue(s->strict),
1590 next_idx_ptr);
1591 case '{':
1592 /* object */
1593 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1594 "from a unicode string"))
1595 return NULL;
1596 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1597 Py_LeaveRecursiveCall();
1598 return res;
1599 case '[':
1600 /* array */
1601 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1602 "from a unicode string"))
1603 return NULL;
1604 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1605 Py_LeaveRecursiveCall();
1606 return res;
1607 case 'n':
1608 /* null */
1609 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1610 Py_INCREF(Py_None);
1611 *next_idx_ptr = idx + 4;
1612 return Py_None;
1613 }
1614 break;
1615 case 't':
1616 /* true */
1617 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1618 Py_INCREF(Py_True);
1619 *next_idx_ptr = idx + 4;
1620 return Py_True;
1621 }
1622 break;
1623 case 'f':
1624 /* false */
1625 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1626 Py_INCREF(Py_False);
1627 *next_idx_ptr = idx + 5;
1628 return Py_False;
1629 }
1630 break;
1631 case 'N':
1632 /* NaN */
1633 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1634 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1635 }
1636 break;
1637 case 'I':
1638 /* Infinity */
1639 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1640 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1641 }
1642 break;
1643 case '-':
1644 /* -Infinity */
1645 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1646 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1647 }
1648 break;
1649 }
1650 /* Didn't find a string, object, array, or named constant. Look for a number. */
1651 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1652 }
1653
1654 static PyObject *
scanner_call(PyObject * self,PyObject * args,PyObject * kwds)1655 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1656 {
1657 /* Python callable interface to scan_once_{str,unicode} */
1658 PyObject *pystr;
1659 PyObject *rval;
1660 Py_ssize_t idx;
1661 Py_ssize_t next_idx = -1;
1662 static char *kwlist[] = {"string", "idx", NULL};
1663 PyScannerObject *s;
1664 assert(PyScanner_Check(self));
1665 s = (PyScannerObject *)self;
1666 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1667 return NULL;
1668
1669 if (PyString_Check(pystr)) {
1670 rval = scan_once_str(s, pystr, idx, &next_idx);
1671 }
1672 else if (PyUnicode_Check(pystr)) {
1673 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1674 }
1675 else {
1676 PyErr_Format(PyExc_TypeError,
1677 "first argument must be a string, not %.80s",
1678 Py_TYPE(pystr)->tp_name);
1679 return NULL;
1680 }
1681 return _build_rval_index_tuple(rval, next_idx);
1682 }
1683
1684 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1685 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1686 {
1687 PyScannerObject *s;
1688 s = (PyScannerObject *)type->tp_alloc(type, 0);
1689 if (s != NULL) {
1690 s->encoding = NULL;
1691 s->strict = NULL;
1692 s->object_hook = NULL;
1693 s->pairs_hook = NULL;
1694 s->parse_float = NULL;
1695 s->parse_int = NULL;
1696 s->parse_constant = NULL;
1697 }
1698 return (PyObject *)s;
1699 }
1700
1701 static int
scanner_init(PyObject * self,PyObject * args,PyObject * kwds)1702 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1703 {
1704 /* Initialize Scanner object */
1705 PyObject *ctx;
1706 static char *kwlist[] = {"context", NULL};
1707 PyScannerObject *s;
1708
1709 assert(PyScanner_Check(self));
1710 s = (PyScannerObject *)self;
1711
1712 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1713 return -1;
1714
1715 /* PyString_AS_STRING is used on encoding */
1716 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1717 if (s->encoding == NULL)
1718 goto bail;
1719 if (s->encoding == Py_None) {
1720 Py_DECREF(Py_None);
1721 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1722 }
1723 else if (PyUnicode_Check(s->encoding)) {
1724 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1725 Py_DECREF(s->encoding);
1726 s->encoding = tmp;
1727 }
1728 if (s->encoding == NULL || !PyString_Check(s->encoding))
1729 goto bail;
1730
1731 /* All of these will fail "gracefully" so we don't need to verify them */
1732 s->strict = PyObject_GetAttrString(ctx, "strict");
1733 if (s->strict == NULL)
1734 goto bail;
1735 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1736 if (s->object_hook == NULL)
1737 goto bail;
1738 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1739 if (s->pairs_hook == NULL)
1740 goto bail;
1741 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1742 if (s->parse_float == NULL)
1743 goto bail;
1744 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1745 if (s->parse_int == NULL)
1746 goto bail;
1747 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1748 if (s->parse_constant == NULL)
1749 goto bail;
1750
1751 return 0;
1752
1753 bail:
1754 Py_CLEAR(s->encoding);
1755 Py_CLEAR(s->strict);
1756 Py_CLEAR(s->object_hook);
1757 Py_CLEAR(s->pairs_hook);
1758 Py_CLEAR(s->parse_float);
1759 Py_CLEAR(s->parse_int);
1760 Py_CLEAR(s->parse_constant);
1761 return -1;
1762 }
1763
/* Docstring for the Scanner type, referenced from tp_doc below. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for _json.Scanner.
   Instances are callable (tp_call = scanner_call), take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with scanner_traverse and
   scanner_clear) and expose their configuration through scanner_members.
   The initializer is positional, so the order of the entries must match the
   slot order of PyTypeObject.
   NOTE(review): the slots left at 0 with a commented-out function name
   (tp_getattro, tp_setattro, tp_alloc, tp_free) are presumably filled in
   elsewhere before the type is used -- confirm against the module init
   code. */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1809
1810 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1811 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1812 {
1813 PyEncoderObject *s;
1814 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1815 if (s != NULL) {
1816 s->markers = NULL;
1817 s->defaultfn = NULL;
1818 s->encoder = NULL;
1819 s->indent = NULL;
1820 s->key_separator = NULL;
1821 s->item_separator = NULL;
1822 s->sort_keys = NULL;
1823 s->skipkeys = NULL;
1824 }
1825 return (PyObject *)s;
1826 }
1827
1828 static int
encoder_init(PyObject * self,PyObject * args,PyObject * kwds)1829 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1830 {
1831 /* initialize Encoder object */
1832 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1833
1834 PyEncoderObject *s;
1835 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1836 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1837
1838 assert(PyEncoder_Check(self));
1839 s = (PyEncoderObject *)self;
1840
1841 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1842 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1843 &sort_keys, &skipkeys, &allow_nan))
1844 return -1;
1845
1846 s->markers = markers;
1847 s->defaultfn = defaultfn;
1848 s->encoder = encoder;
1849 s->indent = indent;
1850 s->key_separator = key_separator;
1851 s->item_separator = item_separator;
1852 s->sort_keys = sort_keys;
1853 s->skipkeys = skipkeys;
1854 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1855 s->allow_nan = PyObject_IsTrue(allow_nan);
1856
1857 Py_INCREF(s->markers);
1858 Py_INCREF(s->defaultfn);
1859 Py_INCREF(s->encoder);
1860 Py_INCREF(s->indent);
1861 Py_INCREF(s->key_separator);
1862 Py_INCREF(s->item_separator);
1863 Py_INCREF(s->sort_keys);
1864 Py_INCREF(s->skipkeys);
1865 return 0;
1866 }
1867
1868 static PyObject *
encoder_call(PyObject * self,PyObject * args,PyObject * kwds)1869 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1870 {
1871 /* Python callable interface to encode_listencode_obj */
1872 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1873 PyObject *obj;
1874 PyObject *rval;
1875 Py_ssize_t indent_level;
1876 PyEncoderObject *s;
1877 assert(PyEncoder_Check(self));
1878 s = (PyEncoderObject *)self;
1879 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1880 &obj, _convertPyInt_AsSsize_t, &indent_level))
1881 return NULL;
1882 rval = PyList_New(0);
1883 if (rval == NULL)
1884 return NULL;
1885 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1886 Py_DECREF(rval);
1887 return NULL;
1888 }
1889 return rval;
1890 }
1891
1892 static PyObject *
_encoded_const(PyObject * obj)1893 _encoded_const(PyObject *obj)
1894 {
1895 /* Return the JSON string representation of None, True, False */
1896 if (obj == Py_None) {
1897 static PyObject *s_null = NULL;
1898 if (s_null == NULL) {
1899 s_null = PyString_InternFromString("null");
1900 }
1901 Py_INCREF(s_null);
1902 return s_null;
1903 }
1904 else if (obj == Py_True) {
1905 static PyObject *s_true = NULL;
1906 if (s_true == NULL) {
1907 s_true = PyString_InternFromString("true");
1908 }
1909 Py_INCREF(s_true);
1910 return s_true;
1911 }
1912 else if (obj == Py_False) {
1913 static PyObject *s_false = NULL;
1914 if (s_false == NULL) {
1915 s_false = PyString_InternFromString("false");
1916 }
1917 Py_INCREF(s_false);
1918 return s_false;
1919 }
1920 else {
1921 PyErr_SetString(PyExc_ValueError, "not a const");
1922 return NULL;
1923 }
1924 }
1925
1926 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1927 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1928 {
1929 /* Return the JSON representation of a PyFloat */
1930 double i = PyFloat_AS_DOUBLE(obj);
1931 if (!Py_IS_FINITE(i)) {
1932 if (!s->allow_nan) {
1933 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1934 return NULL;
1935 }
1936 if (i > 0) {
1937 return PyString_FromString("Infinity");
1938 }
1939 else if (i < 0) {
1940 return PyString_FromString("-Infinity");
1941 }
1942 else {
1943 return PyString_FromString("NaN");
1944 }
1945 }
1946 /* Use a better float format here? */
1947 return PyObject_Repr(obj);
1948 }
1949
1950 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1951 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1952 {
1953 /* Return the JSON representation of a string */
1954 if (s->fast_encode)
1955 return py_encode_basestring_ascii(NULL, obj);
1956 else
1957 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1958 }
1959
1960 static int
_steal_list_append(PyObject * lst,PyObject * stolen)1961 _steal_list_append(PyObject *lst, PyObject *stolen)
1962 {
1963 /* Append stolen and then decrement its reference count */
1964 int rval = PyList_Append(lst, stolen);
1965 Py_DECREF(stolen);
1966 return rval;
1967 }
1968
1969 static int
encoder_listencode_obj(PyEncoderObject * s,PyObject * rval,PyObject * obj,Py_ssize_t indent_level)1970 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1971 {
1972 /* Encode Python object obj to a JSON term, rval is a PyList */
1973 PyObject *newobj;
1974 int rv;
1975
1976 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1977 PyObject *cstr = _encoded_const(obj);
1978 if (cstr == NULL)
1979 return -1;
1980 return _steal_list_append(rval, cstr);
1981 }
1982 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1983 {
1984 PyObject *encoded = encoder_encode_string(s, obj);
1985 if (encoded == NULL)
1986 return -1;
1987 return _steal_list_append(rval, encoded);
1988 }
1989 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1990 PyObject *encoded = PyObject_Str(obj);
1991 if (encoded == NULL)
1992 return -1;
1993 return _steal_list_append(rval, encoded);
1994 }
1995 else if (PyFloat_Check(obj)) {
1996 PyObject *encoded = encoder_encode_float(s, obj);
1997 if (encoded == NULL)
1998 return -1;
1999 return _steal_list_append(rval, encoded);
2000 }
2001 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2002 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2003 return -1;
2004 rv = encoder_listencode_list(s, rval, obj, indent_level);
2005 Py_LeaveRecursiveCall();
2006 return rv;
2007 }
2008 else if (PyDict_Check(obj)) {
2009 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2010 return -1;
2011 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2012 Py_LeaveRecursiveCall();
2013 return rv;
2014 }
2015 else {
2016 PyObject *ident = NULL;
2017 if (s->markers != Py_None) {
2018 int has_key;
2019 ident = PyLong_FromVoidPtr(obj);
2020 if (ident == NULL)
2021 return -1;
2022 has_key = PyDict_Contains(s->markers, ident);
2023 if (has_key) {
2024 if (has_key != -1)
2025 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2026 Py_DECREF(ident);
2027 return -1;
2028 }
2029 if (PyDict_SetItem(s->markers, ident, obj)) {
2030 Py_DECREF(ident);
2031 return -1;
2032 }
2033 }
2034 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2035 if (newobj == NULL) {
2036 Py_XDECREF(ident);
2037 return -1;
2038 }
2039
2040 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2041 return -1;
2042 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2043 Py_LeaveRecursiveCall();
2044
2045 Py_DECREF(newobj);
2046 if (rv) {
2047 Py_XDECREF(ident);
2048 return -1;
2049 }
2050 if (ident != NULL) {
2051 if (PyDict_DelItem(s->markers, ident)) {
2052 Py_XDECREF(ident);
2053 return -1;
2054 }
2055 Py_XDECREF(ident);
2056 }
2057 return rv;
2058 }
2059 }
2060
2061 static int
encoder_listencode_dict(PyEncoderObject * s,PyObject * rval,PyObject * dct,Py_ssize_t indent_level)2062 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2063 {
2064 /* Encode Python dict dct a JSON term, rval is a PyList */
2065 static PyObject *open_dict = NULL;
2066 static PyObject *close_dict = NULL;
2067 static PyObject *empty_dict = NULL;
2068 PyObject *kstr = NULL;
2069 PyObject *ident = NULL;
2070 PyObject *key = NULL;
2071 PyObject *value = NULL;
2072 PyObject *it = NULL;
2073 int skipkeys;
2074 Py_ssize_t idx;
2075
2076 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2077 open_dict = PyString_InternFromString("{");
2078 close_dict = PyString_InternFromString("}");
2079 empty_dict = PyString_InternFromString("{}");
2080 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2081 return -1;
2082 }
2083 if (Py_SIZE(dct) == 0)
2084 return PyList_Append(rval, empty_dict);
2085
2086 if (s->markers != Py_None) {
2087 int has_key;
2088 ident = PyLong_FromVoidPtr(dct);
2089 if (ident == NULL)
2090 goto bail;
2091 has_key = PyDict_Contains(s->markers, ident);
2092 if (has_key) {
2093 if (has_key != -1)
2094 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2095 goto bail;
2096 }
2097 if (PyDict_SetItem(s->markers, ident, dct)) {
2098 goto bail;
2099 }
2100 }
2101
2102 if (PyList_Append(rval, open_dict))
2103 goto bail;
2104
2105 if (s->indent != Py_None) {
2106 /* TODO: DOES NOT RUN */
2107 indent_level += 1;
2108 /*
2109 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2110 separator = _item_separator + newline_indent
2111 buf += newline_indent
2112 */
2113 }
2114
2115 /* TODO: C speedup not implemented for sort_keys */
2116
2117 it = PyObject_GetIter(dct);
2118 if (it == NULL)
2119 goto bail;
2120 skipkeys = PyObject_IsTrue(s->skipkeys);
2121 idx = 0;
2122 while ((key = PyIter_Next(it)) != NULL) {
2123 PyObject *encoded;
2124
2125 if (PyString_Check(key) || PyUnicode_Check(key)) {
2126 Py_INCREF(key);
2127 kstr = key;
2128 }
2129 else if (PyFloat_Check(key)) {
2130 kstr = encoder_encode_float(s, key);
2131 if (kstr == NULL)
2132 goto bail;
2133 }
2134 else if (PyInt_Check(key) || PyLong_Check(key)) {
2135 kstr = PyObject_Str(key);
2136 if (kstr == NULL)
2137 goto bail;
2138 }
2139 else if (key == Py_True || key == Py_False || key == Py_None) {
2140 kstr = _encoded_const(key);
2141 if (kstr == NULL)
2142 goto bail;
2143 }
2144 else if (skipkeys) {
2145 Py_DECREF(key);
2146 continue;
2147 }
2148 else {
2149 /* TODO: include repr of key */
2150 PyErr_SetString(PyExc_TypeError, "keys must be a string");
2151 goto bail;
2152 }
2153
2154 if (idx) {
2155 if (PyList_Append(rval, s->item_separator))
2156 goto bail;
2157 }
2158
2159 value = PyObject_GetItem(dct, key);
2160 if (value == NULL)
2161 goto bail;
2162
2163 encoded = encoder_encode_string(s, kstr);
2164 Py_CLEAR(kstr);
2165 if (encoded == NULL)
2166 goto bail;
2167 if (PyList_Append(rval, encoded)) {
2168 Py_DECREF(encoded);
2169 goto bail;
2170 }
2171 Py_DECREF(encoded);
2172 if (PyList_Append(rval, s->key_separator))
2173 goto bail;
2174 if (encoder_listencode_obj(s, rval, value, indent_level))
2175 goto bail;
2176 idx += 1;
2177 Py_CLEAR(value);
2178 Py_DECREF(key);
2179 }
2180 if (PyErr_Occurred())
2181 goto bail;
2182 Py_CLEAR(it);
2183
2184 if (ident != NULL) {
2185 if (PyDict_DelItem(s->markers, ident))
2186 goto bail;
2187 Py_CLEAR(ident);
2188 }
2189 if (s->indent != Py_None) {
2190 /* TODO: DOES NOT RUN */
2191 /*
2192 indent_level -= 1;
2193
2194 yield '\n' + (' ' * (_indent * _current_indent_level))
2195 */
2196 }
2197 if (PyList_Append(rval, close_dict))
2198 goto bail;
2199 return 0;
2200
2201 bail:
2202 Py_XDECREF(it);
2203 Py_XDECREF(key);
2204 Py_XDECREF(value);
2205 Py_XDECREF(kstr);
2206 Py_XDECREF(ident);
2207 return -1;
2208 }
2209
2210
2211 static int
encoder_listencode_list(PyEncoderObject * s,PyObject * rval,PyObject * seq,Py_ssize_t indent_level)2212 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2213 {
2214 /* Encode Python list seq to a JSON term, rval is a PyList */
2215 static PyObject *open_array = NULL;
2216 static PyObject *close_array = NULL;
2217 static PyObject *empty_array = NULL;
2218 PyObject *ident = NULL;
2219 PyObject *s_fast = NULL;
2220 Py_ssize_t num_items;
2221 PyObject **seq_items;
2222 Py_ssize_t i;
2223
2224 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2225 open_array = PyString_InternFromString("[");
2226 close_array = PyString_InternFromString("]");
2227 empty_array = PyString_InternFromString("[]");
2228 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2229 return -1;
2230 }
2231 ident = NULL;
2232 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2233 if (s_fast == NULL)
2234 return -1;
2235 num_items = PySequence_Fast_GET_SIZE(s_fast);
2236 if (num_items == 0) {
2237 Py_DECREF(s_fast);
2238 return PyList_Append(rval, empty_array);
2239 }
2240
2241 if (s->markers != Py_None) {
2242 int has_key;
2243 ident = PyLong_FromVoidPtr(seq);
2244 if (ident == NULL)
2245 goto bail;
2246 has_key = PyDict_Contains(s->markers, ident);
2247 if (has_key) {
2248 if (has_key != -1)
2249 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2250 goto bail;
2251 }
2252 if (PyDict_SetItem(s->markers, ident, seq)) {
2253 goto bail;
2254 }
2255 }
2256
2257 seq_items = PySequence_Fast_ITEMS(s_fast);
2258 if (PyList_Append(rval, open_array))
2259 goto bail;
2260 if (s->indent != Py_None) {
2261 /* TODO: DOES NOT RUN */
2262 indent_level += 1;
2263 /*
2264 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2265 separator = _item_separator + newline_indent
2266 buf += newline_indent
2267 */
2268 }
2269 for (i = 0; i < num_items; i++) {
2270 PyObject *obj = seq_items[i];
2271 if (i) {
2272 if (PyList_Append(rval, s->item_separator))
2273 goto bail;
2274 }
2275 if (encoder_listencode_obj(s, rval, obj, indent_level))
2276 goto bail;
2277 }
2278 if (ident != NULL) {
2279 if (PyDict_DelItem(s->markers, ident))
2280 goto bail;
2281 Py_CLEAR(ident);
2282 }
2283 if (s->indent != Py_None) {
2284 /* TODO: DOES NOT RUN */
2285 /*
2286 indent_level -= 1;
2287
2288 yield '\n' + (' ' * (_indent * _current_indent_level))
2289 */
2290 }
2291 if (PyList_Append(rval, close_array))
2292 goto bail;
2293 Py_DECREF(s_fast);
2294 return 0;
2295
2296 bail:
2297 Py_XDECREF(ident);
2298 Py_DECREF(s_fast);
2299 return -1;
2300 }
2301
2302 static void
encoder_dealloc(PyObject * self)2303 encoder_dealloc(PyObject *self)
2304 {
2305 /* Deallocate Encoder */
2306 encoder_clear(self);
2307 Py_TYPE(self)->tp_free(self);
2308 }
2309
2310 static int
encoder_traverse(PyObject * self,visitproc visit,void * arg)2311 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2312 {
2313 PyEncoderObject *s;
2314 assert(PyEncoder_Check(self));
2315 s = (PyEncoderObject *)self;
2316 Py_VISIT(s->markers);
2317 Py_VISIT(s->defaultfn);
2318 Py_VISIT(s->encoder);
2319 Py_VISIT(s->indent);
2320 Py_VISIT(s->key_separator);
2321 Py_VISIT(s->item_separator);
2322 Py_VISIT(s->sort_keys);
2323 Py_VISIT(s->skipkeys);
2324 return 0;
2325 }
2326
2327 static int
encoder_clear(PyObject * self)2328 encoder_clear(PyObject *self)
2329 {
2330 /* Deallocate Encoder */
2331 PyEncoderObject *s;
2332 assert(PyEncoder_Check(self));
2333 s = (PyEncoderObject *)self;
2334 Py_CLEAR(s->markers);
2335 Py_CLEAR(s->defaultfn);
2336 Py_CLEAR(s->encoder);
2337 Py_CLEAR(s->indent);
2338 Py_CLEAR(s->key_separator);
2339 Py_CLEAR(s->item_separator);
2340 Py_CLEAR(s->sort_keys);
2341 Py_CLEAR(s->skipkeys);
2342 return 0;
2343 }
2344
/* Docstring for the Encoder type; installed as tp_doc of PyEncoderType. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2346
/*
 * Type object for the C encoder, named "_json.Encoder".  Instances are
 * callable (tp_call = encoder_call) and take part in cyclic garbage
 * collection (Py_TPFLAGS_HAVE_GC with encoder_traverse / encoder_clear).
 * Slots left as 0 are unset in this initializer.
 */
static
PyTypeObject PyEncoderType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* ob_size (slot following the object header in a Python 2 type object) */
    "_json.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
2390
/*
 * Module-level function table passed to Py_InitModule3() in init_json().
 * Each entry is {Python-visible name, C implementation, calling
 * convention flag, docstring}; the all-NULL entry terminates the table.
 */
static PyMethodDef speedups_methods[] = {
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,         /* called with exactly one object argument */
        pydoc_encode_basestring_ascii},
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,   /* called with a tuple of positional arguments */
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}   /* sentinel */
};
2402
/* Docstring of the _json module; passed to Py_InitModule3() in init_json(). */
PyDoc_STRVAR(module_doc,
"json speedups\n");
2405
2406 void
init_json(void)2407 init_json(void)
2408 {
2409 PyObject *m;
2410 PyScannerType.tp_new = PyType_GenericNew;
2411 if (PyType_Ready(&PyScannerType) < 0)
2412 return;
2413 PyEncoderType.tp_new = PyType_GenericNew;
2414 if (PyType_Ready(&PyEncoderType) < 0)
2415 return;
2416 m = Py_InitModule3("_json", speedups_methods, module_doc);
2417 Py_INCREF((PyObject*)&PyScannerType);
2418 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2419 Py_INCREF((PyObject*)&PyEncoderType);
2420 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2421 }
2422