1 #include "Python.h"
2 #include "structmember.h"
/* Compatibility shims for building against older Python headers. */

/* Supply Py_TYPE() when PY_VERSION_HEX is below 0x02060000 and the headers
   have not already defined it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#  define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Below 0x02050000 (and without PY_SSIZE_T_MIN) Py_ssize_t is mapped onto
   plain int, and the Ssize_t int conversions onto their long counterparts. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#  define PY_SSIZE_T_MAX INT_MAX
#  define PY_SSIZE_T_MIN INT_MIN
#  define PyInt_FromSsize_t PyInt_FromLong
#  define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Fallback definition: finite means neither infinite nor NaN. */
#ifndef Py_IS_FINITE
#  define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif
16
/* UNUSED tags a parameter as intentionally unused: it expands to the GCC
   attribute when __GNUC__ is defined and to nothing otherwise. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif

/* Encoding name substituted when a caller passes none (see py_scanstring). */
#define DEFAULT_ENCODING "utf-8"
24
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
32
33 typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
38 PyObject *pairs_hook;
39 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42 } PyScannerObject;
43
44 static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53 };
54
55 typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67 } PyEncoderObject;
68
69 static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79 };
80
81 static Py_ssize_t
82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83 static PyObject *
84 ascii_escape_unicode(PyObject *pystr);
85 static PyObject *
86 ascii_escape_str(PyObject *pystr);
87 static PyObject *
88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89 void init_json(void);
90 static PyObject *
91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94 static PyObject *
95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96 static PyObject *
97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98 static int
99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100 static void
101 scanner_dealloc(PyObject *self);
102 static int
103 scanner_clear(PyObject *self);
104 static PyObject *
105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106 static int
107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108 static void
109 encoder_dealloc(PyObject *self);
110 static int
111 encoder_clear(PyObject *self);
112 static int
113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116 static int
117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118 static PyObject *
119 _encoded_const(PyObject *obj);
120 static void
121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122 static PyObject *
123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124 static int
125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126 static PyObject *
127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128 static PyObject *
129 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c may be copied to the escaped output unchanged,
   i.e. it lies in ' ' .. '~' and is neither a backslash nor a double quote.
   Each use of the argument is parenthesized so that expression arguments
   such as S_CHAR(x & 0xff) group correctly.  The argument is still evaluated
   several times, so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* MIN_EXPANSION is the length of one "\uXXXX" escape.  With Py_UNICODE_WIDE
   a single input character may need an escaped surrogate pair
   ("\uXXXX\uXXXX"), so MAX_EXPANSION is twice that; otherwise both agree. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
141 static int
_convertPyInt_AsSsize_t(PyObject * o,Py_ssize_t * size_ptr)142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143 {
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
149 }
150
151 static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t * size_ptr)152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153 {
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156 }
157
158 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160 {
161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174 #ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187 #endif
188 output[chars++] = 'u';
189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
193 }
194 return chars;
195 }
196
197 static PyObject *
ascii_escape_unicode(PyObject * pystr)198 ascii_escape_unicode(PyObject *pystr)
199 {
200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
204 Py_ssize_t max_output_size;
205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
212
213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
216 rval = PyString_FromStringAndSize(NULL, output_size);
217 if (rval == NULL) {
218 return NULL;
219 }
220 output = PyString_AS_STRING(rval);
221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
228 else {
229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
233 Py_ssize_t new_output_size = output_size * 2;
234 /* This is an upper bound */
235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
237 }
238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
245 }
246 }
247 }
248 output[chars++] = '"';
249 if (_PyString_Resize(&rval, chars) == -1) {
250 return NULL;
251 }
252 return rval;
253 }
254
255 static PyObject *
ascii_escape_str(PyObject * pystr)256 ascii_escape_str(PyObject *pystr)
257 {
258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
302 rval = PyString_FromStringAndSize(NULL, output_size);
303 if (rval == NULL) {
304 return NULL;
305 }
306 output = PyString_AS_STRING(rval);
307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
318 else {
319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
328 if (_PyString_Resize(&rval, output_size) == -1) {
329 return NULL;
330 }
331 output = PyString_AS_STRING(rval);
332 }
333 }
334 output[chars++] = '"';
335 if (_PyString_Resize(&rval, chars) == -1) {
336 return NULL;
337 }
338 return rval;
339 }
340
341 static void
raise_errmsg(char * msg,PyObject * s,Py_ssize_t end)342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343 {
344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
353 Py_DECREF(decoder);
354 if (errmsg_fn == NULL)
355 return;
356 }
357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
362 }
363
364 static PyObject *
join_list_unicode(PyObject * lst)365 join_list_unicode(PyObject *lst)
366 {
367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
378 }
379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
380 }
381
382 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)383 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384 /* return (rval, idx) tuple, stealing reference to rval */
385 PyObject *tpl;
386 PyObject *pyidx;
387 /*
388 steal a reference to rval, returns (rval, idx)
389 */
390 if (rval == NULL) {
391 return NULL;
392 }
393 pyidx = PyInt_FromSsize_t(idx);
394 if (pyidx == NULL) {
395 Py_DECREF(rval);
396 return NULL;
397 }
398 tpl = PyTuple_New(2);
399 if (tpl == NULL) {
400 Py_DECREF(pyidx);
401 Py_DECREF(rval);
402 return NULL;
403 }
404 PyTuple_SET_ITEM(tpl, 0, rval);
405 PyTuple_SET_ITEM(tpl, 1, pyidx);
406 return tpl;
407 }
408
409 static PyObject *
scanstring_str(PyObject * pystr,Py_ssize_t end,char * encoding,int strict,Py_ssize_t * next_end_ptr)410 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411 {
412 /* Read the JSON string from PyString pystr.
413 end is the index of the first character after the quote.
414 encoding is the encoding of pystr (must be an ASCII superset)
415 if strict is zero then literal control characters are allowed
416 *next_end_ptr is a return-by-reference index of the character
417 after the end quote
418
419 Return value is a new PyString (if ASCII-only) or PyUnicode
420 */
421 PyObject *rval;
422 Py_ssize_t len = PyString_GET_SIZE(pystr);
423 Py_ssize_t begin = end - 1;
424 Py_ssize_t next;
425 char *buf = PyString_AS_STRING(pystr);
426 PyObject *chunks = PyList_New(0);
427 if (chunks == NULL) {
428 goto bail;
429 }
430 if (end < 0 || len <= end) {
431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
434 while (1) {
435 /* Find the end of the string or the next escape */
436 Py_UNICODE c = 0;
437 PyObject *chunk = NULL;
438 for (next = end; next < len; next++) {
439 c = (unsigned char)buf[next];
440 if (c == '"' || c == '\\') {
441 break;
442 }
443 else if (strict && c <= 0x1f) {
444 raise_errmsg("Invalid control character at", pystr, next);
445 goto bail;
446 }
447 }
448 if (!(c == '"' || c == '\\')) {
449 raise_errmsg("Unterminated string starting at", pystr, begin);
450 goto bail;
451 }
452 /* Pick up this chunk if it's not zero length */
453 if (next != end) {
454 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
455 if (strchunk == NULL) {
456 goto bail;
457 }
458 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459 Py_DECREF(strchunk);
460 if (chunk == NULL) {
461 goto bail;
462 }
463 if (PyList_Append(chunks, chunk)) {
464 Py_DECREF(chunk);
465 goto bail;
466 }
467 Py_DECREF(chunk);
468 }
469 next++;
470 if (c == '"') {
471 end = next;
472 break;
473 }
474 if (next == len) {
475 raise_errmsg("Unterminated string starting at", pystr, begin);
476 goto bail;
477 }
478 c = buf[next];
479 if (c != 'u') {
480 /* Non-unicode backslash escapes */
481 end = next + 1;
482 switch (c) {
483 case '"': break;
484 case '\\': break;
485 case '/': break;
486 case 'b': c = '\b'; break;
487 case 'f': c = '\f'; break;
488 case 'n': c = '\n'; break;
489 case 'r': c = '\r'; break;
490 case 't': c = '\t'; break;
491 default: c = 0;
492 }
493 if (c == 0) {
494 raise_errmsg("Invalid \\escape", pystr, end - 2);
495 goto bail;
496 }
497 }
498 else {
499 c = 0;
500 next++;
501 end = next + 4;
502 if (end >= len) {
503 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504 goto bail;
505 }
506 /* Decode 4 hex digits */
507 for (; next < end; next++) {
508 Py_UNICODE digit = buf[next];
509 c <<= 4;
510 switch (digit) {
511 case '0': case '1': case '2': case '3': case '4':
512 case '5': case '6': case '7': case '8': case '9':
513 c |= (digit - '0'); break;
514 case 'a': case 'b': case 'c': case 'd': case 'e':
515 case 'f':
516 c |= (digit - 'a' + 10); break;
517 case 'A': case 'B': case 'C': case 'D': case 'E':
518 case 'F':
519 c |= (digit - 'A' + 10); break;
520 default:
521 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522 goto bail;
523 }
524 }
525 #ifdef Py_UNICODE_WIDE
526 /* Surrogate pair */
527 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
528 buf[next++] == '\\' &&
529 buf[next++] == 'u') {
530 Py_UNICODE c2 = 0;
531 end += 6;
532 /* Decode 4 hex digits */
533 for (; next < end; next++) {
534 Py_UNICODE digit = buf[next];
535 c2 <<= 4;
536 switch (digit) {
537 case '0': case '1': case '2': case '3': case '4':
538 case '5': case '6': case '7': case '8': case '9':
539 c2 |= (digit - '0'); break;
540 case 'a': case 'b': case 'c': case 'd': case 'e':
541 case 'f':
542 c2 |= (digit - 'a' + 10); break;
543 case 'A': case 'B': case 'C': case 'D': case 'E':
544 case 'F':
545 c2 |= (digit - 'A' + 10); break;
546 default:
547 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
548 goto bail;
549 }
550 }
551 if ((c2 & 0xfc00) == 0xdc00)
552 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
553 else
554 end -= 6;
555 }
556 #endif
557 }
558 chunk = PyUnicode_FromUnicode(&c, 1);
559 if (chunk == NULL) {
560 goto bail;
561 }
562 if (PyList_Append(chunks, chunk)) {
563 Py_DECREF(chunk);
564 goto bail;
565 }
566 Py_DECREF(chunk);
567 }
568
569 rval = join_list_unicode(chunks);
570 if (rval == NULL) {
571 goto bail;
572 }
573 Py_CLEAR(chunks);
574 *next_end_ptr = end;
575 return rval;
576 bail:
577 *next_end_ptr = -1;
578 Py_XDECREF(chunks);
579 return NULL;
580 }
581
582
583 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)584 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
585 {
586 /* Read the JSON string from PyUnicode pystr.
587 end is the index of the first character after the quote.
588 if strict is zero then literal control characters are allowed
589 *next_end_ptr is a return-by-reference index of the character
590 after the end quote
591
592 Return value is a new PyUnicode
593 */
594 PyObject *rval;
595 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
596 Py_ssize_t begin = end - 1;
597 Py_ssize_t next;
598 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
599 PyObject *chunks = PyList_New(0);
600 if (chunks == NULL) {
601 goto bail;
602 }
603 if (end < 0 || len <= end) {
604 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
605 goto bail;
606 }
607 while (1) {
608 /* Find the end of the string or the next escape */
609 Py_UNICODE c = 0;
610 PyObject *chunk = NULL;
611 for (next = end; next < len; next++) {
612 c = buf[next];
613 if (c == '"' || c == '\\') {
614 break;
615 }
616 else if (strict && c <= 0x1f) {
617 raise_errmsg("Invalid control character at", pystr, next);
618 goto bail;
619 }
620 }
621 if (!(c == '"' || c == '\\')) {
622 raise_errmsg("Unterminated string starting at", pystr, begin);
623 goto bail;
624 }
625 /* Pick up this chunk if it's not zero length */
626 if (next != end) {
627 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
628 if (chunk == NULL) {
629 goto bail;
630 }
631 if (PyList_Append(chunks, chunk)) {
632 Py_DECREF(chunk);
633 goto bail;
634 }
635 Py_DECREF(chunk);
636 }
637 next++;
638 if (c == '"') {
639 end = next;
640 break;
641 }
642 if (next == len) {
643 raise_errmsg("Unterminated string starting at", pystr, begin);
644 goto bail;
645 }
646 c = buf[next];
647 if (c != 'u') {
648 /* Non-unicode backslash escapes */
649 end = next + 1;
650 switch (c) {
651 case '"': break;
652 case '\\': break;
653 case '/': break;
654 case 'b': c = '\b'; break;
655 case 'f': c = '\f'; break;
656 case 'n': c = '\n'; break;
657 case 'r': c = '\r'; break;
658 case 't': c = '\t'; break;
659 default: c = 0;
660 }
661 if (c == 0) {
662 raise_errmsg("Invalid \\escape", pystr, end - 2);
663 goto bail;
664 }
665 }
666 else {
667 c = 0;
668 next++;
669 end = next + 4;
670 if (end >= len) {
671 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
672 goto bail;
673 }
674 /* Decode 4 hex digits */
675 for (; next < end; next++) {
676 Py_UNICODE digit = buf[next];
677 c <<= 4;
678 switch (digit) {
679 case '0': case '1': case '2': case '3': case '4':
680 case '5': case '6': case '7': case '8': case '9':
681 c |= (digit - '0'); break;
682 case 'a': case 'b': case 'c': case 'd': case 'e':
683 case 'f':
684 c |= (digit - 'a' + 10); break;
685 case 'A': case 'B': case 'C': case 'D': case 'E':
686 case 'F':
687 c |= (digit - 'A' + 10); break;
688 default:
689 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
690 goto bail;
691 }
692 }
693 #ifdef Py_UNICODE_WIDE
694 /* Surrogate pair */
695 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
696 buf[next++] == '\\' && buf[next++] == 'u') {
697 Py_UNICODE c2 = 0;
698 end += 6;
699 /* Decode 4 hex digits */
700 for (; next < end; next++) {
701 Py_UNICODE digit = buf[next];
702 c2 <<= 4;
703 switch (digit) {
704 case '0': case '1': case '2': case '3': case '4':
705 case '5': case '6': case '7': case '8': case '9':
706 c2 |= (digit - '0'); break;
707 case 'a': case 'b': case 'c': case 'd': case 'e':
708 case 'f':
709 c2 |= (digit - 'a' + 10); break;
710 case 'A': case 'B': case 'C': case 'D': case 'E':
711 case 'F':
712 c2 |= (digit - 'A' + 10); break;
713 default:
714 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
715 goto bail;
716 }
717 }
718 if ((c2 & 0xfc00) == 0xdc00)
719 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
720 else
721 end -= 6;
722 }
723 #endif
724 }
725 chunk = PyUnicode_FromUnicode(&c, 1);
726 if (chunk == NULL) {
727 goto bail;
728 }
729 if (PyList_Append(chunks, chunk)) {
730 Py_DECREF(chunk);
731 goto bail;
732 }
733 Py_DECREF(chunk);
734 }
735
736 rval = join_list_unicode(chunks);
737 if (rval == NULL) {
738 goto bail;
739 }
740 Py_DECREF(chunks);
741 *next_end_ptr = end;
742 return rval;
743 bail:
744 *next_end_ptr = -1;
745 Py_XDECREF(chunks);
746 return NULL;
747 }
748
749 PyDoc_STRVAR(pydoc_scanstring,
750 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
751 "\n"
752 "Scan the string s for a JSON string. End is the index of the\n"
753 "character in s after the quote that started the JSON string.\n"
754 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
755 "on attempt to decode an invalid string. If strict is False then literal\n"
756 "control characters are allowed in the string.\n"
757 "\n"
758 "Returns a tuple of the decoded string and the index of the character in s\n"
759 "after the end quote."
760 );
761
762 static PyObject *
py_scanstring(PyObject * self UNUSED,PyObject * args)763 py_scanstring(PyObject* self UNUSED, PyObject *args)
764 {
765 PyObject *pystr;
766 PyObject *rval;
767 Py_ssize_t end;
768 Py_ssize_t next_end = -1;
769 char *encoding = NULL;
770 int strict = 1;
771 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
772 return NULL;
773 }
774 if (encoding == NULL) {
775 encoding = DEFAULT_ENCODING;
776 }
777 if (PyString_Check(pystr)) {
778 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
779 }
780 else if (PyUnicode_Check(pystr)) {
781 rval = scanstring_unicode(pystr, end, strict, &next_end);
782 }
783 else {
784 PyErr_Format(PyExc_TypeError,
785 "first argument must be a string, not %.80s",
786 Py_TYPE(pystr)->tp_name);
787 return NULL;
788 }
789 return _build_rval_index_tuple(rval, next_end);
790 }
791
792 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
793 "encode_basestring_ascii(basestring) -> str\n"
794 "\n"
795 "Return an ASCII-only JSON representation of a Python string"
796 );
797
798 static PyObject *
py_encode_basestring_ascii(PyObject * self UNUSED,PyObject * pystr)799 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
800 {
801 /* Return an ASCII-only JSON representation of a Python string */
802 /* METH_O */
803 if (PyString_Check(pystr)) {
804 return ascii_escape_str(pystr);
805 }
806 else if (PyUnicode_Check(pystr)) {
807 return ascii_escape_unicode(pystr);
808 }
809 else {
810 PyErr_Format(PyExc_TypeError,
811 "first argument must be a string, not %.80s",
812 Py_TYPE(pystr)->tp_name);
813 return NULL;
814 }
815 }
816
817 static void
scanner_dealloc(PyObject * self)818 scanner_dealloc(PyObject *self)
819 {
820 /* Deallocate scanner object */
821 scanner_clear(self);
822 Py_TYPE(self)->tp_free(self);
823 }
824
825 static int
scanner_traverse(PyObject * self,visitproc visit,void * arg)826 scanner_traverse(PyObject *self, visitproc visit, void *arg)
827 {
828 PyScannerObject *s;
829 assert(PyScanner_Check(self));
830 s = (PyScannerObject *)self;
831 Py_VISIT(s->encoding);
832 Py_VISIT(s->strict);
833 Py_VISIT(s->object_hook);
834 Py_VISIT(s->pairs_hook);
835 Py_VISIT(s->parse_float);
836 Py_VISIT(s->parse_int);
837 Py_VISIT(s->parse_constant);
838 return 0;
839 }
840
841 static int
scanner_clear(PyObject * self)842 scanner_clear(PyObject *self)
843 {
844 PyScannerObject *s;
845 assert(PyScanner_Check(self));
846 s = (PyScannerObject *)self;
847 Py_CLEAR(s->encoding);
848 Py_CLEAR(s->strict);
849 Py_CLEAR(s->object_hook);
850 Py_CLEAR(s->pairs_hook);
851 Py_CLEAR(s->parse_float);
852 Py_CLEAR(s->parse_int);
853 Py_CLEAR(s->parse_constant);
854 return 0;
855 }
856
857 static PyObject *
_parse_object_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)858 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
859 /* Read a JSON object from PyString pystr.
860 idx is the index of the first character after the opening curly brace.
861 *next_idx_ptr is a return-by-reference index to the first character after
862 the closing curly brace.
863
864 Returns a new PyObject (usually a dict, but object_hook can change that)
865 */
866 char *str = PyString_AS_STRING(pystr);
867 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
868 PyObject *rval;
869 PyObject *pairs;
870 PyObject *item;
871 PyObject *key = NULL;
872 PyObject *val = NULL;
873 char *encoding = PyString_AS_STRING(s->encoding);
874 int strict = PyObject_IsTrue(s->strict);
875 Py_ssize_t next_idx;
876
877 pairs = PyList_New(0);
878 if (pairs == NULL)
879 return NULL;
880
881 /* skip whitespace after { */
882 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
883
884 /* only loop if the object is non-empty */
885 if (idx <= end_idx && str[idx] != '}') {
886 while (idx <= end_idx) {
887 /* read key */
888 if (str[idx] != '"') {
889 raise_errmsg("Expecting property name", pystr, idx);
890 goto bail;
891 }
892 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
893 if (key == NULL)
894 goto bail;
895 idx = next_idx;
896
897 /* skip whitespace between key and : delimiter, read :, skip whitespace */
898 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
899 if (idx > end_idx || str[idx] != ':') {
900 raise_errmsg("Expecting : delimiter", pystr, idx);
901 goto bail;
902 }
903 idx++;
904 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
905
906 /* read any JSON data type */
907 val = scan_once_str(s, pystr, idx, &next_idx);
908 if (val == NULL)
909 goto bail;
910
911 item = PyTuple_Pack(2, key, val);
912 if (item == NULL)
913 goto bail;
914 Py_CLEAR(key);
915 Py_CLEAR(val);
916 if (PyList_Append(pairs, item) == -1) {
917 Py_DECREF(item);
918 goto bail;
919 }
920 Py_DECREF(item);
921 idx = next_idx;
922
923 /* skip whitespace before } or , */
924 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
925
926 /* bail if the object is closed or we didn't get the , delimiter */
927 if (idx > end_idx) break;
928 if (str[idx] == '}') {
929 break;
930 }
931 else if (str[idx] != ',') {
932 raise_errmsg("Expecting , delimiter", pystr, idx);
933 goto bail;
934 }
935 idx++;
936
937 /* skip whitespace after , delimiter */
938 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
939 }
940 }
941 /* verify that idx < end_idx, str[idx] should be '}' */
942 if (idx > end_idx || str[idx] != '}') {
943 raise_errmsg("Expecting object", pystr, end_idx);
944 goto bail;
945 }
946
947 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
948 if (s->pairs_hook != Py_None) {
949 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
950 if (val == NULL)
951 goto bail;
952 Py_DECREF(pairs);
953 *next_idx_ptr = idx + 1;
954 return val;
955 }
956
957 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
958 pairs, NULL);
959 if (rval == NULL)
960 goto bail;
961 Py_CLEAR(pairs);
962
963 /* if object_hook is not None: rval = object_hook(rval) */
964 if (s->object_hook != Py_None) {
965 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
966 if (val == NULL)
967 goto bail;
968 Py_DECREF(rval);
969 rval = val;
970 val = NULL;
971 }
972 *next_idx_ptr = idx + 1;
973 return rval;
974 bail:
975 Py_XDECREF(key);
976 Py_XDECREF(val);
977 Py_XDECREF(pairs);
978 return NULL;
979 }
980
981 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)982 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
983 /* Read a JSON object from PyUnicode pystr.
984 idx is the index of the first character after the opening curly brace.
985 *next_idx_ptr is a return-by-reference index to the first character after
986 the closing curly brace.
987
988 Returns a new PyObject (usually a dict, but object_hook can change that)
989 */
990 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
991 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
992 PyObject *rval;
993 PyObject *pairs;
994 PyObject *item;
995 PyObject *key = NULL;
996 PyObject *val = NULL;
997 int strict = PyObject_IsTrue(s->strict);
998 Py_ssize_t next_idx;
999
1000 pairs = PyList_New(0);
1001 if (pairs == NULL)
1002 return NULL;
1003
1004 /* skip whitespace after { */
1005 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1006
1007 /* only loop if the object is non-empty */
1008 if (idx <= end_idx && str[idx] != '}') {
1009 while (idx <= end_idx) {
1010 /* read key */
1011 if (str[idx] != '"') {
1012 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
1013 goto bail;
1014 }
1015 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1016 if (key == NULL)
1017 goto bail;
1018 idx = next_idx;
1019
1020 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1021 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1022 if (idx > end_idx || str[idx] != ':') {
1023 raise_errmsg("Expecting ':' delimiter", pystr, idx);
1024 goto bail;
1025 }
1026 idx++;
1027 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1028
1029 /* read any JSON term */
1030 val = scan_once_unicode(s, pystr, idx, &next_idx);
1031 if (val == NULL)
1032 goto bail;
1033
1034 item = PyTuple_Pack(2, key, val);
1035 if (item == NULL)
1036 goto bail;
1037 Py_CLEAR(key);
1038 Py_CLEAR(val);
1039 if (PyList_Append(pairs, item) == -1) {
1040 Py_DECREF(item);
1041 goto bail;
1042 }
1043 Py_DECREF(item);
1044 idx = next_idx;
1045
1046 /* skip whitespace before } or , */
1047 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1048
1049 /* bail if the object is closed or we didn't get the , delimiter */
1050 if (idx > end_idx) break;
1051 if (str[idx] == '}') {
1052 break;
1053 }
1054 else if (str[idx] != ',') {
1055 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1056 goto bail;
1057 }
1058 idx++;
1059
1060 /* skip whitespace after , delimiter */
1061 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1062 }
1063 }
1064
1065 /* verify that idx < end_idx, str[idx] should be '}' */
1066 if (idx > end_idx || str[idx] != '}') {
1067 raise_errmsg("Expecting object", pystr, end_idx);
1068 goto bail;
1069 }
1070
1071 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1072 if (s->pairs_hook != Py_None) {
1073 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1074 if (val == NULL)
1075 goto bail;
1076 Py_DECREF(pairs);
1077 *next_idx_ptr = idx + 1;
1078 return val;
1079 }
1080
1081 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1082 pairs, NULL);
1083 if (rval == NULL)
1084 goto bail;
1085 Py_CLEAR(pairs);
1086
1087 /* if object_hook is not None: rval = object_hook(rval) */
1088 if (s->object_hook != Py_None) {
1089 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1090 if (val == NULL)
1091 goto bail;
1092 Py_DECREF(rval);
1093 rval = val;
1094 val = NULL;
1095 }
1096 *next_idx_ptr = idx + 1;
1097 return rval;
1098 bail:
1099 Py_XDECREF(key);
1100 Py_XDECREF(val);
1101 Py_XDECREF(pairs);
1102 return NULL;
1103 }
1104
1105 static PyObject *
_parse_array_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1106 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1107 /* Read a JSON array from PyString pystr.
1108 idx is the index of the first character after the opening brace.
1109 *next_idx_ptr is a return-by-reference index to the first character after
1110 the closing brace.
1111
1112 Returns a new PyList
1113 */
1114 char *str = PyString_AS_STRING(pystr);
1115 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1116 PyObject *val = NULL;
1117 PyObject *rval = PyList_New(0);
1118 Py_ssize_t next_idx;
1119 if (rval == NULL)
1120 return NULL;
1121
1122 /* skip whitespace after [ */
1123 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1124
1125 /* only loop if the array is non-empty */
1126 if (idx <= end_idx && str[idx] != ']') {
1127 while (idx <= end_idx) {
1128
1129 /* read any JSON term and de-tuplefy the (rval, idx) */
1130 val = scan_once_str(s, pystr, idx, &next_idx);
1131 if (val == NULL)
1132 goto bail;
1133
1134 if (PyList_Append(rval, val) == -1)
1135 goto bail;
1136
1137 Py_CLEAR(val);
1138 idx = next_idx;
1139
1140 /* skip whitespace between term and , */
1141 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1142
1143 /* bail if the array is closed or we didn't get the , delimiter */
1144 if (idx > end_idx) break;
1145 if (str[idx] == ']') {
1146 break;
1147 }
1148 else if (str[idx] != ',') {
1149 raise_errmsg("Expecting , delimiter", pystr, idx);
1150 goto bail;
1151 }
1152 idx++;
1153
1154 /* skip whitespace after , */
1155 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1156 }
1157 }
1158
1159 /* verify that idx < end_idx, str[idx] should be ']' */
1160 if (idx > end_idx || str[idx] != ']') {
1161 raise_errmsg("Expecting object", pystr, end_idx);
1162 goto bail;
1163 }
1164 *next_idx_ptr = idx + 1;
1165 return rval;
1166 bail:
1167 Py_XDECREF(val);
1168 Py_DECREF(rval);
1169 return NULL;
1170 }
1171
1172 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1173 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1174 /* Read a JSON array from PyString pystr.
1175 idx is the index of the first character after the opening brace.
1176 *next_idx_ptr is a return-by-reference index to the first character after
1177 the closing brace.
1178
1179 Returns a new PyList
1180 */
1181 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1182 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1183 PyObject *val = NULL;
1184 PyObject *rval = PyList_New(0);
1185 Py_ssize_t next_idx;
1186 if (rval == NULL)
1187 return NULL;
1188
1189 /* skip whitespace after [ */
1190 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1191
1192 /* only loop if the array is non-empty */
1193 if (idx <= end_idx && str[idx] != ']') {
1194 while (idx <= end_idx) {
1195
1196 /* read any JSON term */
1197 val = scan_once_unicode(s, pystr, idx, &next_idx);
1198 if (val == NULL)
1199 goto bail;
1200
1201 if (PyList_Append(rval, val) == -1)
1202 goto bail;
1203
1204 Py_CLEAR(val);
1205 idx = next_idx;
1206
1207 /* skip whitespace between term and , */
1208 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1209
1210 /* bail if the array is closed or we didn't get the , delimiter */
1211 if (idx > end_idx) break;
1212 if (str[idx] == ']') {
1213 break;
1214 }
1215 else if (str[idx] != ',') {
1216 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1217 goto bail;
1218 }
1219 idx++;
1220
1221 /* skip whitespace after , */
1222 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1223 }
1224 }
1225
1226 /* verify that idx < end_idx, str[idx] should be ']' */
1227 if (idx > end_idx || str[idx] != ']') {
1228 raise_errmsg("Expecting object", pystr, end_idx);
1229 goto bail;
1230 }
1231 *next_idx_ptr = idx + 1;
1232 return rval;
1233 bail:
1234 Py_XDECREF(val);
1235 Py_DECREF(rval);
1236 return NULL;
1237 }
1238
1239 static PyObject *
_parse_constant(PyScannerObject * s,char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1240 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1241 /* Read a JSON constant from PyString pystr.
1242 constant is the constant string that was found
1243 ("NaN", "Infinity", "-Infinity").
1244 idx is the index of the first character of the constant
1245 *next_idx_ptr is a return-by-reference index to the first character after
1246 the constant.
1247
1248 Returns the result of parse_constant
1249 */
1250 PyObject *cstr;
1251 PyObject *rval;
1252 /* constant is "NaN", "Infinity", or "-Infinity" */
1253 cstr = PyString_InternFromString(constant);
1254 if (cstr == NULL)
1255 return NULL;
1256
1257 /* rval = parse_constant(constant) */
1258 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1259 idx += PyString_GET_SIZE(cstr);
1260 Py_DECREF(cstr);
1261 *next_idx_ptr = idx;
1262 return rval;
1263 }
1264
1265 static PyObject *
_match_number_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1266 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1267 /* Read a JSON number from PyString pystr.
1268 idx is the index of the first character of the number
1269 *next_idx_ptr is a return-by-reference index to the first character after
1270 the number.
1271
1272 Returns a new PyObject representation of that number:
1273 PyInt, PyLong, or PyFloat.
1274 May return other types if parse_int or parse_float are set
1275 */
1276 char *str = PyString_AS_STRING(pystr);
1277 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1278 Py_ssize_t idx = start;
1279 int is_float = 0;
1280 PyObject *rval;
1281 PyObject *numstr;
1282
1283 /* read a sign if it's there, make sure it's not the end of the string */
1284 if (str[idx] == '-') {
1285 idx++;
1286 if (idx > end_idx) {
1287 PyErr_SetNone(PyExc_StopIteration);
1288 return NULL;
1289 }
1290 }
1291
1292 /* read as many integer digits as we find as long as it doesn't start with 0 */
1293 if (str[idx] >= '1' && str[idx] <= '9') {
1294 idx++;
1295 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1296 }
1297 /* if it starts with 0 we only expect one integer digit */
1298 else if (str[idx] == '0') {
1299 idx++;
1300 }
1301 /* no integer digits, error */
1302 else {
1303 PyErr_SetNone(PyExc_StopIteration);
1304 return NULL;
1305 }
1306
1307 /* if the next char is '.' followed by a digit then read all float digits */
1308 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1309 is_float = 1;
1310 idx += 2;
1311 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1312 }
1313
1314 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1315 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1316
1317 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1318 Py_ssize_t e_start = idx;
1319 idx++;
1320
1321 /* read an exponent sign if present */
1322 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1323
1324 /* read all digits */
1325 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1326
1327 /* if we got a digit, then parse as float. if not, backtrack */
1328 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1329 is_float = 1;
1330 }
1331 else {
1332 idx = e_start;
1333 }
1334 }
1335
1336 /* copy the section we determined to be a number */
1337 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1338 if (numstr == NULL)
1339 return NULL;
1340 if (is_float) {
1341 /* parse as a float using a fast path if available, otherwise call user defined method */
1342 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1343 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1344 }
1345 else {
1346 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1347 NULL, NULL);
1348 if (d == -1.0 && PyErr_Occurred())
1349 return NULL;
1350 rval = PyFloat_FromDouble(d);
1351 }
1352 }
1353 else {
1354 /* parse as an int using a fast path if available, otherwise call user defined method */
1355 if (s->parse_int != (PyObject *)&PyInt_Type) {
1356 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1357 }
1358 else {
1359 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1360 }
1361 }
1362 Py_DECREF(numstr);
1363 *next_idx_ptr = idx;
1364 return rval;
1365 }
1366
1367 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1368 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1369 /* Read a JSON number from PyUnicode pystr.
1370 idx is the index of the first character of the number
1371 *next_idx_ptr is a return-by-reference index to the first character after
1372 the number.
1373
1374 Returns a new PyObject representation of that number:
1375 PyInt, PyLong, or PyFloat.
1376 May return other types if parse_int or parse_float are set
1377 */
1378 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1379 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1380 Py_ssize_t idx = start;
1381 int is_float = 0;
1382 PyObject *rval;
1383 PyObject *numstr;
1384
1385 /* read a sign if it's there, make sure it's not the end of the string */
1386 if (str[idx] == '-') {
1387 idx++;
1388 if (idx > end_idx) {
1389 PyErr_SetNone(PyExc_StopIteration);
1390 return NULL;
1391 }
1392 }
1393
1394 /* read as many integer digits as we find as long as it doesn't start with 0 */
1395 if (str[idx] >= '1' && str[idx] <= '9') {
1396 idx++;
1397 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1398 }
1399 /* if it starts with 0 we only expect one integer digit */
1400 else if (str[idx] == '0') {
1401 idx++;
1402 }
1403 /* no integer digits, error */
1404 else {
1405 PyErr_SetNone(PyExc_StopIteration);
1406 return NULL;
1407 }
1408
1409 /* if the next char is '.' followed by a digit then read all float digits */
1410 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1411 is_float = 1;
1412 idx += 2;
1413 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1414 }
1415
1416 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1417 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1418 Py_ssize_t e_start = idx;
1419 idx++;
1420
1421 /* read an exponent sign if present */
1422 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1423
1424 /* read all digits */
1425 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1426
1427 /* if we got a digit, then parse as float. if not, backtrack */
1428 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1429 is_float = 1;
1430 }
1431 else {
1432 idx = e_start;
1433 }
1434 }
1435
1436 /* copy the section we determined to be a number */
1437 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1438 if (numstr == NULL)
1439 return NULL;
1440 if (is_float) {
1441 /* parse as a float using a fast path if available, otherwise call user defined method */
1442 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1443 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1444 }
1445 else {
1446 rval = PyFloat_FromString(numstr, NULL);
1447 }
1448 }
1449 else {
1450 /* no fast path for unicode -> int, just call */
1451 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1452 }
1453 Py_DECREF(numstr);
1454 *next_idx_ptr = idx;
1455 return rval;
1456 }
1457
1458 static PyObject *
scan_once_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1459 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1460 {
1461 /* Read one JSON term (of any kind) from PyString pystr.
1462 idx is the index of the first character of the term
1463 *next_idx_ptr is a return-by-reference index to the first character after
1464 the number.
1465
1466 Returns a new PyObject representation of the term.
1467 */
1468 PyObject *res;
1469 char *str = PyString_AS_STRING(pystr);
1470 Py_ssize_t length = PyString_GET_SIZE(pystr);
1471 if (idx < 0) {
1472 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1473 return NULL;
1474 }
1475 if (idx >= length) {
1476 PyErr_SetNone(PyExc_StopIteration);
1477 return NULL;
1478 }
1479 switch (str[idx]) {
1480 case '"':
1481 /* string */
1482 return scanstring_str(pystr, idx + 1,
1483 PyString_AS_STRING(s->encoding),
1484 PyObject_IsTrue(s->strict),
1485 next_idx_ptr);
1486 case '{':
1487 /* object */
1488 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1489 "from a byte string"))
1490 return NULL;
1491 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1492 Py_LeaveRecursiveCall();
1493 return res;
1494 case '[':
1495 /* array */
1496 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1497 "from a byte string"))
1498 return NULL;
1499 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1500 Py_LeaveRecursiveCall();
1501 return res;
1502 case 'n':
1503 /* null */
1504 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1505 Py_INCREF(Py_None);
1506 *next_idx_ptr = idx + 4;
1507 return Py_None;
1508 }
1509 break;
1510 case 't':
1511 /* true */
1512 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1513 Py_INCREF(Py_True);
1514 *next_idx_ptr = idx + 4;
1515 return Py_True;
1516 }
1517 break;
1518 case 'f':
1519 /* false */
1520 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1521 Py_INCREF(Py_False);
1522 *next_idx_ptr = idx + 5;
1523 return Py_False;
1524 }
1525 break;
1526 case 'N':
1527 /* NaN */
1528 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1529 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1530 }
1531 break;
1532 case 'I':
1533 /* Infinity */
1534 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1535 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1536 }
1537 break;
1538 case '-':
1539 /* -Infinity */
1540 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1541 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1542 }
1543 break;
1544 }
1545 /* Didn't find a string, object, array, or named constant. Look for a number. */
1546 return _match_number_str(s, pystr, idx, next_idx_ptr);
1547 }
1548
1549 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1550 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1551 {
1552 /* Read one JSON term (of any kind) from PyUnicode pystr.
1553 idx is the index of the first character of the term
1554 *next_idx_ptr is a return-by-reference index to the first character after
1555 the number.
1556
1557 Returns a new PyObject representation of the term.
1558 */
1559 PyObject *res;
1560 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1561 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1562 if (idx < 0) {
1563 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1564 return NULL;
1565 }
1566 if (idx >= length) {
1567 PyErr_SetNone(PyExc_StopIteration);
1568 return NULL;
1569 }
1570 switch (str[idx]) {
1571 case '"':
1572 /* string */
1573 return scanstring_unicode(pystr, idx + 1,
1574 PyObject_IsTrue(s->strict),
1575 next_idx_ptr);
1576 case '{':
1577 /* object */
1578 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1579 "from a unicode string"))
1580 return NULL;
1581 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1582 Py_LeaveRecursiveCall();
1583 return res;
1584 case '[':
1585 /* array */
1586 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1587 "from a unicode string"))
1588 return NULL;
1589 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1590 Py_LeaveRecursiveCall();
1591 return res;
1592 case 'n':
1593 /* null */
1594 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1595 Py_INCREF(Py_None);
1596 *next_idx_ptr = idx + 4;
1597 return Py_None;
1598 }
1599 break;
1600 case 't':
1601 /* true */
1602 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1603 Py_INCREF(Py_True);
1604 *next_idx_ptr = idx + 4;
1605 return Py_True;
1606 }
1607 break;
1608 case 'f':
1609 /* false */
1610 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1611 Py_INCREF(Py_False);
1612 *next_idx_ptr = idx + 5;
1613 return Py_False;
1614 }
1615 break;
1616 case 'N':
1617 /* NaN */
1618 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1619 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1620 }
1621 break;
1622 case 'I':
1623 /* Infinity */
1624 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1625 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1626 }
1627 break;
1628 case '-':
1629 /* -Infinity */
1630 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1631 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1632 }
1633 break;
1634 }
1635 /* Didn't find a string, object, array, or named constant. Look for a number. */
1636 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1637 }
1638
1639 static PyObject *
scanner_call(PyObject * self,PyObject * args,PyObject * kwds)1640 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1641 {
1642 /* Python callable interface to scan_once_{str,unicode} */
1643 PyObject *pystr;
1644 PyObject *rval;
1645 Py_ssize_t idx;
1646 Py_ssize_t next_idx = -1;
1647 static char *kwlist[] = {"string", "idx", NULL};
1648 PyScannerObject *s;
1649 assert(PyScanner_Check(self));
1650 s = (PyScannerObject *)self;
1651 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1652 return NULL;
1653
1654 if (PyString_Check(pystr)) {
1655 rval = scan_once_str(s, pystr, idx, &next_idx);
1656 }
1657 else if (PyUnicode_Check(pystr)) {
1658 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1659 }
1660 else {
1661 PyErr_Format(PyExc_TypeError,
1662 "first argument must be a string, not %.80s",
1663 Py_TYPE(pystr)->tp_name);
1664 return NULL;
1665 }
1666 return _build_rval_index_tuple(rval, next_idx);
1667 }
1668
1669 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1670 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1671 {
1672 PyScannerObject *s;
1673 s = (PyScannerObject *)type->tp_alloc(type, 0);
1674 if (s != NULL) {
1675 s->encoding = NULL;
1676 s->strict = NULL;
1677 s->object_hook = NULL;
1678 s->pairs_hook = NULL;
1679 s->parse_float = NULL;
1680 s->parse_int = NULL;
1681 s->parse_constant = NULL;
1682 }
1683 return (PyObject *)s;
1684 }
1685
1686 static int
scanner_init(PyObject * self,PyObject * args,PyObject * kwds)1687 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1688 {
1689 /* Initialize Scanner object */
1690 PyObject *ctx;
1691 static char *kwlist[] = {"context", NULL};
1692 PyScannerObject *s;
1693
1694 assert(PyScanner_Check(self));
1695 s = (PyScannerObject *)self;
1696
1697 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1698 return -1;
1699
1700 /* PyString_AS_STRING is used on encoding */
1701 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1702 if (s->encoding == NULL)
1703 goto bail;
1704 if (s->encoding == Py_None) {
1705 Py_DECREF(Py_None);
1706 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1707 }
1708 else if (PyUnicode_Check(s->encoding)) {
1709 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1710 Py_DECREF(s->encoding);
1711 s->encoding = tmp;
1712 }
1713 if (s->encoding == NULL)
1714 goto bail;
1715 if (!PyString_Check(s->encoding)) {
1716 PyErr_Format(PyExc_TypeError,
1717 "encoding must be a string, not %.80s",
1718 Py_TYPE(s->encoding)->tp_name);
1719 goto bail;
1720 }
1721
1722
1723 /* All of these will fail "gracefully" so we don't need to verify them */
1724 s->strict = PyObject_GetAttrString(ctx, "strict");
1725 if (s->strict == NULL)
1726 goto bail;
1727 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1728 if (s->object_hook == NULL)
1729 goto bail;
1730 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1731 if (s->pairs_hook == NULL)
1732 goto bail;
1733 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1734 if (s->parse_float == NULL)
1735 goto bail;
1736 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1737 if (s->parse_int == NULL)
1738 goto bail;
1739 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1740 if (s->parse_constant == NULL)
1741 goto bail;
1742
1743 return 0;
1744
1745 bail:
1746 Py_CLEAR(s->encoding);
1747 Py_CLEAR(s->strict);
1748 Py_CLEAR(s->object_hook);
1749 Py_CLEAR(s->pairs_hook);
1750 Py_CLEAR(s->parse_float);
1751 Py_CLEAR(s->parse_int);
1752 Py_CLEAR(s->parse_constant);
1753 return -1;
1754 }
1755
/* Docstring used as tp_doc of the scanner type below. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Static type object for "_json.Scanner".
   The initializer is positional, so every entry is labelled with the slot it
   fills; a plain 0 leaves that slot unset here.  Slots written as 0 next to a
   commented-out function name are presumably assigned elsewhere (e.g. during
   module initialisation) -- TODO confirm. */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1801
1802 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1803 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1804 {
1805 PyEncoderObject *s;
1806 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1807 if (s != NULL) {
1808 s->markers = NULL;
1809 s->defaultfn = NULL;
1810 s->encoder = NULL;
1811 s->indent = NULL;
1812 s->key_separator = NULL;
1813 s->item_separator = NULL;
1814 s->sort_keys = NULL;
1815 s->skipkeys = NULL;
1816 }
1817 return (PyObject *)s;
1818 }
1819
1820 static int
encoder_init(PyObject * self,PyObject * args,PyObject * kwds)1821 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1822 {
1823 /* initialize Encoder object */
1824 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1825
1826 PyEncoderObject *s;
1827 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1828 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1829
1830 assert(PyEncoder_Check(self));
1831 s = (PyEncoderObject *)self;
1832
1833 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1834 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1835 &sort_keys, &skipkeys, &allow_nan))
1836 return -1;
1837
1838 s->markers = markers;
1839 s->defaultfn = defaultfn;
1840 s->encoder = encoder;
1841 s->indent = indent;
1842 s->key_separator = key_separator;
1843 s->item_separator = item_separator;
1844 s->sort_keys = sort_keys;
1845 s->skipkeys = skipkeys;
1846 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1847 s->allow_nan = PyObject_IsTrue(allow_nan);
1848
1849 Py_INCREF(s->markers);
1850 Py_INCREF(s->defaultfn);
1851 Py_INCREF(s->encoder);
1852 Py_INCREF(s->indent);
1853 Py_INCREF(s->key_separator);
1854 Py_INCREF(s->item_separator);
1855 Py_INCREF(s->sort_keys);
1856 Py_INCREF(s->skipkeys);
1857 return 0;
1858 }
1859
1860 static PyObject *
encoder_call(PyObject * self,PyObject * args,PyObject * kwds)1861 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1862 {
1863 /* Python callable interface to encode_listencode_obj */
1864 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1865 PyObject *obj;
1866 PyObject *rval;
1867 Py_ssize_t indent_level;
1868 PyEncoderObject *s;
1869 assert(PyEncoder_Check(self));
1870 s = (PyEncoderObject *)self;
1871 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1872 &obj, _convertPyInt_AsSsize_t, &indent_level))
1873 return NULL;
1874 rval = PyList_New(0);
1875 if (rval == NULL)
1876 return NULL;
1877 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1878 Py_DECREF(rval);
1879 return NULL;
1880 }
1881 return rval;
1882 }
1883
1884 static PyObject *
_encoded_const(PyObject * obj)1885 _encoded_const(PyObject *obj)
1886 {
1887 /* Return the JSON string representation of None, True, False */
1888 if (obj == Py_None) {
1889 static PyObject *s_null = NULL;
1890 if (s_null == NULL) {
1891 s_null = PyString_InternFromString("null");
1892 }
1893 Py_INCREF(s_null);
1894 return s_null;
1895 }
1896 else if (obj == Py_True) {
1897 static PyObject *s_true = NULL;
1898 if (s_true == NULL) {
1899 s_true = PyString_InternFromString("true");
1900 }
1901 Py_INCREF(s_true);
1902 return s_true;
1903 }
1904 else if (obj == Py_False) {
1905 static PyObject *s_false = NULL;
1906 if (s_false == NULL) {
1907 s_false = PyString_InternFromString("false");
1908 }
1909 Py_INCREF(s_false);
1910 return s_false;
1911 }
1912 else {
1913 PyErr_SetString(PyExc_ValueError, "not a const");
1914 return NULL;
1915 }
1916 }
1917
1918 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1919 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1920 {
1921 /* Return the JSON representation of a PyFloat */
1922 double i = PyFloat_AS_DOUBLE(obj);
1923 if (!Py_IS_FINITE(i)) {
1924 if (!s->allow_nan) {
1925 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1926 return NULL;
1927 }
1928 if (i > 0) {
1929 return PyString_FromString("Infinity");
1930 }
1931 else if (i < 0) {
1932 return PyString_FromString("-Infinity");
1933 }
1934 else {
1935 return PyString_FromString("NaN");
1936 }
1937 }
1938 /* Use a better float format here? */
1939 return PyObject_Repr(obj);
1940 }
1941
1942 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1943 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1944 {
1945 /* Return the JSON representation of a string */
1946 if (s->fast_encode)
1947 return py_encode_basestring_ascii(NULL, obj);
1948 else
1949 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1950 }
1951
1952 static int
_steal_list_append(PyObject * lst,PyObject * stolen)1953 _steal_list_append(PyObject *lst, PyObject *stolen)
1954 {
1955 /* Append stolen and then decrement its reference count */
1956 int rval = PyList_Append(lst, stolen);
1957 Py_DECREF(stolen);
1958 return rval;
1959 }
1960
1961 static int
encoder_listencode_obj(PyEncoderObject * s,PyObject * rval,PyObject * obj,Py_ssize_t indent_level)1962 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1963 {
1964 /* Encode Python object obj to a JSON term, rval is a PyList */
1965 PyObject *newobj;
1966 int rv;
1967
1968 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1969 PyObject *cstr = _encoded_const(obj);
1970 if (cstr == NULL)
1971 return -1;
1972 return _steal_list_append(rval, cstr);
1973 }
1974 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1975 {
1976 PyObject *encoded = encoder_encode_string(s, obj);
1977 if (encoded == NULL)
1978 return -1;
1979 return _steal_list_append(rval, encoded);
1980 }
1981 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1982 PyObject *encoded = PyObject_Str(obj);
1983 if (encoded == NULL)
1984 return -1;
1985 return _steal_list_append(rval, encoded);
1986 }
1987 else if (PyFloat_Check(obj)) {
1988 PyObject *encoded = encoder_encode_float(s, obj);
1989 if (encoded == NULL)
1990 return -1;
1991 return _steal_list_append(rval, encoded);
1992 }
1993 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1994 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1995 return -1;
1996 rv = encoder_listencode_list(s, rval, obj, indent_level);
1997 Py_LeaveRecursiveCall();
1998 return rv;
1999 }
2000 else if (PyDict_Check(obj)) {
2001 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2002 return -1;
2003 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2004 Py_LeaveRecursiveCall();
2005 return rv;
2006 }
2007 else {
2008 PyObject *ident = NULL;
2009 if (s->markers != Py_None) {
2010 int has_key;
2011 ident = PyLong_FromVoidPtr(obj);
2012 if (ident == NULL)
2013 return -1;
2014 has_key = PyDict_Contains(s->markers, ident);
2015 if (has_key) {
2016 if (has_key != -1)
2017 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2018 Py_DECREF(ident);
2019 return -1;
2020 }
2021 if (PyDict_SetItem(s->markers, ident, obj)) {
2022 Py_DECREF(ident);
2023 return -1;
2024 }
2025 }
2026 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2027 if (newobj == NULL) {
2028 Py_XDECREF(ident);
2029 return -1;
2030 }
2031
2032 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2033 return -1;
2034 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2035 Py_LeaveRecursiveCall();
2036
2037 Py_DECREF(newobj);
2038 if (rv) {
2039 Py_XDECREF(ident);
2040 return -1;
2041 }
2042 if (ident != NULL) {
2043 if (PyDict_DelItem(s->markers, ident)) {
2044 Py_XDECREF(ident);
2045 return -1;
2046 }
2047 Py_XDECREF(ident);
2048 }
2049 return rv;
2050 }
2051 }
2052
2053 static int
encoder_listencode_dict(PyEncoderObject * s,PyObject * rval,PyObject * dct,Py_ssize_t indent_level)2054 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2055 {
2056 /* Encode Python dict dct a JSON term, rval is a PyList */
2057 static PyObject *open_dict = NULL;
2058 static PyObject *close_dict = NULL;
2059 static PyObject *empty_dict = NULL;
2060 PyObject *kstr = NULL;
2061 PyObject *ident = NULL;
2062 PyObject *key = NULL;
2063 PyObject *value = NULL;
2064 PyObject *it = NULL;
2065 int skipkeys;
2066 Py_ssize_t idx;
2067
2068 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2069 open_dict = PyString_InternFromString("{");
2070 close_dict = PyString_InternFromString("}");
2071 empty_dict = PyString_InternFromString("{}");
2072 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2073 return -1;
2074 }
2075 if (Py_SIZE(dct) == 0)
2076 return PyList_Append(rval, empty_dict);
2077
2078 if (s->markers != Py_None) {
2079 int has_key;
2080 ident = PyLong_FromVoidPtr(dct);
2081 if (ident == NULL)
2082 goto bail;
2083 has_key = PyDict_Contains(s->markers, ident);
2084 if (has_key) {
2085 if (has_key != -1)
2086 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2087 goto bail;
2088 }
2089 if (PyDict_SetItem(s->markers, ident, dct)) {
2090 goto bail;
2091 }
2092 }
2093
2094 if (PyList_Append(rval, open_dict))
2095 goto bail;
2096
2097 if (s->indent != Py_None) {
2098 /* TODO: DOES NOT RUN */
2099 indent_level += 1;
2100 /*
2101 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2102 separator = _item_separator + newline_indent
2103 buf += newline_indent
2104 */
2105 }
2106
2107 /* TODO: C speedup not implemented for sort_keys */
2108
2109 it = PyObject_GetIter(dct);
2110 if (it == NULL)
2111 goto bail;
2112 skipkeys = PyObject_IsTrue(s->skipkeys);
2113 idx = 0;
2114 while ((key = PyIter_Next(it)) != NULL) {
2115 PyObject *encoded;
2116
2117 if (PyString_Check(key) || PyUnicode_Check(key)) {
2118 Py_INCREF(key);
2119 kstr = key;
2120 }
2121 else if (PyFloat_Check(key)) {
2122 kstr = encoder_encode_float(s, key);
2123 if (kstr == NULL)
2124 goto bail;
2125 }
2126 else if (PyInt_Check(key) || PyLong_Check(key)) {
2127 kstr = PyObject_Str(key);
2128 if (kstr == NULL)
2129 goto bail;
2130 }
2131 else if (key == Py_True || key == Py_False || key == Py_None) {
2132 kstr = _encoded_const(key);
2133 if (kstr == NULL)
2134 goto bail;
2135 }
2136 else if (skipkeys) {
2137 Py_DECREF(key);
2138 continue;
2139 }
2140 else {
2141 /* TODO: include repr of key */
2142 PyErr_SetString(PyExc_TypeError, "keys must be a string");
2143 goto bail;
2144 }
2145
2146 if (idx) {
2147 if (PyList_Append(rval, s->item_separator))
2148 goto bail;
2149 }
2150
2151 value = PyObject_GetItem(dct, key);
2152 if (value == NULL)
2153 goto bail;
2154
2155 encoded = encoder_encode_string(s, kstr);
2156 Py_CLEAR(kstr);
2157 if (encoded == NULL)
2158 goto bail;
2159 if (PyList_Append(rval, encoded)) {
2160 Py_DECREF(encoded);
2161 goto bail;
2162 }
2163 Py_DECREF(encoded);
2164 if (PyList_Append(rval, s->key_separator))
2165 goto bail;
2166 if (encoder_listencode_obj(s, rval, value, indent_level))
2167 goto bail;
2168 idx += 1;
2169 Py_CLEAR(value);
2170 Py_DECREF(key);
2171 }
2172 if (PyErr_Occurred())
2173 goto bail;
2174 Py_CLEAR(it);
2175
2176 if (ident != NULL) {
2177 if (PyDict_DelItem(s->markers, ident))
2178 goto bail;
2179 Py_CLEAR(ident);
2180 }
2181 if (s->indent != Py_None) {
2182 /* TODO: DOES NOT RUN */
2183 /*
2184 indent_level -= 1;
2185
2186 yield '\n' + (' ' * (_indent * _current_indent_level))
2187 */
2188 }
2189 if (PyList_Append(rval, close_dict))
2190 goto bail;
2191 return 0;
2192
2193 bail:
2194 Py_XDECREF(it);
2195 Py_XDECREF(key);
2196 Py_XDECREF(value);
2197 Py_XDECREF(kstr);
2198 Py_XDECREF(ident);
2199 return -1;
2200 }
2201
2202
2203 static int
encoder_listencode_list(PyEncoderObject * s,PyObject * rval,PyObject * seq,Py_ssize_t indent_level)2204 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2205 {
2206 /* Encode Python list seq to a JSON term, rval is a PyList */
2207 static PyObject *open_array = NULL;
2208 static PyObject *close_array = NULL;
2209 static PyObject *empty_array = NULL;
2210 PyObject *ident = NULL;
2211 PyObject *s_fast = NULL;
2212 Py_ssize_t i;
2213
2214 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2215 open_array = PyString_InternFromString("[");
2216 close_array = PyString_InternFromString("]");
2217 empty_array = PyString_InternFromString("[]");
2218 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2219 return -1;
2220 }
2221 ident = NULL;
2222 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2223 if (s_fast == NULL)
2224 return -1;
2225 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
2226 Py_DECREF(s_fast);
2227 return PyList_Append(rval, empty_array);
2228 }
2229
2230 if (s->markers != Py_None) {
2231 int has_key;
2232 ident = PyLong_FromVoidPtr(seq);
2233 if (ident == NULL)
2234 goto bail;
2235 has_key = PyDict_Contains(s->markers, ident);
2236 if (has_key) {
2237 if (has_key != -1)
2238 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2239 goto bail;
2240 }
2241 if (PyDict_SetItem(s->markers, ident, seq)) {
2242 goto bail;
2243 }
2244 }
2245
2246 if (PyList_Append(rval, open_array))
2247 goto bail;
2248 if (s->indent != Py_None) {
2249 /* TODO: DOES NOT RUN */
2250 indent_level += 1;
2251 /*
2252 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2253 separator = _item_separator + newline_indent
2254 buf += newline_indent
2255 */
2256 }
2257 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2258 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
2259 if (i) {
2260 if (PyList_Append(rval, s->item_separator))
2261 goto bail;
2262 }
2263 if (encoder_listencode_obj(s, rval, obj, indent_level))
2264 goto bail;
2265 }
2266 if (ident != NULL) {
2267 if (PyDict_DelItem(s->markers, ident))
2268 goto bail;
2269 Py_CLEAR(ident);
2270 }
2271 if (s->indent != Py_None) {
2272 /* TODO: DOES NOT RUN */
2273 /*
2274 indent_level -= 1;
2275
2276 yield '\n' + (' ' * (_indent * _current_indent_level))
2277 */
2278 }
2279 if (PyList_Append(rval, close_array))
2280 goto bail;
2281 Py_DECREF(s_fast);
2282 return 0;
2283
2284 bail:
2285 Py_XDECREF(ident);
2286 Py_DECREF(s_fast);
2287 return -1;
2288 }
2289
2290 static void
encoder_dealloc(PyObject * self)2291 encoder_dealloc(PyObject *self)
2292 {
2293 /* Deallocate Encoder */
2294 encoder_clear(self);
2295 Py_TYPE(self)->tp_free(self);
2296 }
2297
2298 static int
encoder_traverse(PyObject * self,visitproc visit,void * arg)2299 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2300 {
2301 PyEncoderObject *s;
2302 assert(PyEncoder_Check(self));
2303 s = (PyEncoderObject *)self;
2304 Py_VISIT(s->markers);
2305 Py_VISIT(s->defaultfn);
2306 Py_VISIT(s->encoder);
2307 Py_VISIT(s->indent);
2308 Py_VISIT(s->key_separator);
2309 Py_VISIT(s->item_separator);
2310 Py_VISIT(s->sort_keys);
2311 Py_VISIT(s->skipkeys);
2312 return 0;
2313 }
2314
2315 static int
encoder_clear(PyObject * self)2316 encoder_clear(PyObject *self)
2317 {
2318 /* Deallocate Encoder */
2319 PyEncoderObject *s;
2320 assert(PyEncoder_Check(self));
2321 s = (PyEncoderObject *)self;
2322 Py_CLEAR(s->markers);
2323 Py_CLEAR(s->defaultfn);
2324 Py_CLEAR(s->encoder);
2325 Py_CLEAR(s->indent);
2326 Py_CLEAR(s->key_separator);
2327 Py_CLEAR(s->item_separator);
2328 Py_CLEAR(s->sort_keys);
2329 Py_CLEAR(s->skipkeys);
2330 return 0;
2331 }
2332
2333 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2334
2335 static
2336 PyTypeObject PyEncoderType = {
2337 PyObject_HEAD_INIT(NULL)
2338 0, /* tp_internal */
2339 "_json.Encoder", /* tp_name */
2340 sizeof(PyEncoderObject), /* tp_basicsize */
2341 0, /* tp_itemsize */
2342 encoder_dealloc, /* tp_dealloc */
2343 0, /* tp_print */
2344 0, /* tp_getattr */
2345 0, /* tp_setattr */
2346 0, /* tp_compare */
2347 0, /* tp_repr */
2348 0, /* tp_as_number */
2349 0, /* tp_as_sequence */
2350 0, /* tp_as_mapping */
2351 0, /* tp_hash */
2352 encoder_call, /* tp_call */
2353 0, /* tp_str */
2354 0, /* tp_getattro */
2355 0, /* tp_setattro */
2356 0, /* tp_as_buffer */
2357 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2358 encoder_doc, /* tp_doc */
2359 encoder_traverse, /* tp_traverse */
2360 encoder_clear, /* tp_clear */
2361 0, /* tp_richcompare */
2362 0, /* tp_weaklistoffset */
2363 0, /* tp_iter */
2364 0, /* tp_iternext */
2365 0, /* tp_methods */
2366 encoder_members, /* tp_members */
2367 0, /* tp_getset */
2368 0, /* tp_base */
2369 0, /* tp_dict */
2370 0, /* tp_descr_get */
2371 0, /* tp_descr_set */
2372 0, /* tp_dictoffset */
2373 encoder_init, /* tp_init */
2374 0, /* tp_alloc */
2375 encoder_new, /* tp_new */
2376 0, /* tp_free */
2377 };
2378
2379 static PyMethodDef speedups_methods[] = {
2380 {"encode_basestring_ascii",
2381 (PyCFunction)py_encode_basestring_ascii,
2382 METH_O,
2383 pydoc_encode_basestring_ascii},
2384 {"scanstring",
2385 (PyCFunction)py_scanstring,
2386 METH_VARARGS,
2387 pydoc_scanstring},
2388 {NULL, NULL, 0, NULL}
2389 };
2390
2391 PyDoc_STRVAR(module_doc,
2392 "json speedups\n");
2393
2394 void
init_json(void)2395 init_json(void)
2396 {
2397 PyObject *m;
2398 PyScannerType.tp_new = PyType_GenericNew;
2399 if (PyType_Ready(&PyScannerType) < 0)
2400 return;
2401 PyEncoderType.tp_new = PyType_GenericNew;
2402 if (PyType_Ready(&PyEncoderType) < 0)
2403 return;
2404 m = Py_InitModule3("_json", speedups_methods, module_doc);
2405 Py_INCREF((PyObject*)&PyScannerType);
2406 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2407 Py_INCREF((PyObject*)&PyEncoderType);
2408 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2409 }
2410