1 #include "Python.h"
2 #include "structmember.h"
/* Compatibility shims: supply names this module relies on when the
   Python headers being compiled against do not already provide them. */

/* Py_TYPE(): only defined here for headers older than 2.6 that lack it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Py_ssize_t and friends: for headers older than 2.5, fall back to a
   plain int and route the Ssize_t conversions through the long ones. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MIN INT_MIN
#define PY_SSIZE_T_MAX INT_MAX
#define PyInt_AsSsize_t PyInt_AsLong
#define PyInt_FromSsize_t PyInt_FromLong
#endif

/* Py_IS_FINITE(): a value is finite when it is neither infinite nor NaN. */
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif
16
/* UNUSED tags a parameter that is intentionally never read.  It expands
   to the GCC "unused" attribute when __GNUC__ is defined and to nothing
   for every other compiler. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
22
23 #define DEFAULT_ENCODING "utf-8"
24
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
32
33 typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
38 PyObject *pairs_hook;
39 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42 } PyScannerObject;
43
44 static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53 };
54
55 typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67 } PyEncoderObject;
68
69 static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79 };
80
81 static Py_ssize_t
82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83 static PyObject *
84 ascii_escape_unicode(PyObject *pystr);
85 static PyObject *
86 ascii_escape_str(PyObject *pystr);
87 static PyObject *
88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89 void init_json(void);
90 static PyObject *
91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94 static PyObject *
95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96 static PyObject *
97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98 static int
99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100 static void
101 scanner_dealloc(PyObject *self);
102 static int
103 scanner_clear(PyObject *self);
104 static PyObject *
105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106 static int
107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108 static void
109 encoder_dealloc(PyObject *self);
110 static int
111 encoder_clear(PyObject *self);
112 static int
113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116 static int
117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118 static PyObject *
119 _encoded_const(PyObject *obj);
120 static void
121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122 static PyObject *
123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124 static int
125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126 static PyObject *
127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128 static PyObject *
129 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c can be copied into an ASCII JSON string as-is,
   i.e. it is printable ASCII (' ' through '~') and is neither a backslash
   nor a double quote.  The argument is parenthesized so that expression
   arguments parse correctly; note that it is still evaluated several
   times, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for the four whitespace characters tested here:
   space, tab, line feed and carriage return.  The argument may be
   evaluated up to four times. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
133
/* Worst-case number of output bytes produced for one input character.
   MIN_EXPANSION is the length of one "\uXXXX" escape.  When
   Py_UNICODE_WIDE is defined a single character may be written as an
   escaped surrogate pair, which doubles the worst case. */
#define MIN_EXPANSION 6
#if defined(Py_UNICODE_WIDE)
#define MAX_EXPANSION (MIN_EXPANSION * 2)
#else
#define MAX_EXPANSION (MIN_EXPANSION)
#endif
140
141 static int
_convertPyInt_AsSsize_t(PyObject * o,Py_ssize_t * size_ptr)142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143 {
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
149 }
150
151 static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t * size_ptr)152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153 {
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156 }
157
158 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160 {
161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174 #ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187 #endif
188 output[chars++] = 'u';
189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
193 }
194 return chars;
195 }
196
197 static PyObject *
ascii_escape_unicode(PyObject * pystr)198 ascii_escape_unicode(PyObject *pystr)
199 {
200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
204 Py_ssize_t max_output_size;
205 Py_ssize_t chars;
206 Py_ssize_t incr;
207 PyObject *rval;
208 char *output;
209 Py_UNICODE *input_unicode;
210
211 input_chars = PyUnicode_GET_SIZE(pystr);
212 input_unicode = PyUnicode_AS_UNICODE(pystr);
213
214 output_size = input_chars;
215 incr = 2; /* for quotes */
216 /* One char input can be up to 6 chars output, estimate 4 of these */
217 incr += MIN_EXPANSION * 4;
218 if (PY_SSIZE_T_MAX - incr < output_size) {
219 PyErr_NoMemory();
220 return NULL;
221 }
222 output_size += incr;
223 if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
224 PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
225 max_output_size = PY_SSIZE_T_MAX;
226 else
227 max_output_size = 2 + (input_chars * MAX_EXPANSION);
228 rval = PyString_FromStringAndSize(NULL, output_size);
229 if (rval == NULL) {
230 return NULL;
231 }
232 output = PyString_AS_STRING(rval);
233 chars = 0;
234 output[chars++] = '"';
235 for (i = 0; i < input_chars; i++) {
236 Py_UNICODE c = input_unicode[i];
237 if (S_CHAR(c)) {
238 output[chars++] = (char)c;
239 }
240 else {
241 chars = ascii_escape_char(c, output, chars);
242 }
243 if (output_size - chars < (1 + MAX_EXPANSION)) {
244 if (output_size == PY_SSIZE_T_MAX) {
245 Py_DECREF(rval);
246 PyErr_NoMemory();
247 return NULL;
248 }
249 /* There's more than four, so let's resize by a lot */
250 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
251 output_size *= 2;
252 else
253 output_size = max_output_size;
254 if (_PyString_Resize(&rval, output_size) == -1) {
255 return NULL;
256 }
257 output = PyString_AS_STRING(rval);
258 }
259 }
260 output[chars++] = '"';
261 if (_PyString_Resize(&rval, chars) == -1) {
262 return NULL;
263 }
264 return rval;
265 }
266
267 static PyObject *
ascii_escape_str(PyObject * pystr)268 ascii_escape_str(PyObject *pystr)
269 {
270 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
271 Py_ssize_t i;
272 Py_ssize_t input_chars;
273 Py_ssize_t output_size;
274 Py_ssize_t max_output_size;
275 Py_ssize_t chars;
276 Py_ssize_t incr;
277 PyObject *rval;
278 char *output;
279 char *input_str;
280
281 input_chars = PyString_GET_SIZE(pystr);
282 input_str = PyString_AS_STRING(pystr);
283
284 /* Fast path for a string that's already ASCII */
285 for (i = 0; i < input_chars; i++) {
286 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
287 if (!S_CHAR(c)) {
288 /* If we have to escape something, scan the string for unicode */
289 Py_ssize_t j;
290 for (j = i; j < input_chars; j++) {
291 c = (Py_UNICODE)(unsigned char)input_str[j];
292 if (c > 0x7f) {
293 /* We hit a non-ASCII character, bail to unicode mode */
294 PyObject *uni;
295 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
296 if (uni == NULL) {
297 return NULL;
298 }
299 rval = ascii_escape_unicode(uni);
300 Py_DECREF(uni);
301 return rval;
302 }
303 }
304 break;
305 }
306 }
307
308 output_size = input_chars;
309 incr = 2; /* for quotes */
310 if (i != input_chars) {
311 /* One char input can be up to 6 chars output, estimate 4 of these */
312 incr += MIN_EXPANSION * 4;
313 }
314 if (PY_SSIZE_T_MAX - incr < output_size) {
315 PyErr_NoMemory();
316 return NULL;
317 }
318 output_size += incr;
319 if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
320 PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
321 max_output_size = PY_SSIZE_T_MAX;
322 else
323 max_output_size = 2 + (input_chars * MIN_EXPANSION);
324 rval = PyString_FromStringAndSize(NULL, output_size);
325 if (rval == NULL) {
326 return NULL;
327 }
328 output = PyString_AS_STRING(rval);
329 output[0] = '"';
330
331 /* We know that everything up to i is ASCII already */
332 chars = i + 1;
333 memcpy(&output[1], input_str, i);
334
335 for (; i < input_chars; i++) {
336 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
337 if (S_CHAR(c)) {
338 output[chars++] = (char)c;
339 }
340 else {
341 chars = ascii_escape_char(c, output, chars);
342 }
343 /* An ASCII char can't possibly expand to a surrogate! */
344 if (output_size - chars < (1 + MIN_EXPANSION)) {
345 if (output_size == PY_SSIZE_T_MAX) {
346 Py_DECREF(rval);
347 PyErr_NoMemory();
348 return NULL;
349 }
350 /* There's more than four, so let's resize by a lot */
351 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
352 output_size *= 2;
353 else
354 output_size = max_output_size;
355 if (_PyString_Resize(&rval, output_size) == -1) {
356 return NULL;
357 }
358 output = PyString_AS_STRING(rval);
359 }
360 }
361 output[chars++] = '"';
362 if (_PyString_Resize(&rval, chars) == -1) {
363 return NULL;
364 }
365 return rval;
366 }
367
368 static void
raise_errmsg(char * msg,PyObject * s,Py_ssize_t end)369 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
370 {
371 /* Use the Python function json.decoder.errmsg to raise a nice
372 looking ValueError exception */
373 static PyObject *errmsg_fn = NULL;
374 PyObject *pymsg;
375 if (errmsg_fn == NULL) {
376 PyObject *decoder = PyImport_ImportModule("json.decoder");
377 if (decoder == NULL)
378 return;
379 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
380 Py_DECREF(decoder);
381 if (errmsg_fn == NULL)
382 return;
383 }
384 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
385 if (pymsg) {
386 PyErr_SetObject(PyExc_ValueError, pymsg);
387 Py_DECREF(pymsg);
388 }
389 }
390
391 static PyObject *
join_list_unicode(PyObject * lst)392 join_list_unicode(PyObject *lst)
393 {
394 /* return u''.join(lst) */
395 static PyObject *joinfn = NULL;
396 if (joinfn == NULL) {
397 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
398 if (ustr == NULL)
399 return NULL;
400
401 joinfn = PyObject_GetAttrString(ustr, "join");
402 Py_DECREF(ustr);
403 if (joinfn == NULL)
404 return NULL;
405 }
406 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
407 }
408
409 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)410 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
411 /* return (rval, idx) tuple, stealing reference to rval */
412 PyObject *tpl;
413 PyObject *pyidx;
414 /*
415 steal a reference to rval, returns (rval, idx)
416 */
417 if (rval == NULL) {
418 return NULL;
419 }
420 pyidx = PyInt_FromSsize_t(idx);
421 if (pyidx == NULL) {
422 Py_DECREF(rval);
423 return NULL;
424 }
425 tpl = PyTuple_New(2);
426 if (tpl == NULL) {
427 Py_DECREF(pyidx);
428 Py_DECREF(rval);
429 return NULL;
430 }
431 PyTuple_SET_ITEM(tpl, 0, rval);
432 PyTuple_SET_ITEM(tpl, 1, pyidx);
433 return tpl;
434 }
435
436 static PyObject *
scanstring_str(PyObject * pystr,Py_ssize_t end,char * encoding,int strict,Py_ssize_t * next_end_ptr)437 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
438 {
439 /* Read the JSON string from PyString pystr.
440 end is the index of the first character after the quote.
441 encoding is the encoding of pystr (must be an ASCII superset)
442 if strict is zero then literal control characters are allowed
443 *next_end_ptr is a return-by-reference index of the character
444 after the end quote
445
446 Return value is a new PyString (if ASCII-only) or PyUnicode
447 */
448 PyObject *rval;
449 Py_ssize_t len = PyString_GET_SIZE(pystr);
450 Py_ssize_t begin = end - 1;
451 Py_ssize_t next;
452 char *buf = PyString_AS_STRING(pystr);
453 PyObject *chunks = PyList_New(0);
454 if (chunks == NULL) {
455 goto bail;
456 }
457 if (end < 0 || len <= end) {
458 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
459 goto bail;
460 }
461 while (1) {
462 /* Find the end of the string or the next escape */
463 Py_UNICODE c = 0;
464 PyObject *chunk = NULL;
465 for (next = end; next < len; next++) {
466 c = (unsigned char)buf[next];
467 if (c == '"' || c == '\\') {
468 break;
469 }
470 else if (strict && c <= 0x1f) {
471 raise_errmsg("Invalid control character at", pystr, next);
472 goto bail;
473 }
474 }
475 if (!(c == '"' || c == '\\')) {
476 raise_errmsg("Unterminated string starting at", pystr, begin);
477 goto bail;
478 }
479 /* Pick up this chunk if it's not zero length */
480 if (next != end) {
481 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
482 if (strchunk == NULL) {
483 goto bail;
484 }
485 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
486 Py_DECREF(strchunk);
487 if (chunk == NULL) {
488 goto bail;
489 }
490 if (PyList_Append(chunks, chunk)) {
491 Py_DECREF(chunk);
492 goto bail;
493 }
494 Py_DECREF(chunk);
495 }
496 next++;
497 if (c == '"') {
498 end = next;
499 break;
500 }
501 if (next == len) {
502 raise_errmsg("Unterminated string starting at", pystr, begin);
503 goto bail;
504 }
505 c = buf[next];
506 if (c != 'u') {
507 /* Non-unicode backslash escapes */
508 end = next + 1;
509 switch (c) {
510 case '"': break;
511 case '\\': break;
512 case '/': break;
513 case 'b': c = '\b'; break;
514 case 'f': c = '\f'; break;
515 case 'n': c = '\n'; break;
516 case 'r': c = '\r'; break;
517 case 't': c = '\t'; break;
518 default: c = 0;
519 }
520 if (c == 0) {
521 raise_errmsg("Invalid \\escape", pystr, end - 2);
522 goto bail;
523 }
524 }
525 else {
526 c = 0;
527 next++;
528 end = next + 4;
529 if (end >= len) {
530 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531 goto bail;
532 }
533 /* Decode 4 hex digits */
534 for (; next < end; next++) {
535 Py_UNICODE digit = buf[next];
536 c <<= 4;
537 switch (digit) {
538 case '0': case '1': case '2': case '3': case '4':
539 case '5': case '6': case '7': case '8': case '9':
540 c |= (digit - '0'); break;
541 case 'a': case 'b': case 'c': case 'd': case 'e':
542 case 'f':
543 c |= (digit - 'a' + 10); break;
544 case 'A': case 'B': case 'C': case 'D': case 'E':
545 case 'F':
546 c |= (digit - 'A' + 10); break;
547 default:
548 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
549 goto bail;
550 }
551 }
552 #ifdef Py_UNICODE_WIDE
553 /* Surrogate pair */
554 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
555 buf[next++] == '\\' &&
556 buf[next++] == 'u') {
557 Py_UNICODE c2 = 0;
558 end += 6;
559 /* Decode 4 hex digits */
560 for (; next < end; next++) {
561 Py_UNICODE digit = buf[next];
562 c2 <<= 4;
563 switch (digit) {
564 case '0': case '1': case '2': case '3': case '4':
565 case '5': case '6': case '7': case '8': case '9':
566 c2 |= (digit - '0'); break;
567 case 'a': case 'b': case 'c': case 'd': case 'e':
568 case 'f':
569 c2 |= (digit - 'a' + 10); break;
570 case 'A': case 'B': case 'C': case 'D': case 'E':
571 case 'F':
572 c2 |= (digit - 'A' + 10); break;
573 default:
574 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
575 goto bail;
576 }
577 }
578 if ((c2 & 0xfc00) == 0xdc00)
579 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
580 else
581 end -= 6;
582 }
583 #endif
584 }
585 chunk = PyUnicode_FromUnicode(&c, 1);
586 if (chunk == NULL) {
587 goto bail;
588 }
589 if (PyList_Append(chunks, chunk)) {
590 Py_DECREF(chunk);
591 goto bail;
592 }
593 Py_DECREF(chunk);
594 }
595
596 rval = join_list_unicode(chunks);
597 if (rval == NULL) {
598 goto bail;
599 }
600 Py_CLEAR(chunks);
601 *next_end_ptr = end;
602 return rval;
603 bail:
604 *next_end_ptr = -1;
605 Py_XDECREF(chunks);
606 return NULL;
607 }
608
609
610 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)611 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
612 {
613 /* Read the JSON string from PyUnicode pystr.
614 end is the index of the first character after the quote.
615 if strict is zero then literal control characters are allowed
616 *next_end_ptr is a return-by-reference index of the character
617 after the end quote
618
619 Return value is a new PyUnicode
620 */
621 PyObject *rval;
622 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
623 Py_ssize_t begin = end - 1;
624 Py_ssize_t next;
625 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
626 PyObject *chunks = PyList_New(0);
627 if (chunks == NULL) {
628 goto bail;
629 }
630 if (end < 0 || len <= end) {
631 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
632 goto bail;
633 }
634 while (1) {
635 /* Find the end of the string or the next escape */
636 Py_UNICODE c = 0;
637 PyObject *chunk = NULL;
638 for (next = end; next < len; next++) {
639 c = buf[next];
640 if (c == '"' || c == '\\') {
641 break;
642 }
643 else if (strict && c <= 0x1f) {
644 raise_errmsg("Invalid control character at", pystr, next);
645 goto bail;
646 }
647 }
648 if (!(c == '"' || c == '\\')) {
649 raise_errmsg("Unterminated string starting at", pystr, begin);
650 goto bail;
651 }
652 /* Pick up this chunk if it's not zero length */
653 if (next != end) {
654 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
655 if (chunk == NULL) {
656 goto bail;
657 }
658 if (PyList_Append(chunks, chunk)) {
659 Py_DECREF(chunk);
660 goto bail;
661 }
662 Py_DECREF(chunk);
663 }
664 next++;
665 if (c == '"') {
666 end = next;
667 break;
668 }
669 if (next == len) {
670 raise_errmsg("Unterminated string starting at", pystr, begin);
671 goto bail;
672 }
673 c = buf[next];
674 if (c != 'u') {
675 /* Non-unicode backslash escapes */
676 end = next + 1;
677 switch (c) {
678 case '"': break;
679 case '\\': break;
680 case '/': break;
681 case 'b': c = '\b'; break;
682 case 'f': c = '\f'; break;
683 case 'n': c = '\n'; break;
684 case 'r': c = '\r'; break;
685 case 't': c = '\t'; break;
686 default: c = 0;
687 }
688 if (c == 0) {
689 raise_errmsg("Invalid \\escape", pystr, end - 2);
690 goto bail;
691 }
692 }
693 else {
694 c = 0;
695 next++;
696 end = next + 4;
697 if (end >= len) {
698 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
699 goto bail;
700 }
701 /* Decode 4 hex digits */
702 for (; next < end; next++) {
703 Py_UNICODE digit = buf[next];
704 c <<= 4;
705 switch (digit) {
706 case '0': case '1': case '2': case '3': case '4':
707 case '5': case '6': case '7': case '8': case '9':
708 c |= (digit - '0'); break;
709 case 'a': case 'b': case 'c': case 'd': case 'e':
710 case 'f':
711 c |= (digit - 'a' + 10); break;
712 case 'A': case 'B': case 'C': case 'D': case 'E':
713 case 'F':
714 c |= (digit - 'A' + 10); break;
715 default:
716 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
717 goto bail;
718 }
719 }
720 #ifdef Py_UNICODE_WIDE
721 /* Surrogate pair */
722 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
723 buf[next++] == '\\' && buf[next++] == 'u') {
724 Py_UNICODE c2 = 0;
725 end += 6;
726 /* Decode 4 hex digits */
727 for (; next < end; next++) {
728 Py_UNICODE digit = buf[next];
729 c2 <<= 4;
730 switch (digit) {
731 case '0': case '1': case '2': case '3': case '4':
732 case '5': case '6': case '7': case '8': case '9':
733 c2 |= (digit - '0'); break;
734 case 'a': case 'b': case 'c': case 'd': case 'e':
735 case 'f':
736 c2 |= (digit - 'a' + 10); break;
737 case 'A': case 'B': case 'C': case 'D': case 'E':
738 case 'F':
739 c2 |= (digit - 'A' + 10); break;
740 default:
741 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
742 goto bail;
743 }
744 }
745 if ((c2 & 0xfc00) == 0xdc00)
746 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
747 else
748 end -= 6;
749 }
750 #endif
751 }
752 chunk = PyUnicode_FromUnicode(&c, 1);
753 if (chunk == NULL) {
754 goto bail;
755 }
756 if (PyList_Append(chunks, chunk)) {
757 Py_DECREF(chunk);
758 goto bail;
759 }
760 Py_DECREF(chunk);
761 }
762
763 rval = join_list_unicode(chunks);
764 if (rval == NULL) {
765 goto bail;
766 }
767 Py_DECREF(chunks);
768 *next_end_ptr = end;
769 return rval;
770 bail:
771 *next_end_ptr = -1;
772 Py_XDECREF(chunks);
773 return NULL;
774 }
775
776 PyDoc_STRVAR(pydoc_scanstring,
777 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
778 "\n"
779 "Scan the string s for a JSON string. End is the index of the\n"
780 "character in s after the quote that started the JSON string.\n"
781 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
782 "on attempt to decode an invalid string. If strict is False then literal\n"
783 "control characters are allowed in the string.\n"
784 "\n"
785 "Returns a tuple of the decoded string and the index of the character in s\n"
786 "after the end quote."
787 );
788
789 static PyObject *
py_scanstring(PyObject * self UNUSED,PyObject * args)790 py_scanstring(PyObject* self UNUSED, PyObject *args)
791 {
792 PyObject *pystr;
793 PyObject *rval;
794 Py_ssize_t end;
795 Py_ssize_t next_end = -1;
796 char *encoding = NULL;
797 int strict = 1;
798 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
799 return NULL;
800 }
801 if (encoding == NULL) {
802 encoding = DEFAULT_ENCODING;
803 }
804 if (PyString_Check(pystr)) {
805 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
806 }
807 else if (PyUnicode_Check(pystr)) {
808 rval = scanstring_unicode(pystr, end, strict, &next_end);
809 }
810 else {
811 PyErr_Format(PyExc_TypeError,
812 "first argument must be a string, not %.80s",
813 Py_TYPE(pystr)->tp_name);
814 return NULL;
815 }
816 return _build_rval_index_tuple(rval, next_end);
817 }
818
819 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
820 "encode_basestring_ascii(basestring) -> str\n"
821 "\n"
822 "Return an ASCII-only JSON representation of a Python string"
823 );
824
825 static PyObject *
py_encode_basestring_ascii(PyObject * self UNUSED,PyObject * pystr)826 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
827 {
828 /* Return an ASCII-only JSON representation of a Python string */
829 /* METH_O */
830 if (PyString_Check(pystr)) {
831 return ascii_escape_str(pystr);
832 }
833 else if (PyUnicode_Check(pystr)) {
834 return ascii_escape_unicode(pystr);
835 }
836 else {
837 PyErr_Format(PyExc_TypeError,
838 "first argument must be a string, not %.80s",
839 Py_TYPE(pystr)->tp_name);
840 return NULL;
841 }
842 }
843
844 static void
scanner_dealloc(PyObject * self)845 scanner_dealloc(PyObject *self)
846 {
847 /* Deallocate scanner object */
848 scanner_clear(self);
849 Py_TYPE(self)->tp_free(self);
850 }
851
852 static int
scanner_traverse(PyObject * self,visitproc visit,void * arg)853 scanner_traverse(PyObject *self, visitproc visit, void *arg)
854 {
855 PyScannerObject *s;
856 assert(PyScanner_Check(self));
857 s = (PyScannerObject *)self;
858 Py_VISIT(s->encoding);
859 Py_VISIT(s->strict);
860 Py_VISIT(s->object_hook);
861 Py_VISIT(s->pairs_hook);
862 Py_VISIT(s->parse_float);
863 Py_VISIT(s->parse_int);
864 Py_VISIT(s->parse_constant);
865 return 0;
866 }
867
868 static int
scanner_clear(PyObject * self)869 scanner_clear(PyObject *self)
870 {
871 PyScannerObject *s;
872 assert(PyScanner_Check(self));
873 s = (PyScannerObject *)self;
874 Py_CLEAR(s->encoding);
875 Py_CLEAR(s->strict);
876 Py_CLEAR(s->object_hook);
877 Py_CLEAR(s->pairs_hook);
878 Py_CLEAR(s->parse_float);
879 Py_CLEAR(s->parse_int);
880 Py_CLEAR(s->parse_constant);
881 return 0;
882 }
883
884 static PyObject *
_parse_object_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)885 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
886 /* Read a JSON object from PyString pystr.
887 idx is the index of the first character after the opening curly brace.
888 *next_idx_ptr is a return-by-reference index to the first character after
889 the closing curly brace.
890
891 Returns a new PyObject (usually a dict, but object_hook can change that)
892 */
893 char *str = PyString_AS_STRING(pystr);
894 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
895 PyObject *rval;
896 PyObject *pairs;
897 PyObject *item;
898 PyObject *key = NULL;
899 PyObject *val = NULL;
900 char *encoding = PyString_AS_STRING(s->encoding);
901 int strict = PyObject_IsTrue(s->strict);
902 Py_ssize_t next_idx;
903
904 if (strict < 0)
905 return NULL;
906
907 pairs = PyList_New(0);
908 if (pairs == NULL)
909 return NULL;
910
911 /* skip whitespace after { */
912 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
913
914 /* only loop if the object is non-empty */
915 if (idx <= end_idx && str[idx] != '}') {
916 while (idx <= end_idx) {
917 /* read key */
918 if (str[idx] != '"') {
919 raise_errmsg("Expecting property name", pystr, idx);
920 goto bail;
921 }
922 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
923 if (key == NULL)
924 goto bail;
925 idx = next_idx;
926
927 /* skip whitespace between key and : delimiter, read :, skip whitespace */
928 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
929 if (idx > end_idx || str[idx] != ':') {
930 raise_errmsg("Expecting : delimiter", pystr, idx);
931 goto bail;
932 }
933 idx++;
934 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
935
936 /* read any JSON data type */
937 val = scan_once_str(s, pystr, idx, &next_idx);
938 if (val == NULL)
939 goto bail;
940
941 item = PyTuple_Pack(2, key, val);
942 if (item == NULL)
943 goto bail;
944 Py_CLEAR(key);
945 Py_CLEAR(val);
946 if (PyList_Append(pairs, item) == -1) {
947 Py_DECREF(item);
948 goto bail;
949 }
950 Py_DECREF(item);
951 idx = next_idx;
952
953 /* skip whitespace before } or , */
954 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
955
956 /* bail if the object is closed or we didn't get the , delimiter */
957 if (idx > end_idx) break;
958 if (str[idx] == '}') {
959 break;
960 }
961 else if (str[idx] != ',') {
962 raise_errmsg("Expecting , delimiter", pystr, idx);
963 goto bail;
964 }
965 idx++;
966
967 /* skip whitespace after , delimiter */
968 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
969 }
970 }
971 /* verify that idx < end_idx, str[idx] should be '}' */
972 if (idx > end_idx || str[idx] != '}') {
973 raise_errmsg("Expecting object", pystr, end_idx);
974 goto bail;
975 }
976
977 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
978 if (s->pairs_hook != Py_None) {
979 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
980 if (val == NULL)
981 goto bail;
982 Py_DECREF(pairs);
983 *next_idx_ptr = idx + 1;
984 return val;
985 }
986
987 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
988 pairs, NULL);
989 if (rval == NULL)
990 goto bail;
991 Py_CLEAR(pairs);
992
993 /* if object_hook is not None: rval = object_hook(rval) */
994 if (s->object_hook != Py_None) {
995 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
996 if (val == NULL)
997 goto bail;
998 Py_DECREF(rval);
999 rval = val;
1000 val = NULL;
1001 }
1002 *next_idx_ptr = idx + 1;
1003 return rval;
1004 bail:
1005 Py_XDECREF(key);
1006 Py_XDECREF(val);
1007 Py_XDECREF(pairs);
1008 return NULL;
1009 }
1010
1011 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1012 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1013 /* Read a JSON object from PyUnicode pystr.
1014 idx is the index of the first character after the opening curly brace.
1015 *next_idx_ptr is a return-by-reference index to the first character after
1016 the closing curly brace.
1017
1018 Returns a new PyObject (usually a dict, but object_hook can change that)
1019 */
1020 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1021 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1022 PyObject *rval;
1023 PyObject *pairs;
1024 PyObject *item;
1025 PyObject *key = NULL;
1026 PyObject *val = NULL;
1027 int strict = PyObject_IsTrue(s->strict);
1028 Py_ssize_t next_idx;
1029
1030 if (strict < 0)
1031 return NULL;
1032
1033 pairs = PyList_New(0);
1034 if (pairs == NULL)
1035 return NULL;
1036
1037 /* skip whitespace after { */
1038 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1039
1040 /* only loop if the object is non-empty */
1041 if (idx <= end_idx && str[idx] != '}') {
1042 while (idx <= end_idx) {
1043 /* read key */
1044 if (str[idx] != '"') {
1045 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
1046 goto bail;
1047 }
1048 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1049 if (key == NULL)
1050 goto bail;
1051 idx = next_idx;
1052
1053 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1054 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1055 if (idx > end_idx || str[idx] != ':') {
1056 raise_errmsg("Expecting ':' delimiter", pystr, idx);
1057 goto bail;
1058 }
1059 idx++;
1060 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1061
1062 /* read any JSON term */
1063 val = scan_once_unicode(s, pystr, idx, &next_idx);
1064 if (val == NULL)
1065 goto bail;
1066
1067 item = PyTuple_Pack(2, key, val);
1068 if (item == NULL)
1069 goto bail;
1070 Py_CLEAR(key);
1071 Py_CLEAR(val);
1072 if (PyList_Append(pairs, item) == -1) {
1073 Py_DECREF(item);
1074 goto bail;
1075 }
1076 Py_DECREF(item);
1077 idx = next_idx;
1078
1079 /* skip whitespace before } or , */
1080 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1081
1082 /* bail if the object is closed or we didn't get the , delimiter */
1083 if (idx > end_idx) break;
1084 if (str[idx] == '}') {
1085 break;
1086 }
1087 else if (str[idx] != ',') {
1088 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1089 goto bail;
1090 }
1091 idx++;
1092
1093 /* skip whitespace after , delimiter */
1094 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1095 }
1096 }
1097
1098 /* verify that idx < end_idx, str[idx] should be '}' */
1099 if (idx > end_idx || str[idx] != '}') {
1100 raise_errmsg("Expecting object", pystr, end_idx);
1101 goto bail;
1102 }
1103
1104 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1105 if (s->pairs_hook != Py_None) {
1106 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1107 if (val == NULL)
1108 goto bail;
1109 Py_DECREF(pairs);
1110 *next_idx_ptr = idx + 1;
1111 return val;
1112 }
1113
1114 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1115 pairs, NULL);
1116 if (rval == NULL)
1117 goto bail;
1118 Py_CLEAR(pairs);
1119
1120 /* if object_hook is not None: rval = object_hook(rval) */
1121 if (s->object_hook != Py_None) {
1122 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1123 if (val == NULL)
1124 goto bail;
1125 Py_DECREF(rval);
1126 rval = val;
1127 val = NULL;
1128 }
1129 *next_idx_ptr = idx + 1;
1130 return rval;
1131 bail:
1132 Py_XDECREF(key);
1133 Py_XDECREF(val);
1134 Py_XDECREF(pairs);
1135 return NULL;
1136 }
1137
1138 static PyObject *
_parse_array_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1139 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1140 /* Read a JSON array from PyString pystr.
1141 idx is the index of the first character after the opening brace.
1142 *next_idx_ptr is a return-by-reference index to the first character after
1143 the closing brace.
1144
1145 Returns a new PyList
1146 */
1147 char *str = PyString_AS_STRING(pystr);
1148 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1149 PyObject *val = NULL;
1150 PyObject *rval = PyList_New(0);
1151 Py_ssize_t next_idx;
1152 if (rval == NULL)
1153 return NULL;
1154
1155 /* skip whitespace after [ */
1156 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1157
1158 /* only loop if the array is non-empty */
1159 if (idx <= end_idx && str[idx] != ']') {
1160 while (idx <= end_idx) {
1161
1162 /* read any JSON term and de-tuplefy the (rval, idx) */
1163 val = scan_once_str(s, pystr, idx, &next_idx);
1164 if (val == NULL)
1165 goto bail;
1166
1167 if (PyList_Append(rval, val) == -1)
1168 goto bail;
1169
1170 Py_CLEAR(val);
1171 idx = next_idx;
1172
1173 /* skip whitespace between term and , */
1174 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1175
1176 /* bail if the array is closed or we didn't get the , delimiter */
1177 if (idx > end_idx) break;
1178 if (str[idx] == ']') {
1179 break;
1180 }
1181 else if (str[idx] != ',') {
1182 raise_errmsg("Expecting , delimiter", pystr, idx);
1183 goto bail;
1184 }
1185 idx++;
1186
1187 /* skip whitespace after , */
1188 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1189 }
1190 }
1191
1192 /* verify that idx < end_idx, str[idx] should be ']' */
1193 if (idx > end_idx || str[idx] != ']') {
1194 raise_errmsg("Expecting object", pystr, end_idx);
1195 goto bail;
1196 }
1197 *next_idx_ptr = idx + 1;
1198 return rval;
1199 bail:
1200 Py_XDECREF(val);
1201 Py_DECREF(rval);
1202 return NULL;
1203 }
1204
1205 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1206 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1207 /* Read a JSON array from PyString pystr.
1208 idx is the index of the first character after the opening brace.
1209 *next_idx_ptr is a return-by-reference index to the first character after
1210 the closing brace.
1211
1212 Returns a new PyList
1213 */
1214 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1215 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1216 PyObject *val = NULL;
1217 PyObject *rval = PyList_New(0);
1218 Py_ssize_t next_idx;
1219 if (rval == NULL)
1220 return NULL;
1221
1222 /* skip whitespace after [ */
1223 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1224
1225 /* only loop if the array is non-empty */
1226 if (idx <= end_idx && str[idx] != ']') {
1227 while (idx <= end_idx) {
1228
1229 /* read any JSON term */
1230 val = scan_once_unicode(s, pystr, idx, &next_idx);
1231 if (val == NULL)
1232 goto bail;
1233
1234 if (PyList_Append(rval, val) == -1)
1235 goto bail;
1236
1237 Py_CLEAR(val);
1238 idx = next_idx;
1239
1240 /* skip whitespace between term and , */
1241 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1242
1243 /* bail if the array is closed or we didn't get the , delimiter */
1244 if (idx > end_idx) break;
1245 if (str[idx] == ']') {
1246 break;
1247 }
1248 else if (str[idx] != ',') {
1249 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1250 goto bail;
1251 }
1252 idx++;
1253
1254 /* skip whitespace after , */
1255 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1256 }
1257 }
1258
1259 /* verify that idx < end_idx, str[idx] should be ']' */
1260 if (idx > end_idx || str[idx] != ']') {
1261 raise_errmsg("Expecting object", pystr, end_idx);
1262 goto bail;
1263 }
1264 *next_idx_ptr = idx + 1;
1265 return rval;
1266 bail:
1267 Py_XDECREF(val);
1268 Py_DECREF(rval);
1269 return NULL;
1270 }
1271
1272 static PyObject *
_parse_constant(PyScannerObject * s,char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1273 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1274 /* Read a JSON constant from PyString pystr.
1275 constant is the constant string that was found
1276 ("NaN", "Infinity", "-Infinity").
1277 idx is the index of the first character of the constant
1278 *next_idx_ptr is a return-by-reference index to the first character after
1279 the constant.
1280
1281 Returns the result of parse_constant
1282 */
1283 PyObject *cstr;
1284 PyObject *rval;
1285 /* constant is "NaN", "Infinity", or "-Infinity" */
1286 cstr = PyString_InternFromString(constant);
1287 if (cstr == NULL)
1288 return NULL;
1289
1290 /* rval = parse_constant(constant) */
1291 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1292 idx += PyString_GET_SIZE(cstr);
1293 Py_DECREF(cstr);
1294 *next_idx_ptr = idx;
1295 return rval;
1296 }
1297
1298 static PyObject *
_match_number_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1299 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1300 /* Read a JSON number from PyString pystr.
1301 idx is the index of the first character of the number
1302 *next_idx_ptr is a return-by-reference index to the first character after
1303 the number.
1304
1305 Returns a new PyObject representation of that number:
1306 PyInt, PyLong, or PyFloat.
1307 May return other types if parse_int or parse_float are set
1308 */
1309 char *str = PyString_AS_STRING(pystr);
1310 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1311 Py_ssize_t idx = start;
1312 int is_float = 0;
1313 PyObject *rval;
1314 PyObject *numstr;
1315
1316 /* read a sign if it's there, make sure it's not the end of the string */
1317 if (str[idx] == '-') {
1318 idx++;
1319 if (idx > end_idx) {
1320 PyErr_SetNone(PyExc_StopIteration);
1321 return NULL;
1322 }
1323 }
1324
1325 /* read as many integer digits as we find as long as it doesn't start with 0 */
1326 if (str[idx] >= '1' && str[idx] <= '9') {
1327 idx++;
1328 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1329 }
1330 /* if it starts with 0 we only expect one integer digit */
1331 else if (str[idx] == '0') {
1332 idx++;
1333 }
1334 /* no integer digits, error */
1335 else {
1336 PyErr_SetNone(PyExc_StopIteration);
1337 return NULL;
1338 }
1339
1340 /* if the next char is '.' followed by a digit then read all float digits */
1341 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1342 is_float = 1;
1343 idx += 2;
1344 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1345 }
1346
1347 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1348 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1349
1350 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1351 Py_ssize_t e_start = idx;
1352 idx++;
1353
1354 /* read an exponent sign if present */
1355 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1356
1357 /* read all digits */
1358 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1359
1360 /* if we got a digit, then parse as float. if not, backtrack */
1361 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1362 is_float = 1;
1363 }
1364 else {
1365 idx = e_start;
1366 }
1367 }
1368
1369 /* copy the section we determined to be a number */
1370 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1371 if (numstr == NULL)
1372 return NULL;
1373 if (is_float) {
1374 /* parse as a float using a fast path if available, otherwise call user defined method */
1375 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1376 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1377 }
1378 else {
1379 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1380 NULL, NULL);
1381 if (d == -1.0 && PyErr_Occurred())
1382 return NULL;
1383 rval = PyFloat_FromDouble(d);
1384 }
1385 }
1386 else {
1387 /* parse as an int using a fast path if available, otherwise call user defined method */
1388 if (s->parse_int != (PyObject *)&PyInt_Type) {
1389 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1390 }
1391 else {
1392 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1393 }
1394 }
1395 Py_DECREF(numstr);
1396 *next_idx_ptr = idx;
1397 return rval;
1398 }
1399
1400 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1401 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1402 /* Read a JSON number from PyUnicode pystr.
1403 idx is the index of the first character of the number
1404 *next_idx_ptr is a return-by-reference index to the first character after
1405 the number.
1406
1407 Returns a new PyObject representation of that number:
1408 PyInt, PyLong, or PyFloat.
1409 May return other types if parse_int or parse_float are set
1410 */
1411 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1412 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1413 Py_ssize_t idx = start;
1414 int is_float = 0;
1415 PyObject *rval;
1416 PyObject *numstr;
1417
1418 /* read a sign if it's there, make sure it's not the end of the string */
1419 if (str[idx] == '-') {
1420 idx++;
1421 if (idx > end_idx) {
1422 PyErr_SetNone(PyExc_StopIteration);
1423 return NULL;
1424 }
1425 }
1426
1427 /* read as many integer digits as we find as long as it doesn't start with 0 */
1428 if (str[idx] >= '1' && str[idx] <= '9') {
1429 idx++;
1430 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1431 }
1432 /* if it starts with 0 we only expect one integer digit */
1433 else if (str[idx] == '0') {
1434 idx++;
1435 }
1436 /* no integer digits, error */
1437 else {
1438 PyErr_SetNone(PyExc_StopIteration);
1439 return NULL;
1440 }
1441
1442 /* if the next char is '.' followed by a digit then read all float digits */
1443 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1444 is_float = 1;
1445 idx += 2;
1446 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1447 }
1448
1449 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1450 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1451 Py_ssize_t e_start = idx;
1452 idx++;
1453
1454 /* read an exponent sign if present */
1455 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1456
1457 /* read all digits */
1458 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1459
1460 /* if we got a digit, then parse as float. if not, backtrack */
1461 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1462 is_float = 1;
1463 }
1464 else {
1465 idx = e_start;
1466 }
1467 }
1468
1469 /* copy the section we determined to be a number */
1470 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1471 if (numstr == NULL)
1472 return NULL;
1473 if (is_float) {
1474 /* parse as a float using a fast path if available, otherwise call user defined method */
1475 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1476 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1477 }
1478 else {
1479 rval = PyFloat_FromString(numstr, NULL);
1480 }
1481 }
1482 else {
1483 /* no fast path for unicode -> int, just call */
1484 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1485 }
1486 Py_DECREF(numstr);
1487 *next_idx_ptr = idx;
1488 return rval;
1489 }
1490
1491 static PyObject *
scan_once_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1492 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1493 {
1494 /* Read one JSON term (of any kind) from PyString pystr.
1495 idx is the index of the first character of the term
1496 *next_idx_ptr is a return-by-reference index to the first character after
1497 the number.
1498
1499 Returns a new PyObject representation of the term.
1500 */
1501 PyObject *res;
1502 int strict;
1503 char *str = PyString_AS_STRING(pystr);
1504 Py_ssize_t length = PyString_GET_SIZE(pystr);
1505 if (idx < 0) {
1506 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1507 return NULL;
1508 }
1509 if (idx >= length) {
1510 PyErr_SetNone(PyExc_StopIteration);
1511 return NULL;
1512 }
1513 switch (str[idx]) {
1514 case '"':
1515 /* string */
1516 strict = PyObject_IsTrue(s->strict);
1517 if (strict < 0)
1518 return NULL;
1519 return scanstring_str(pystr, idx + 1,
1520 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
1521 case '{':
1522 /* object */
1523 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1524 "from a byte string"))
1525 return NULL;
1526 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1527 Py_LeaveRecursiveCall();
1528 return res;
1529 case '[':
1530 /* array */
1531 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1532 "from a byte string"))
1533 return NULL;
1534 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1535 Py_LeaveRecursiveCall();
1536 return res;
1537 case 'n':
1538 /* null */
1539 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1540 Py_INCREF(Py_None);
1541 *next_idx_ptr = idx + 4;
1542 return Py_None;
1543 }
1544 break;
1545 case 't':
1546 /* true */
1547 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1548 Py_INCREF(Py_True);
1549 *next_idx_ptr = idx + 4;
1550 return Py_True;
1551 }
1552 break;
1553 case 'f':
1554 /* false */
1555 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1556 Py_INCREF(Py_False);
1557 *next_idx_ptr = idx + 5;
1558 return Py_False;
1559 }
1560 break;
1561 case 'N':
1562 /* NaN */
1563 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1564 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1565 }
1566 break;
1567 case 'I':
1568 /* Infinity */
1569 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1570 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1571 }
1572 break;
1573 case '-':
1574 /* -Infinity */
1575 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1576 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1577 }
1578 break;
1579 }
1580 /* Didn't find a string, object, array, or named constant. Look for a number. */
1581 return _match_number_str(s, pystr, idx, next_idx_ptr);
1582 }
1583
1584 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1585 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1586 {
1587 /* Read one JSON term (of any kind) from PyUnicode pystr.
1588 idx is the index of the first character of the term
1589 *next_idx_ptr is a return-by-reference index to the first character after
1590 the number.
1591
1592 Returns a new PyObject representation of the term.
1593 */
1594 PyObject *res;
1595 int strict;
1596 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1597 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1598 if (idx < 0) {
1599 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1600 return NULL;
1601 }
1602 if (idx >= length) {
1603 PyErr_SetNone(PyExc_StopIteration);
1604 return NULL;
1605 }
1606 switch (str[idx]) {
1607 case '"':
1608 /* string */
1609 strict = PyObject_IsTrue(s->strict);
1610 if (strict < 0)
1611 return NULL;
1612 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
1613 case '{':
1614 /* object */
1615 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1616 "from a unicode string"))
1617 return NULL;
1618 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1619 Py_LeaveRecursiveCall();
1620 return res;
1621 case '[':
1622 /* array */
1623 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1624 "from a unicode string"))
1625 return NULL;
1626 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1627 Py_LeaveRecursiveCall();
1628 return res;
1629 case 'n':
1630 /* null */
1631 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1632 Py_INCREF(Py_None);
1633 *next_idx_ptr = idx + 4;
1634 return Py_None;
1635 }
1636 break;
1637 case 't':
1638 /* true */
1639 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1640 Py_INCREF(Py_True);
1641 *next_idx_ptr = idx + 4;
1642 return Py_True;
1643 }
1644 break;
1645 case 'f':
1646 /* false */
1647 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1648 Py_INCREF(Py_False);
1649 *next_idx_ptr = idx + 5;
1650 return Py_False;
1651 }
1652 break;
1653 case 'N':
1654 /* NaN */
1655 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1656 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1657 }
1658 break;
1659 case 'I':
1660 /* Infinity */
1661 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1662 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1663 }
1664 break;
1665 case '-':
1666 /* -Infinity */
1667 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1668 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1669 }
1670 break;
1671 }
1672 /* Didn't find a string, object, array, or named constant. Look for a number. */
1673 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1674 }
1675
1676 static PyObject *
scanner_call(PyObject * self,PyObject * args,PyObject * kwds)1677 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1678 {
1679 /* Python callable interface to scan_once_{str,unicode} */
1680 PyObject *pystr;
1681 PyObject *rval;
1682 Py_ssize_t idx;
1683 Py_ssize_t next_idx = -1;
1684 static char *kwlist[] = {"string", "idx", NULL};
1685 PyScannerObject *s;
1686 assert(PyScanner_Check(self));
1687 s = (PyScannerObject *)self;
1688 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1689 return NULL;
1690
1691 if (PyString_Check(pystr)) {
1692 rval = scan_once_str(s, pystr, idx, &next_idx);
1693 }
1694 else if (PyUnicode_Check(pystr)) {
1695 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1696 }
1697 else {
1698 PyErr_Format(PyExc_TypeError,
1699 "first argument must be a string, not %.80s",
1700 Py_TYPE(pystr)->tp_name);
1701 return NULL;
1702 }
1703 return _build_rval_index_tuple(rval, next_idx);
1704 }
1705
1706 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1707 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1708 {
1709 PyScannerObject *s;
1710 s = (PyScannerObject *)type->tp_alloc(type, 0);
1711 if (s != NULL) {
1712 s->encoding = NULL;
1713 s->strict = NULL;
1714 s->object_hook = NULL;
1715 s->pairs_hook = NULL;
1716 s->parse_float = NULL;
1717 s->parse_int = NULL;
1718 s->parse_constant = NULL;
1719 }
1720 return (PyObject *)s;
1721 }
1722
1723 static int
scanner_init(PyObject * self,PyObject * args,PyObject * kwds)1724 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1725 {
1726 /* Initialize Scanner object */
1727 PyObject *ctx;
1728 static char *kwlist[] = {"context", NULL};
1729 PyScannerObject *s;
1730
1731 assert(PyScanner_Check(self));
1732 s = (PyScannerObject *)self;
1733
1734 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1735 return -1;
1736
1737 /* PyString_AS_STRING is used on encoding */
1738 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1739 if (s->encoding == NULL)
1740 goto bail;
1741 if (s->encoding == Py_None) {
1742 Py_DECREF(Py_None);
1743 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1744 }
1745 else if (PyUnicode_Check(s->encoding)) {
1746 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1747 Py_SETREF(s->encoding, tmp);
1748 }
1749 if (s->encoding == NULL)
1750 goto bail;
1751 if (!PyString_Check(s->encoding)) {
1752 PyErr_Format(PyExc_TypeError,
1753 "encoding must be a string, not %.80s",
1754 Py_TYPE(s->encoding)->tp_name);
1755 goto bail;
1756 }
1757
1758
1759 /* All of these will fail "gracefully" so we don't need to verify them */
1760 s->strict = PyObject_GetAttrString(ctx, "strict");
1761 if (s->strict == NULL)
1762 goto bail;
1763 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1764 if (s->object_hook == NULL)
1765 goto bail;
1766 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1767 if (s->pairs_hook == NULL)
1768 goto bail;
1769 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1770 if (s->parse_float == NULL)
1771 goto bail;
1772 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1773 if (s->parse_int == NULL)
1774 goto bail;
1775 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1776 if (s->parse_constant == NULL)
1777 goto bail;
1778
1779 return 0;
1780
1781 bail:
1782 Py_CLEAR(s->encoding);
1783 Py_CLEAR(s->strict);
1784 Py_CLEAR(s->object_hook);
1785 Py_CLEAR(s->pairs_hook);
1786 Py_CLEAR(s->parse_float);
1787 Py_CLEAR(s->parse_int);
1788 Py_CLEAR(s->parse_constant);
1789 return -1;
1790 }
1791
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for "_json.Scanner".
   Instances are created by scanner_new, configured by scanner_init and
   invoked through scanner_call.  Py_TPFLAGS_HAVE_GC is set and
   scanner_traverse / scanner_clear are installed, so instances take part
   in cyclic garbage collection.
   Slots whose intended value appears only in a comment (tp_getattro,
   tp_setattro, tp_alloc, tp_free) are left 0 here; presumably they are
   filled in when the type is readied -- that code is outside this block. */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* ob_size (the slot that follows PyObject_HEAD_INIT in Python 2) */
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1837
1838 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1839 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1840 {
1841 PyEncoderObject *s;
1842 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1843 if (s != NULL) {
1844 s->markers = NULL;
1845 s->defaultfn = NULL;
1846 s->encoder = NULL;
1847 s->indent = NULL;
1848 s->key_separator = NULL;
1849 s->item_separator = NULL;
1850 s->sort_keys = NULL;
1851 s->skipkeys = NULL;
1852 }
1853 return (PyObject *)s;
1854 }
1855
1856 static int
encoder_init(PyObject * self,PyObject * args,PyObject * kwds)1857 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1858 {
1859 /* initialize Encoder object */
1860 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1861
1862 PyEncoderObject *s;
1863 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1864 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1865 int allow_nan;
1866
1867 assert(PyEncoder_Check(self));
1868 s = (PyEncoderObject *)self;
1869
1870 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1871 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1872 &sort_keys, &skipkeys, &allow_nan_obj))
1873 return -1;
1874
1875 allow_nan = PyObject_IsTrue(allow_nan_obj);
1876 if (allow_nan < 0)
1877 return -1;
1878
1879 if (markers != Py_None && !PyDict_Check(markers)) {
1880 PyErr_Format(PyExc_TypeError,
1881 "make_encoder() argument 1 must be dict or None, "
1882 "not %.200s", Py_TYPE(markers)->tp_name);
1883 return -1;
1884 }
1885
1886 s->markers = markers;
1887 s->defaultfn = defaultfn;
1888 s->encoder = encoder;
1889 s->indent = indent;
1890 s->key_separator = key_separator;
1891 s->item_separator = item_separator;
1892 s->sort_keys = sort_keys;
1893 s->skipkeys = skipkeys;
1894 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1895 s->allow_nan = allow_nan;
1896
1897 Py_INCREF(s->markers);
1898 Py_INCREF(s->defaultfn);
1899 Py_INCREF(s->encoder);
1900 Py_INCREF(s->indent);
1901 Py_INCREF(s->key_separator);
1902 Py_INCREF(s->item_separator);
1903 Py_INCREF(s->sort_keys);
1904 Py_INCREF(s->skipkeys);
1905 return 0;
1906 }
1907
1908 static PyObject *
encoder_call(PyObject * self,PyObject * args,PyObject * kwds)1909 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1910 {
1911 /* Python callable interface to encode_listencode_obj */
1912 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1913 PyObject *obj;
1914 PyObject *rval;
1915 Py_ssize_t indent_level;
1916 PyEncoderObject *s;
1917 assert(PyEncoder_Check(self));
1918 s = (PyEncoderObject *)self;
1919 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1920 &obj, _convertPyInt_AsSsize_t, &indent_level))
1921 return NULL;
1922 rval = PyList_New(0);
1923 if (rval == NULL)
1924 return NULL;
1925 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1926 Py_DECREF(rval);
1927 return NULL;
1928 }
1929 return rval;
1930 }
1931
1932 static PyObject *
_encoded_const(PyObject * obj)1933 _encoded_const(PyObject *obj)
1934 {
1935 /* Return the JSON string representation of None, True, False */
1936 if (obj == Py_None) {
1937 static PyObject *s_null = NULL;
1938 if (s_null == NULL) {
1939 s_null = PyString_InternFromString("null");
1940 }
1941 Py_INCREF(s_null);
1942 return s_null;
1943 }
1944 else if (obj == Py_True) {
1945 static PyObject *s_true = NULL;
1946 if (s_true == NULL) {
1947 s_true = PyString_InternFromString("true");
1948 }
1949 Py_INCREF(s_true);
1950 return s_true;
1951 }
1952 else if (obj == Py_False) {
1953 static PyObject *s_false = NULL;
1954 if (s_false == NULL) {
1955 s_false = PyString_InternFromString("false");
1956 }
1957 Py_INCREF(s_false);
1958 return s_false;
1959 }
1960 else {
1961 PyErr_SetString(PyExc_ValueError, "not a const");
1962 return NULL;
1963 }
1964 }
1965
1966 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1967 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1968 {
1969 /* Return the JSON representation of a PyFloat */
1970 double i = PyFloat_AS_DOUBLE(obj);
1971 if (!Py_IS_FINITE(i)) {
1972 if (!s->allow_nan) {
1973 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1974 return NULL;
1975 }
1976 if (i > 0) {
1977 return PyString_FromString("Infinity");
1978 }
1979 else if (i < 0) {
1980 return PyString_FromString("-Infinity");
1981 }
1982 else {
1983 return PyString_FromString("NaN");
1984 }
1985 }
1986 /* Make sure to use the base float class repr method */
1987 return PyFloat_Type.tp_repr(obj);
1988 }
1989
1990 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1991 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1992 {
1993 /* Return the JSON representation of a string */
1994 if (s->fast_encode)
1995 return py_encode_basestring_ascii(NULL, obj);
1996 else
1997 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1998 }
1999
2000 static int
_steal_list_append(PyObject * lst,PyObject * stolen)2001 _steal_list_append(PyObject *lst, PyObject *stolen)
2002 {
2003 /* Append stolen and then decrement its reference count */
2004 int rval = PyList_Append(lst, stolen);
2005 Py_DECREF(stolen);
2006 return rval;
2007 }
2008
2009 static int
encoder_listencode_obj(PyEncoderObject * s,PyObject * rval,PyObject * obj,Py_ssize_t indent_level)2010 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2011 {
2012 /* Encode Python object obj to a JSON term, rval is a PyList */
2013 PyObject *newobj;
2014 int rv;
2015
2016 if (obj == Py_None || obj == Py_True || obj == Py_False) {
2017 PyObject *cstr = _encoded_const(obj);
2018 if (cstr == NULL)
2019 return -1;
2020 return _steal_list_append(rval, cstr);
2021 }
2022 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2023 {
2024 PyObject *encoded = encoder_encode_string(s, obj);
2025 if (encoded == NULL)
2026 return -1;
2027 return _steal_list_append(rval, encoded);
2028 }
2029 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2030 PyObject *encoded = PyObject_Str(obj);
2031 if (encoded == NULL)
2032 return -1;
2033 return _steal_list_append(rval, encoded);
2034 }
2035 else if (PyFloat_Check(obj)) {
2036 PyObject *encoded = encoder_encode_float(s, obj);
2037 if (encoded == NULL)
2038 return -1;
2039 return _steal_list_append(rval, encoded);
2040 }
2041 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2042 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2043 return -1;
2044 rv = encoder_listencode_list(s, rval, obj, indent_level);
2045 Py_LeaveRecursiveCall();
2046 return rv;
2047 }
2048 else if (PyDict_Check(obj)) {
2049 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2050 return -1;
2051 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2052 Py_LeaveRecursiveCall();
2053 return rv;
2054 }
2055 else {
2056 PyObject *ident = NULL;
2057 if (s->markers != Py_None) {
2058 int has_key;
2059 ident = PyLong_FromVoidPtr(obj);
2060 if (ident == NULL)
2061 return -1;
2062 has_key = PyDict_Contains(s->markers, ident);
2063 if (has_key) {
2064 if (has_key != -1)
2065 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2066 Py_DECREF(ident);
2067 return -1;
2068 }
2069 if (PyDict_SetItem(s->markers, ident, obj)) {
2070 Py_DECREF(ident);
2071 return -1;
2072 }
2073 }
2074 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2075 if (newobj == NULL) {
2076 Py_XDECREF(ident);
2077 return -1;
2078 }
2079
2080 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2081 return -1;
2082 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2083 Py_LeaveRecursiveCall();
2084
2085 Py_DECREF(newobj);
2086 if (rv) {
2087 Py_XDECREF(ident);
2088 return -1;
2089 }
2090 if (ident != NULL) {
2091 if (PyDict_DelItem(s->markers, ident)) {
2092 Py_XDECREF(ident);
2093 return -1;
2094 }
2095 Py_XDECREF(ident);
2096 }
2097 return rv;
2098 }
2099 }
2100
2101 static int
encoder_listencode_dict(PyEncoderObject * s,PyObject * rval,PyObject * dct,Py_ssize_t indent_level)2102 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2103 {
2104 /* Encode Python dict dct a JSON term, rval is a PyList */
2105 static PyObject *open_dict = NULL;
2106 static PyObject *close_dict = NULL;
2107 static PyObject *empty_dict = NULL;
2108 PyObject *kstr = NULL;
2109 PyObject *ident = NULL;
2110 PyObject *key = NULL;
2111 PyObject *value = NULL;
2112 PyObject *it = NULL;
2113 int skipkeys;
2114 Py_ssize_t idx;
2115
2116 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2117 open_dict = PyString_InternFromString("{");
2118 close_dict = PyString_InternFromString("}");
2119 empty_dict = PyString_InternFromString("{}");
2120 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2121 return -1;
2122 }
2123 if (Py_SIZE(dct) == 0)
2124 return PyList_Append(rval, empty_dict);
2125
2126 if (s->markers != Py_None) {
2127 int has_key;
2128 ident = PyLong_FromVoidPtr(dct);
2129 if (ident == NULL)
2130 goto bail;
2131 has_key = PyDict_Contains(s->markers, ident);
2132 if (has_key) {
2133 if (has_key != -1)
2134 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2135 goto bail;
2136 }
2137 if (PyDict_SetItem(s->markers, ident, dct)) {
2138 goto bail;
2139 }
2140 }
2141
2142 if (PyList_Append(rval, open_dict))
2143 goto bail;
2144
2145 if (s->indent != Py_None) {
2146 /* TODO: DOES NOT RUN */
2147 indent_level += 1;
2148 /*
2149 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2150 separator = _item_separator + newline_indent
2151 buf += newline_indent
2152 */
2153 }
2154
2155 /* TODO: C speedup not implemented for sort_keys */
2156
2157 it = PyObject_GetIter(dct);
2158 if (it == NULL)
2159 goto bail;
2160 skipkeys = PyObject_IsTrue(s->skipkeys);
2161 if (skipkeys < 0)
2162 goto bail;
2163 idx = 0;
2164 while ((key = PyIter_Next(it)) != NULL) {
2165 PyObject *encoded;
2166
2167 if (PyString_Check(key) || PyUnicode_Check(key)) {
2168 Py_INCREF(key);
2169 kstr = key;
2170 }
2171 else if (PyFloat_Check(key)) {
2172 kstr = encoder_encode_float(s, key);
2173 if (kstr == NULL)
2174 goto bail;
2175 }
2176 else if (PyInt_Check(key) || PyLong_Check(key)) {
2177 kstr = PyObject_Str(key);
2178 if (kstr == NULL)
2179 goto bail;
2180 }
2181 else if (key == Py_True || key == Py_False || key == Py_None) {
2182 kstr = _encoded_const(key);
2183 if (kstr == NULL)
2184 goto bail;
2185 }
2186 else if (skipkeys) {
2187 Py_DECREF(key);
2188 continue;
2189 }
2190 else {
2191 /* TODO: include repr of key */
2192 PyErr_SetString(PyExc_TypeError, "keys must be a string");
2193 goto bail;
2194 }
2195
2196 if (idx) {
2197 if (PyList_Append(rval, s->item_separator))
2198 goto bail;
2199 }
2200
2201 value = PyObject_GetItem(dct, key);
2202 if (value == NULL)
2203 goto bail;
2204
2205 encoded = encoder_encode_string(s, kstr);
2206 Py_CLEAR(kstr);
2207 if (encoded == NULL)
2208 goto bail;
2209 if (PyList_Append(rval, encoded)) {
2210 Py_DECREF(encoded);
2211 goto bail;
2212 }
2213 Py_DECREF(encoded);
2214 if (PyList_Append(rval, s->key_separator))
2215 goto bail;
2216 if (encoder_listencode_obj(s, rval, value, indent_level))
2217 goto bail;
2218 idx += 1;
2219 Py_CLEAR(value);
2220 Py_DECREF(key);
2221 }
2222 if (PyErr_Occurred())
2223 goto bail;
2224 Py_CLEAR(it);
2225
2226 if (ident != NULL) {
2227 if (PyDict_DelItem(s->markers, ident))
2228 goto bail;
2229 Py_CLEAR(ident);
2230 }
2231 if (s->indent != Py_None) {
2232 /* TODO: DOES NOT RUN */
2233 /*
2234 indent_level -= 1;
2235
2236 yield '\n' + (' ' * (_indent * _current_indent_level))
2237 */
2238 }
2239 if (PyList_Append(rval, close_dict))
2240 goto bail;
2241 return 0;
2242
2243 bail:
2244 Py_XDECREF(it);
2245 Py_XDECREF(key);
2246 Py_XDECREF(value);
2247 Py_XDECREF(kstr);
2248 Py_XDECREF(ident);
2249 return -1;
2250 }
2251
2252
2253 static int
encoder_listencode_list(PyEncoderObject * s,PyObject * rval,PyObject * seq,Py_ssize_t indent_level)2254 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2255 {
2256 /* Encode Python list seq to a JSON term, rval is a PyList */
2257 static PyObject *open_array = NULL;
2258 static PyObject *close_array = NULL;
2259 static PyObject *empty_array = NULL;
2260 PyObject *ident = NULL;
2261 PyObject *s_fast = NULL;
2262 Py_ssize_t i;
2263
2264 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2265 open_array = PyString_InternFromString("[");
2266 close_array = PyString_InternFromString("]");
2267 empty_array = PyString_InternFromString("[]");
2268 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2269 return -1;
2270 }
2271 ident = NULL;
2272 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2273 if (s_fast == NULL)
2274 return -1;
2275 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
2276 Py_DECREF(s_fast);
2277 return PyList_Append(rval, empty_array);
2278 }
2279
2280 if (s->markers != Py_None) {
2281 int has_key;
2282 ident = PyLong_FromVoidPtr(seq);
2283 if (ident == NULL)
2284 goto bail;
2285 has_key = PyDict_Contains(s->markers, ident);
2286 if (has_key) {
2287 if (has_key != -1)
2288 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2289 goto bail;
2290 }
2291 if (PyDict_SetItem(s->markers, ident, seq)) {
2292 goto bail;
2293 }
2294 }
2295
2296 if (PyList_Append(rval, open_array))
2297 goto bail;
2298 if (s->indent != Py_None) {
2299 /* TODO: DOES NOT RUN */
2300 indent_level += 1;
2301 /*
2302 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2303 separator = _item_separator + newline_indent
2304 buf += newline_indent
2305 */
2306 }
2307 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2308 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
2309 if (i) {
2310 if (PyList_Append(rval, s->item_separator))
2311 goto bail;
2312 }
2313 if (encoder_listencode_obj(s, rval, obj, indent_level))
2314 goto bail;
2315 }
2316 if (ident != NULL) {
2317 if (PyDict_DelItem(s->markers, ident))
2318 goto bail;
2319 Py_CLEAR(ident);
2320 }
2321 if (s->indent != Py_None) {
2322 /* TODO: DOES NOT RUN */
2323 /*
2324 indent_level -= 1;
2325
2326 yield '\n' + (' ' * (_indent * _current_indent_level))
2327 */
2328 }
2329 if (PyList_Append(rval, close_array))
2330 goto bail;
2331 Py_DECREF(s_fast);
2332 return 0;
2333
2334 bail:
2335 Py_XDECREF(ident);
2336 Py_DECREF(s_fast);
2337 return -1;
2338 }
2339
2340 static void
encoder_dealloc(PyObject * self)2341 encoder_dealloc(PyObject *self)
2342 {
2343 /* Deallocate Encoder */
2344 encoder_clear(self);
2345 Py_TYPE(self)->tp_free(self);
2346 }
2347
2348 static int
encoder_traverse(PyObject * self,visitproc visit,void * arg)2349 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2350 {
2351 PyEncoderObject *s;
2352 assert(PyEncoder_Check(self));
2353 s = (PyEncoderObject *)self;
2354 Py_VISIT(s->markers);
2355 Py_VISIT(s->defaultfn);
2356 Py_VISIT(s->encoder);
2357 Py_VISIT(s->indent);
2358 Py_VISIT(s->key_separator);
2359 Py_VISIT(s->item_separator);
2360 Py_VISIT(s->sort_keys);
2361 Py_VISIT(s->skipkeys);
2362 return 0;
2363 }
2364
2365 static int
encoder_clear(PyObject * self)2366 encoder_clear(PyObject *self)
2367 {
2368 /* Deallocate Encoder */
2369 PyEncoderObject *s;
2370 assert(PyEncoder_Check(self));
2371 s = (PyEncoderObject *)self;
2372 Py_CLEAR(s->markers);
2373 Py_CLEAR(s->defaultfn);
2374 Py_CLEAR(s->encoder);
2375 Py_CLEAR(s->indent);
2376 Py_CLEAR(s->key_separator);
2377 Py_CLEAR(s->item_separator);
2378 Py_CLEAR(s->sort_keys);
2379 Py_CLEAR(s->skipkeys);
2380 return 0;
2381 }
2382
2383 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2384
2385 static
2386 PyTypeObject PyEncoderType = {
2387 PyObject_HEAD_INIT(NULL)
2388 0, /* tp_internal */
2389 "_json.Encoder", /* tp_name */
2390 sizeof(PyEncoderObject), /* tp_basicsize */
2391 0, /* tp_itemsize */
2392 encoder_dealloc, /* tp_dealloc */
2393 0, /* tp_print */
2394 0, /* tp_getattr */
2395 0, /* tp_setattr */
2396 0, /* tp_compare */
2397 0, /* tp_repr */
2398 0, /* tp_as_number */
2399 0, /* tp_as_sequence */
2400 0, /* tp_as_mapping */
2401 0, /* tp_hash */
2402 encoder_call, /* tp_call */
2403 0, /* tp_str */
2404 0, /* tp_getattro */
2405 0, /* tp_setattro */
2406 0, /* tp_as_buffer */
2407 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2408 encoder_doc, /* tp_doc */
2409 encoder_traverse, /* tp_traverse */
2410 encoder_clear, /* tp_clear */
2411 0, /* tp_richcompare */
2412 0, /* tp_weaklistoffset */
2413 0, /* tp_iter */
2414 0, /* tp_iternext */
2415 0, /* tp_methods */
2416 encoder_members, /* tp_members */
2417 0, /* tp_getset */
2418 0, /* tp_base */
2419 0, /* tp_dict */
2420 0, /* tp_descr_get */
2421 0, /* tp_descr_set */
2422 0, /* tp_dictoffset */
2423 encoder_init, /* tp_init */
2424 0, /* tp_alloc */
2425 encoder_new, /* tp_new */
2426 0, /* tp_free */
2427 };
2428
2429 static PyMethodDef speedups_methods[] = {
2430 {"encode_basestring_ascii",
2431 (PyCFunction)py_encode_basestring_ascii,
2432 METH_O,
2433 pydoc_encode_basestring_ascii},
2434 {"scanstring",
2435 (PyCFunction)py_scanstring,
2436 METH_VARARGS,
2437 pydoc_scanstring},
2438 {NULL, NULL, 0, NULL}
2439 };
2440
2441 PyDoc_STRVAR(module_doc,
2442 "json speedups\n");
2443
2444 void
init_json(void)2445 init_json(void)
2446 {
2447 PyObject *m;
2448 PyScannerType.tp_new = PyType_GenericNew;
2449 if (PyType_Ready(&PyScannerType) < 0)
2450 return;
2451 PyEncoderType.tp_new = PyType_GenericNew;
2452 if (PyType_Ready(&PyEncoderType) < 0)
2453 return;
2454 m = Py_InitModule3("_json", speedups_methods, module_doc);
2455 if (m == NULL)
2456 return;
2457 Py_INCREF((PyObject*)&PyScannerType);
2458 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2459 Py_INCREF((PyObject*)&PyEncoderType);
2460 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2461 }
2462