• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4 #define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
5 #endif
6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7 typedef int Py_ssize_t;
8 #define PY_SSIZE_T_MAX INT_MAX
9 #define PY_SSIZE_T_MIN INT_MIN
10 #define PyInt_FromSsize_t PyInt_FromLong
11 #define PyInt_AsSsize_t PyInt_AsLong
12 #endif
13 #ifndef Py_IS_FINITE
14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15 #endif
16 
17 #ifdef __GNUC__
18 #define UNUSED __attribute__((__unused__))
19 #else
20 #define UNUSED
21 #endif
22 
23 #define DEFAULT_ENCODING "utf-8"
24 
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29 
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
32 
/* Instance layout of the scanner type (PyScannerType).
   Every PyObject* field below is visited by scanner_traverse, released by
   scanner_clear, and published as a read-only attribute by scanner_members. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    PyObject *encoding;
    PyObject *strict;
    PyObject *object_hook;
    PyObject *pairs_hook;       /* published under the name "object_pairs_hook" */
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
} PyScannerObject;
43 
/* Attribute table for PyScannerObject: each entry maps a Python attribute
   name to the struct field at the given offset. All entries are READONLY.
   The {NULL} entry terminates the table. */
static PyMemberDef scanner_members[] = {
    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
    /* the attribute name differs from the field name (pairs_hook) */
    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
    {NULL}
};
54 
/* Instance layout of the encoder type (PyEncoderType).
   The eight PyObject* fields are published as read-only attributes by
   encoder_members; fast_encode and allow_nan are plain C ints that have no
   entry in that table. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;        /* published under the name "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    PyObject *sort_keys;
    PyObject *skipkeys;
    int fast_encode;
    int allow_nan;
} PyEncoderObject;
68 
/* Attribute table for PyEncoderObject: each entry maps a Python attribute
   name to the struct field at the given offset. All entries are READONLY.
   The {NULL} entry terminates the table. */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    /* the attribute name differs from the field name (defaultfn) */
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}
};
80 
81 static Py_ssize_t
82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83 static PyObject *
84 ascii_escape_unicode(PyObject *pystr);
85 static PyObject *
86 ascii_escape_str(PyObject *pystr);
87 static PyObject *
88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89 void init_json(void);
90 static PyObject *
91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94 static PyObject *
95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96 static PyObject *
97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98 static int
99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100 static void
101 scanner_dealloc(PyObject *self);
102 static int
103 scanner_clear(PyObject *self);
104 static PyObject *
105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106 static int
107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108 static void
109 encoder_dealloc(PyObject *self);
110 static int
111 encoder_clear(PyObject *self);
112 static int
113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116 static int
117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118 static PyObject *
119 _encoded_const(PyObject *obj);
120 static void
121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122 static PyObject *
123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124 static int
125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126 static PyObject *
127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128 static PyObject *
129 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130 
/* True when c is a printable ASCII character (' ' through '~') other than
   backslash and double quote. The argument is parenthesized so that
   expressions containing low-precedence operators expand correctly; it is
   still evaluated more than once, so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True when c is a space, tab, line feed or carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
133 
134 #define MIN_EXPANSION 6
135 #ifdef Py_UNICODE_WIDE
136 #define MAX_EXPANSION (2 * MIN_EXPANSION)
137 #else
138 #define MAX_EXPANSION MIN_EXPANSION
139 #endif
140 
141 static int
_convertPyInt_AsSsize_t(PyObject * o,Py_ssize_t * size_ptr)142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143 {
144     /* PyObject to Py_ssize_t converter */
145     *size_ptr = PyInt_AsSsize_t(o);
146     if (*size_ptr == -1 && PyErr_Occurred())
147         return 0;
148     return 1;
149 }
150 
151 static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t * size_ptr)152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153 {
154     /* Py_ssize_t to PyObject converter */
155     return PyInt_FromSsize_t(*size_ptr);
156 }
157 
158 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160 {
161     /* Escape unicode code point c to ASCII escape sequences
162     in char *output. output must have at least 12 bytes unused to
163     accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164     output[chars++] = '\\';
165     switch (c) {
166         case '\\': output[chars++] = (char)c; break;
167         case '"': output[chars++] = (char)c; break;
168         case '\b': output[chars++] = 'b'; break;
169         case '\f': output[chars++] = 'f'; break;
170         case '\n': output[chars++] = 'n'; break;
171         case '\r': output[chars++] = 'r'; break;
172         case '\t': output[chars++] = 't'; break;
173         default:
174 #ifdef Py_UNICODE_WIDE
175             if (c >= 0x10000) {
176                 /* UTF-16 surrogate pair */
177                 Py_UNICODE v = c - 0x10000;
178                 c = 0xd800 | ((v >> 10) & 0x3ff);
179                 output[chars++] = 'u';
180                 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181                 output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
182                 output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
183                 output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
184                 c = 0xdc00 | (v & 0x3ff);
185                 output[chars++] = '\\';
186             }
187 #endif
188             output[chars++] = 'u';
189             output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190             output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
191             output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
192             output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
193     }
194     return chars;
195 }
196 
197 static PyObject *
ascii_escape_unicode(PyObject * pystr)198 ascii_escape_unicode(PyObject *pystr)
199 {
200     /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201     Py_ssize_t i;
202     Py_ssize_t input_chars;
203     Py_ssize_t output_size;
204     Py_ssize_t max_output_size;
205     Py_ssize_t chars;
206     PyObject *rval;
207     char *output;
208     Py_UNICODE *input_unicode;
209 
210     input_chars = PyUnicode_GET_SIZE(pystr);
211     input_unicode = PyUnicode_AS_UNICODE(pystr);
212 
213     /* One char input can be up to 6 chars output, estimate 4 of these */
214     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
215     max_output_size = 2 + (input_chars * MAX_EXPANSION);
216     rval = PyString_FromStringAndSize(NULL, output_size);
217     if (rval == NULL) {
218         return NULL;
219     }
220     output = PyString_AS_STRING(rval);
221     chars = 0;
222     output[chars++] = '"';
223     for (i = 0; i < input_chars; i++) {
224         Py_UNICODE c = input_unicode[i];
225         if (S_CHAR(c)) {
226             output[chars++] = (char)c;
227         }
228         else {
229             chars = ascii_escape_char(c, output, chars);
230         }
231         if (output_size - chars < (1 + MAX_EXPANSION)) {
232             /* There's more than four, so let's resize by a lot */
233             Py_ssize_t new_output_size = output_size * 2;
234             /* This is an upper bound */
235             if (new_output_size > max_output_size) {
236                 new_output_size = max_output_size;
237             }
238             /* Make sure that the output size changed before resizing */
239             if (new_output_size != output_size) {
240                 output_size = new_output_size;
241                 if (_PyString_Resize(&rval, output_size) == -1) {
242                     return NULL;
243                 }
244                 output = PyString_AS_STRING(rval);
245             }
246         }
247     }
248     output[chars++] = '"';
249     if (_PyString_Resize(&rval, chars) == -1) {
250         return NULL;
251     }
252     return rval;
253 }
254 
255 static PyObject *
ascii_escape_str(PyObject * pystr)256 ascii_escape_str(PyObject *pystr)
257 {
258     /* Take a PyString pystr and return a new ASCII-only escaped PyString */
259     Py_ssize_t i;
260     Py_ssize_t input_chars;
261     Py_ssize_t output_size;
262     Py_ssize_t chars;
263     PyObject *rval;
264     char *output;
265     char *input_str;
266 
267     input_chars = PyString_GET_SIZE(pystr);
268     input_str = PyString_AS_STRING(pystr);
269 
270     /* Fast path for a string that's already ASCII */
271     for (i = 0; i < input_chars; i++) {
272         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273         if (!S_CHAR(c)) {
274             /* If we have to escape something, scan the string for unicode */
275             Py_ssize_t j;
276             for (j = i; j < input_chars; j++) {
277                 c = (Py_UNICODE)(unsigned char)input_str[j];
278                 if (c > 0x7f) {
279                     /* We hit a non-ASCII character, bail to unicode mode */
280                     PyObject *uni;
281                     uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282                     if (uni == NULL) {
283                         return NULL;
284                     }
285                     rval = ascii_escape_unicode(uni);
286                     Py_DECREF(uni);
287                     return rval;
288                 }
289             }
290             break;
291         }
292     }
293 
294     if (i == input_chars) {
295         /* Input is already ASCII */
296         output_size = 2 + input_chars;
297     }
298     else {
299         /* One char input can be up to 6 chars output, estimate 4 of these */
300         output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301     }
302     rval = PyString_FromStringAndSize(NULL, output_size);
303     if (rval == NULL) {
304         return NULL;
305     }
306     output = PyString_AS_STRING(rval);
307     output[0] = '"';
308 
309     /* We know that everything up to i is ASCII already */
310     chars = i + 1;
311     memcpy(&output[1], input_str, i);
312 
313     for (; i < input_chars; i++) {
314         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
315         if (S_CHAR(c)) {
316             output[chars++] = (char)c;
317         }
318         else {
319             chars = ascii_escape_char(c, output, chars);
320         }
321         /* An ASCII char can't possibly expand to a surrogate! */
322         if (output_size - chars < (1 + MIN_EXPANSION)) {
323             /* There's more than four, so let's resize by a lot */
324             output_size *= 2;
325             if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326                 output_size = 2 + (input_chars * MIN_EXPANSION);
327             }
328             if (_PyString_Resize(&rval, output_size) == -1) {
329                 return NULL;
330             }
331             output = PyString_AS_STRING(rval);
332         }
333     }
334     output[chars++] = '"';
335     if (_PyString_Resize(&rval, chars) == -1) {
336         return NULL;
337     }
338     return rval;
339 }
340 
341 static void
raise_errmsg(char * msg,PyObject * s,Py_ssize_t end)342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343 {
344     /* Use the Python function json.decoder.errmsg to raise a nice
345     looking ValueError exception */
346     static PyObject *errmsg_fn = NULL;
347     PyObject *pymsg;
348     if (errmsg_fn == NULL) {
349         PyObject *decoder = PyImport_ImportModule("json.decoder");
350         if (decoder == NULL)
351             return;
352         errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
353         Py_DECREF(decoder);
354         if (errmsg_fn == NULL)
355             return;
356     }
357     pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
358     if (pymsg) {
359         PyErr_SetObject(PyExc_ValueError, pymsg);
360         Py_DECREF(pymsg);
361     }
362 }
363 
364 static PyObject *
join_list_unicode(PyObject * lst)365 join_list_unicode(PyObject *lst)
366 {
367     /* return u''.join(lst) */
368     static PyObject *joinfn = NULL;
369     if (joinfn == NULL) {
370         PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371         if (ustr == NULL)
372             return NULL;
373 
374         joinfn = PyObject_GetAttrString(ustr, "join");
375         Py_DECREF(ustr);
376         if (joinfn == NULL)
377             return NULL;
378     }
379     return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
380 }
381 
382 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)383 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384     /* return (rval, idx) tuple, stealing reference to rval */
385     PyObject *tpl;
386     PyObject *pyidx;
387     /*
388     steal a reference to rval, returns (rval, idx)
389     */
390     if (rval == NULL) {
391         return NULL;
392     }
393     pyidx = PyInt_FromSsize_t(idx);
394     if (pyidx == NULL) {
395         Py_DECREF(rval);
396         return NULL;
397     }
398     tpl = PyTuple_New(2);
399     if (tpl == NULL) {
400         Py_DECREF(pyidx);
401         Py_DECREF(rval);
402         return NULL;
403     }
404     PyTuple_SET_ITEM(tpl, 0, rval);
405     PyTuple_SET_ITEM(tpl, 1, pyidx);
406     return tpl;
407 }
408 
409 static PyObject *
scanstring_str(PyObject * pystr,Py_ssize_t end,char * encoding,int strict,Py_ssize_t * next_end_ptr)410 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411 {
412     /* Read the JSON string from PyString pystr.
413     end is the index of the first character after the quote.
414     encoding is the encoding of pystr (must be an ASCII superset)
415     if strict is zero then literal control characters are allowed
416     *next_end_ptr is a return-by-reference index of the character
417         after the end quote
418 
419     Return value is a new PyString (if ASCII-only) or PyUnicode
420     */
421     PyObject *rval;
422     Py_ssize_t len = PyString_GET_SIZE(pystr);
423     Py_ssize_t begin = end - 1;
424     Py_ssize_t next;
425     char *buf = PyString_AS_STRING(pystr);
426     PyObject *chunks = PyList_New(0);
427     if (chunks == NULL) {
428         goto bail;
429     }
430     if (end < 0 || len <= end) {
431         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432         goto bail;
433     }
434     while (1) {
435         /* Find the end of the string or the next escape */
436         Py_UNICODE c = 0;
437         PyObject *chunk = NULL;
438         for (next = end; next < len; next++) {
439             c = (unsigned char)buf[next];
440             if (c == '"' || c == '\\') {
441                 break;
442             }
443             else if (strict && c <= 0x1f) {
444                 raise_errmsg("Invalid control character at", pystr, next);
445                 goto bail;
446             }
447         }
448         if (!(c == '"' || c == '\\')) {
449             raise_errmsg("Unterminated string starting at", pystr, begin);
450             goto bail;
451         }
452         /* Pick up this chunk if it's not zero length */
453         if (next != end) {
454             PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
455             if (strchunk == NULL) {
456                 goto bail;
457             }
458             chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459             Py_DECREF(strchunk);
460             if (chunk == NULL) {
461                 goto bail;
462             }
463             if (PyList_Append(chunks, chunk)) {
464                 Py_DECREF(chunk);
465                 goto bail;
466             }
467             Py_DECREF(chunk);
468         }
469         next++;
470         if (c == '"') {
471             end = next;
472             break;
473         }
474         if (next == len) {
475             raise_errmsg("Unterminated string starting at", pystr, begin);
476             goto bail;
477         }
478         c = buf[next];
479         if (c != 'u') {
480             /* Non-unicode backslash escapes */
481             end = next + 1;
482             switch (c) {
483                 case '"': break;
484                 case '\\': break;
485                 case '/': break;
486                 case 'b': c = '\b'; break;
487                 case 'f': c = '\f'; break;
488                 case 'n': c = '\n'; break;
489                 case 'r': c = '\r'; break;
490                 case 't': c = '\t'; break;
491                 default: c = 0;
492             }
493             if (c == 0) {
494                 raise_errmsg("Invalid \\escape", pystr, end - 2);
495                 goto bail;
496             }
497         }
498         else {
499             c = 0;
500             next++;
501             end = next + 4;
502             if (end >= len) {
503                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504                 goto bail;
505             }
506             /* Decode 4 hex digits */
507             for (; next < end; next++) {
508                 Py_UNICODE digit = buf[next];
509                 c <<= 4;
510                 switch (digit) {
511                     case '0': case '1': case '2': case '3': case '4':
512                     case '5': case '6': case '7': case '8': case '9':
513                         c |= (digit - '0'); break;
514                     case 'a': case 'b': case 'c': case 'd': case 'e':
515                     case 'f':
516                         c |= (digit - 'a' + 10); break;
517                     case 'A': case 'B': case 'C': case 'D': case 'E':
518                     case 'F':
519                         c |= (digit - 'A' + 10); break;
520                     default:
521                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522                         goto bail;
523                 }
524             }
525 #ifdef Py_UNICODE_WIDE
526             /* Surrogate pair */
527             if ((c & 0xfc00) == 0xd800) {
528                 Py_UNICODE c2 = 0;
529                 if (end + 6 >= len) {
530                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
531                     goto bail;
532                 }
533                 if (buf[next++] != '\\' || buf[next++] != 'u') {
534                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
535                     goto bail;
536                 }
537                 end += 6;
538                 /* Decode 4 hex digits */
539                 for (; next < end; next++) {
540                     Py_UNICODE digit = buf[next];
541                     c2 <<= 4;
542                     switch (digit) {
543                         case '0': case '1': case '2': case '3': case '4':
544                         case '5': case '6': case '7': case '8': case '9':
545                             c2 |= (digit - '0'); break;
546                         case 'a': case 'b': case 'c': case 'd': case 'e':
547                         case 'f':
548                             c2 |= (digit - 'a' + 10); break;
549                         case 'A': case 'B': case 'C': case 'D': case 'E':
550                         case 'F':
551                             c2 |= (digit - 'A' + 10); break;
552                         default:
553                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
554                             goto bail;
555                     }
556                 }
557                 if ((c2 & 0xfc00) != 0xdc00) {
558                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
559                     goto bail;
560                 }
561                 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
562             }
563             else if ((c & 0xfc00) == 0xdc00) {
564                 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
565                 goto bail;
566             }
567 #endif
568         }
569         chunk = PyUnicode_FromUnicode(&c, 1);
570         if (chunk == NULL) {
571             goto bail;
572         }
573         if (PyList_Append(chunks, chunk)) {
574             Py_DECREF(chunk);
575             goto bail;
576         }
577         Py_DECREF(chunk);
578     }
579 
580     rval = join_list_unicode(chunks);
581     if (rval == NULL) {
582         goto bail;
583     }
584     Py_CLEAR(chunks);
585     *next_end_ptr = end;
586     return rval;
587 bail:
588     *next_end_ptr = -1;
589     Py_XDECREF(chunks);
590     return NULL;
591 }
592 
593 
594 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)595 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
596 {
597     /* Read the JSON string from PyUnicode pystr.
598     end is the index of the first character after the quote.
599     if strict is zero then literal control characters are allowed
600     *next_end_ptr is a return-by-reference index of the character
601         after the end quote
602 
603     Return value is a new PyUnicode
604     */
605     PyObject *rval;
606     Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
607     Py_ssize_t begin = end - 1;
608     Py_ssize_t next;
609     const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
610     PyObject *chunks = PyList_New(0);
611     if (chunks == NULL) {
612         goto bail;
613     }
614     if (end < 0 || len <= end) {
615         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
616         goto bail;
617     }
618     while (1) {
619         /* Find the end of the string or the next escape */
620         Py_UNICODE c = 0;
621         PyObject *chunk = NULL;
622         for (next = end; next < len; next++) {
623             c = buf[next];
624             if (c == '"' || c == '\\') {
625                 break;
626             }
627             else if (strict && c <= 0x1f) {
628                 raise_errmsg("Invalid control character at", pystr, next);
629                 goto bail;
630             }
631         }
632         if (!(c == '"' || c == '\\')) {
633             raise_errmsg("Unterminated string starting at", pystr, begin);
634             goto bail;
635         }
636         /* Pick up this chunk if it's not zero length */
637         if (next != end) {
638             chunk = PyUnicode_FromUnicode(&buf[end], next - end);
639             if (chunk == NULL) {
640                 goto bail;
641             }
642             if (PyList_Append(chunks, chunk)) {
643                 Py_DECREF(chunk);
644                 goto bail;
645             }
646             Py_DECREF(chunk);
647         }
648         next++;
649         if (c == '"') {
650             end = next;
651             break;
652         }
653         if (next == len) {
654             raise_errmsg("Unterminated string starting at", pystr, begin);
655             goto bail;
656         }
657         c = buf[next];
658         if (c != 'u') {
659             /* Non-unicode backslash escapes */
660             end = next + 1;
661             switch (c) {
662                 case '"': break;
663                 case '\\': break;
664                 case '/': break;
665                 case 'b': c = '\b'; break;
666                 case 'f': c = '\f'; break;
667                 case 'n': c = '\n'; break;
668                 case 'r': c = '\r'; break;
669                 case 't': c = '\t'; break;
670                 default: c = 0;
671             }
672             if (c == 0) {
673                 raise_errmsg("Invalid \\escape", pystr, end - 2);
674                 goto bail;
675             }
676         }
677         else {
678             c = 0;
679             next++;
680             end = next + 4;
681             if (end >= len) {
682                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
683                 goto bail;
684             }
685             /* Decode 4 hex digits */
686             for (; next < end; next++) {
687                 Py_UNICODE digit = buf[next];
688                 c <<= 4;
689                 switch (digit) {
690                     case '0': case '1': case '2': case '3': case '4':
691                     case '5': case '6': case '7': case '8': case '9':
692                         c |= (digit - '0'); break;
693                     case 'a': case 'b': case 'c': case 'd': case 'e':
694                     case 'f':
695                         c |= (digit - 'a' + 10); break;
696                     case 'A': case 'B': case 'C': case 'D': case 'E':
697                     case 'F':
698                         c |= (digit - 'A' + 10); break;
699                     default:
700                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
701                         goto bail;
702                 }
703             }
704 #ifdef Py_UNICODE_WIDE
705             /* Surrogate pair */
706             if ((c & 0xfc00) == 0xd800) {
707                 Py_UNICODE c2 = 0;
708                 if (end + 6 >= len) {
709                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
710                     goto bail;
711                 }
712                 if (buf[next++] != '\\' || buf[next++] != 'u') {
713                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
714                     goto bail;
715                 }
716                 end += 6;
717                 /* Decode 4 hex digits */
718                 for (; next < end; next++) {
719                     Py_UNICODE digit = buf[next];
720                     c2 <<= 4;
721                     switch (digit) {
722                         case '0': case '1': case '2': case '3': case '4':
723                         case '5': case '6': case '7': case '8': case '9':
724                             c2 |= (digit - '0'); break;
725                         case 'a': case 'b': case 'c': case 'd': case 'e':
726                         case 'f':
727                             c2 |= (digit - 'a' + 10); break;
728                         case 'A': case 'B': case 'C': case 'D': case 'E':
729                         case 'F':
730                             c2 |= (digit - 'A' + 10); break;
731                         default:
732                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
733                             goto bail;
734                     }
735                 }
736                 if ((c2 & 0xfc00) != 0xdc00) {
737                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
738                     goto bail;
739                 }
740                 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
741             }
742             else if ((c & 0xfc00) == 0xdc00) {
743                 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
744                 goto bail;
745             }
746 #endif
747         }
748         chunk = PyUnicode_FromUnicode(&c, 1);
749         if (chunk == NULL) {
750             goto bail;
751         }
752         if (PyList_Append(chunks, chunk)) {
753             Py_DECREF(chunk);
754             goto bail;
755         }
756         Py_DECREF(chunk);
757     }
758 
759     rval = join_list_unicode(chunks);
760     if (rval == NULL) {
761         goto bail;
762     }
763     Py_DECREF(chunks);
764     *next_end_ptr = end;
765     return rval;
766 bail:
767     *next_end_ptr = -1;
768     Py_XDECREF(chunks);
769     return NULL;
770 }
771 
772 PyDoc_STRVAR(pydoc_scanstring,
773     "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774     "\n"
775     "Scan the string s for a JSON string. End is the index of the\n"
776     "character in s after the quote that started the JSON string.\n"
777     "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778     "on attempt to decode an invalid string. If strict is False then literal\n"
779     "control characters are allowed in the string.\n"
780     "\n"
781     "Returns a tuple of the decoded string and the index of the character in s\n"
782     "after the end quote."
783 );
784 
785 static PyObject *
py_scanstring(PyObject * self UNUSED,PyObject * args)786 py_scanstring(PyObject* self UNUSED, PyObject *args)
787 {
788     PyObject *pystr;
789     PyObject *rval;
790     Py_ssize_t end;
791     Py_ssize_t next_end = -1;
792     char *encoding = NULL;
793     int strict = 1;
794     if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
795         return NULL;
796     }
797     if (encoding == NULL) {
798         encoding = DEFAULT_ENCODING;
799     }
800     if (PyString_Check(pystr)) {
801         rval = scanstring_str(pystr, end, encoding, strict, &next_end);
802     }
803     else if (PyUnicode_Check(pystr)) {
804         rval = scanstring_unicode(pystr, end, strict, &next_end);
805     }
806     else {
807         PyErr_Format(PyExc_TypeError,
808                      "first argument must be a string, not %.80s",
809                      Py_TYPE(pystr)->tp_name);
810         return NULL;
811     }
812     return _build_rval_index_tuple(rval, next_end);
813 }
814 
815 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
816     "encode_basestring_ascii(basestring) -> str\n"
817     "\n"
818     "Return an ASCII-only JSON representation of a Python string"
819 );
820 
821 static PyObject *
py_encode_basestring_ascii(PyObject * self UNUSED,PyObject * pystr)822 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
823 {
824     /* Return an ASCII-only JSON representation of a Python string */
825     /* METH_O */
826     if (PyString_Check(pystr)) {
827         return ascii_escape_str(pystr);
828     }
829     else if (PyUnicode_Check(pystr)) {
830         return ascii_escape_unicode(pystr);
831     }
832     else {
833         PyErr_Format(PyExc_TypeError,
834                      "first argument must be a string, not %.80s",
835                      Py_TYPE(pystr)->tp_name);
836         return NULL;
837     }
838 }
839 
840 static void
scanner_dealloc(PyObject * self)841 scanner_dealloc(PyObject *self)
842 {
843     /* Deallocate scanner object */
844     scanner_clear(self);
845     Py_TYPE(self)->tp_free(self);
846 }
847 
848 static int
scanner_traverse(PyObject * self,visitproc visit,void * arg)849 scanner_traverse(PyObject *self, visitproc visit, void *arg)
850 {
851     PyScannerObject *s;
852     assert(PyScanner_Check(self));
853     s = (PyScannerObject *)self;
854     Py_VISIT(s->encoding);
855     Py_VISIT(s->strict);
856     Py_VISIT(s->object_hook);
857     Py_VISIT(s->pairs_hook);
858     Py_VISIT(s->parse_float);
859     Py_VISIT(s->parse_int);
860     Py_VISIT(s->parse_constant);
861     return 0;
862 }
863 
864 static int
scanner_clear(PyObject * self)865 scanner_clear(PyObject *self)
866 {
867     PyScannerObject *s;
868     assert(PyScanner_Check(self));
869     s = (PyScannerObject *)self;
870     Py_CLEAR(s->encoding);
871     Py_CLEAR(s->strict);
872     Py_CLEAR(s->object_hook);
873     Py_CLEAR(s->pairs_hook);
874     Py_CLEAR(s->parse_float);
875     Py_CLEAR(s->parse_int);
876     Py_CLEAR(s->parse_constant);
877     return 0;
878 }
879 
880 static PyObject *
_parse_object_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)881 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
882     /* Read a JSON object from PyString pystr.
883     idx is the index of the first character after the opening curly brace.
884     *next_idx_ptr is a return-by-reference index to the first character after
885         the closing curly brace.
886 
887     Returns a new PyObject (usually a dict, but object_hook can change that)
888     */
889     char *str = PyString_AS_STRING(pystr);
890     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
891     PyObject *rval;
892     PyObject *pairs;
893     PyObject *item;
894     PyObject *key = NULL;
895     PyObject *val = NULL;
896     char *encoding = PyString_AS_STRING(s->encoding);
897     int strict = PyObject_IsTrue(s->strict);
898     Py_ssize_t next_idx;
899 
900     pairs = PyList_New(0);
901     if (pairs == NULL)
902         return NULL;
903 
904     /* skip whitespace after { */
905     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
906 
907     /* only loop if the object is non-empty */
908     if (idx <= end_idx && str[idx] != '}') {
909         while (idx <= end_idx) {
910             /* read key */
911             if (str[idx] != '"') {
912                 raise_errmsg("Expecting property name", pystr, idx);
913                 goto bail;
914             }
915             key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
916             if (key == NULL)
917                 goto bail;
918             idx = next_idx;
919 
920             /* skip whitespace between key and : delimiter, read :, skip whitespace */
921             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
922             if (idx > end_idx || str[idx] != ':') {
923                 raise_errmsg("Expecting : delimiter", pystr, idx);
924                 goto bail;
925             }
926             idx++;
927             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
928 
929             /* read any JSON data type */
930             val = scan_once_str(s, pystr, idx, &next_idx);
931             if (val == NULL)
932                 goto bail;
933 
934             item = PyTuple_Pack(2, key, val);
935             if (item == NULL)
936                 goto bail;
937             Py_CLEAR(key);
938             Py_CLEAR(val);
939             if (PyList_Append(pairs, item) == -1) {
940                 Py_DECREF(item);
941                 goto bail;
942             }
943             Py_DECREF(item);
944             idx = next_idx;
945 
946             /* skip whitespace before } or , */
947             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
948 
949             /* bail if the object is closed or we didn't get the , delimiter */
950             if (idx > end_idx) break;
951             if (str[idx] == '}') {
952                 break;
953             }
954             else if (str[idx] != ',') {
955                 raise_errmsg("Expecting , delimiter", pystr, idx);
956                 goto bail;
957             }
958             idx++;
959 
960             /* skip whitespace after , delimiter */
961             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
962         }
963     }
964     /* verify that idx < end_idx, str[idx] should be '}' */
965     if (idx > end_idx || str[idx] != '}') {
966         raise_errmsg("Expecting object", pystr, end_idx);
967         goto bail;
968     }
969 
970     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
971     if (s->pairs_hook != Py_None) {
972         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
973         if (val == NULL)
974             goto bail;
975         Py_DECREF(pairs);
976         *next_idx_ptr = idx + 1;
977         return val;
978     }
979 
980     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
981                                          pairs, NULL);
982     if (rval == NULL)
983         goto bail;
984     Py_CLEAR(pairs);
985 
986     /* if object_hook is not None: rval = object_hook(rval) */
987     if (s->object_hook != Py_None) {
988         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
989         if (val == NULL)
990             goto bail;
991         Py_DECREF(rval);
992         rval = val;
993         val = NULL;
994     }
995     *next_idx_ptr = idx + 1;
996     return rval;
997 bail:
998     Py_XDECREF(key);
999     Py_XDECREF(val);
1000     Py_XDECREF(pairs);
1001     return NULL;
1002 }
1003 
1004 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1005 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1006     /* Read a JSON object from PyUnicode pystr.
1007     idx is the index of the first character after the opening curly brace.
1008     *next_idx_ptr is a return-by-reference index to the first character after
1009         the closing curly brace.
1010 
1011     Returns a new PyObject (usually a dict, but object_hook can change that)
1012     */
1013     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1014     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1015     PyObject *rval;
1016     PyObject *pairs;
1017     PyObject *item;
1018     PyObject *key = NULL;
1019     PyObject *val = NULL;
1020     int strict = PyObject_IsTrue(s->strict);
1021     Py_ssize_t next_idx;
1022 
1023     pairs = PyList_New(0);
1024     if (pairs == NULL)
1025         return NULL;
1026 
1027     /* skip whitespace after { */
1028     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1029 
1030     /* only loop if the object is non-empty */
1031     if (idx <= end_idx && str[idx] != '}') {
1032         while (idx <= end_idx) {
1033             /* read key */
1034             if (str[idx] != '"') {
1035                 raise_errmsg("Expecting property name", pystr, idx);
1036                 goto bail;
1037             }
1038             key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1039             if (key == NULL)
1040                 goto bail;
1041             idx = next_idx;
1042 
1043             /* skip whitespace between key and : delimiter, read :, skip whitespace */
1044             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1045             if (idx > end_idx || str[idx] != ':') {
1046                 raise_errmsg("Expecting : delimiter", pystr, idx);
1047                 goto bail;
1048             }
1049             idx++;
1050             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1051 
1052             /* read any JSON term */
1053             val = scan_once_unicode(s, pystr, idx, &next_idx);
1054             if (val == NULL)
1055                 goto bail;
1056 
1057             item = PyTuple_Pack(2, key, val);
1058             if (item == NULL)
1059                 goto bail;
1060             Py_CLEAR(key);
1061             Py_CLEAR(val);
1062             if (PyList_Append(pairs, item) == -1) {
1063                 Py_DECREF(item);
1064                 goto bail;
1065             }
1066             Py_DECREF(item);
1067             idx = next_idx;
1068 
1069             /* skip whitespace before } or , */
1070             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1071 
1072             /* bail if the object is closed or we didn't get the , delimiter */
1073             if (idx > end_idx) break;
1074             if (str[idx] == '}') {
1075                 break;
1076             }
1077             else if (str[idx] != ',') {
1078                 raise_errmsg("Expecting , delimiter", pystr, idx);
1079                 goto bail;
1080             }
1081             idx++;
1082 
1083             /* skip whitespace after , delimiter */
1084             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1085         }
1086     }
1087 
1088     /* verify that idx < end_idx, str[idx] should be '}' */
1089     if (idx > end_idx || str[idx] != '}') {
1090         raise_errmsg("Expecting object", pystr, end_idx);
1091         goto bail;
1092     }
1093 
1094     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1095     if (s->pairs_hook != Py_None) {
1096         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1097         if (val == NULL)
1098             goto bail;
1099         Py_DECREF(pairs);
1100         *next_idx_ptr = idx + 1;
1101         return val;
1102     }
1103 
1104     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1105                                          pairs, NULL);
1106     if (rval == NULL)
1107         goto bail;
1108     Py_CLEAR(pairs);
1109 
1110     /* if object_hook is not None: rval = object_hook(rval) */
1111     if (s->object_hook != Py_None) {
1112         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1113         if (val == NULL)
1114             goto bail;
1115         Py_DECREF(rval);
1116         rval = val;
1117         val = NULL;
1118     }
1119     *next_idx_ptr = idx + 1;
1120     return rval;
1121 bail:
1122     Py_XDECREF(key);
1123     Py_XDECREF(val);
1124     Py_XDECREF(pairs);
1125     return NULL;
1126 }
1127 
1128 static PyObject *
_parse_array_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1129 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1130     /* Read a JSON array from PyString pystr.
1131     idx is the index of the first character after the opening brace.
1132     *next_idx_ptr is a return-by-reference index to the first character after
1133         the closing brace.
1134 
1135     Returns a new PyList
1136     */
1137     char *str = PyString_AS_STRING(pystr);
1138     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1139     PyObject *val = NULL;
1140     PyObject *rval = PyList_New(0);
1141     Py_ssize_t next_idx;
1142     if (rval == NULL)
1143         return NULL;
1144 
1145     /* skip whitespace after [ */
1146     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1147 
1148     /* only loop if the array is non-empty */
1149     if (idx <= end_idx && str[idx] != ']') {
1150         while (idx <= end_idx) {
1151 
1152             /* read any JSON term and de-tuplefy the (rval, idx) */
1153             val = scan_once_str(s, pystr, idx, &next_idx);
1154             if (val == NULL)
1155                 goto bail;
1156 
1157             if (PyList_Append(rval, val) == -1)
1158                 goto bail;
1159 
1160             Py_CLEAR(val);
1161             idx = next_idx;
1162 
1163             /* skip whitespace between term and , */
1164             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1165 
1166             /* bail if the array is closed or we didn't get the , delimiter */
1167             if (idx > end_idx) break;
1168             if (str[idx] == ']') {
1169                 break;
1170             }
1171             else if (str[idx] != ',') {
1172                 raise_errmsg("Expecting , delimiter", pystr, idx);
1173                 goto bail;
1174             }
1175             idx++;
1176 
1177             /* skip whitespace after , */
1178             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1179         }
1180     }
1181 
1182     /* verify that idx < end_idx, str[idx] should be ']' */
1183     if (idx > end_idx || str[idx] != ']') {
1184         raise_errmsg("Expecting object", pystr, end_idx);
1185         goto bail;
1186     }
1187     *next_idx_ptr = idx + 1;
1188     return rval;
1189 bail:
1190     Py_XDECREF(val);
1191     Py_DECREF(rval);
1192     return NULL;
1193 }
1194 
1195 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1196 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1197     /* Read a JSON array from PyString pystr.
1198     idx is the index of the first character after the opening brace.
1199     *next_idx_ptr is a return-by-reference index to the first character after
1200         the closing brace.
1201 
1202     Returns a new PyList
1203     */
1204     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1205     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1206     PyObject *val = NULL;
1207     PyObject *rval = PyList_New(0);
1208     Py_ssize_t next_idx;
1209     if (rval == NULL)
1210         return NULL;
1211 
1212     /* skip whitespace after [ */
1213     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1214 
1215     /* only loop if the array is non-empty */
1216     if (idx <= end_idx && str[idx] != ']') {
1217         while (idx <= end_idx) {
1218 
1219             /* read any JSON term  */
1220             val = scan_once_unicode(s, pystr, idx, &next_idx);
1221             if (val == NULL)
1222                 goto bail;
1223 
1224             if (PyList_Append(rval, val) == -1)
1225                 goto bail;
1226 
1227             Py_CLEAR(val);
1228             idx = next_idx;
1229 
1230             /* skip whitespace between term and , */
1231             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232 
1233             /* bail if the array is closed or we didn't get the , delimiter */
1234             if (idx > end_idx) break;
1235             if (str[idx] == ']') {
1236                 break;
1237             }
1238             else if (str[idx] != ',') {
1239                 raise_errmsg("Expecting , delimiter", pystr, idx);
1240                 goto bail;
1241             }
1242             idx++;
1243 
1244             /* skip whitespace after , */
1245             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1246         }
1247     }
1248 
1249     /* verify that idx < end_idx, str[idx] should be ']' */
1250     if (idx > end_idx || str[idx] != ']') {
1251         raise_errmsg("Expecting object", pystr, end_idx);
1252         goto bail;
1253     }
1254     *next_idx_ptr = idx + 1;
1255     return rval;
1256 bail:
1257     Py_XDECREF(val);
1258     Py_DECREF(rval);
1259     return NULL;
1260 }
1261 
1262 static PyObject *
_parse_constant(PyScannerObject * s,char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1263 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1264     /* Read a JSON constant from PyString pystr.
1265     constant is the constant string that was found
1266         ("NaN", "Infinity", "-Infinity").
1267     idx is the index of the first character of the constant
1268     *next_idx_ptr is a return-by-reference index to the first character after
1269         the constant.
1270 
1271     Returns the result of parse_constant
1272     */
1273     PyObject *cstr;
1274     PyObject *rval;
1275     /* constant is "NaN", "Infinity", or "-Infinity" */
1276     cstr = PyString_InternFromString(constant);
1277     if (cstr == NULL)
1278         return NULL;
1279 
1280     /* rval = parse_constant(constant) */
1281     rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1282     idx += PyString_GET_SIZE(cstr);
1283     Py_DECREF(cstr);
1284     *next_idx_ptr = idx;
1285     return rval;
1286 }
1287 
1288 static PyObject *
_match_number_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1289 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1290     /* Read a JSON number from PyString pystr.
1291     idx is the index of the first character of the number
1292     *next_idx_ptr is a return-by-reference index to the first character after
1293         the number.
1294 
1295     Returns a new PyObject representation of that number:
1296         PyInt, PyLong, or PyFloat.
1297         May return other types if parse_int or parse_float are set
1298     */
1299     char *str = PyString_AS_STRING(pystr);
1300     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1301     Py_ssize_t idx = start;
1302     int is_float = 0;
1303     PyObject *rval;
1304     PyObject *numstr;
1305 
1306     /* read a sign if it's there, make sure it's not the end of the string */
1307     if (str[idx] == '-') {
1308         idx++;
1309         if (idx > end_idx) {
1310             PyErr_SetNone(PyExc_StopIteration);
1311             return NULL;
1312         }
1313     }
1314 
1315     /* read as many integer digits as we find as long as it doesn't start with 0 */
1316     if (str[idx] >= '1' && str[idx] <= '9') {
1317         idx++;
1318         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1319     }
1320     /* if it starts with 0 we only expect one integer digit */
1321     else if (str[idx] == '0') {
1322         idx++;
1323     }
1324     /* no integer digits, error */
1325     else {
1326         PyErr_SetNone(PyExc_StopIteration);
1327         return NULL;
1328     }
1329 
1330     /* if the next char is '.' followed by a digit then read all float digits */
1331     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1332         is_float = 1;
1333         idx += 2;
1334         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1335     }
1336 
1337     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1338     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1339 
1340         /* save the index of the 'e' or 'E' just in case we need to backtrack */
1341         Py_ssize_t e_start = idx;
1342         idx++;
1343 
1344         /* read an exponent sign if present */
1345         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1346 
1347         /* read all digits */
1348         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1349 
1350         /* if we got a digit, then parse as float. if not, backtrack */
1351         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1352             is_float = 1;
1353         }
1354         else {
1355             idx = e_start;
1356         }
1357     }
1358 
1359     /* copy the section we determined to be a number */
1360     numstr = PyString_FromStringAndSize(&str[start], idx - start);
1361     if (numstr == NULL)
1362         return NULL;
1363     if (is_float) {
1364         /* parse as a float using a fast path if available, otherwise call user defined method */
1365         if (s->parse_float != (PyObject *)&PyFloat_Type) {
1366             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1367         }
1368         else {
1369             double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1370                                              NULL, NULL);
1371             if (d == -1.0 && PyErr_Occurred())
1372                 return NULL;
1373             rval = PyFloat_FromDouble(d);
1374         }
1375     }
1376     else {
1377         /* parse as an int using a fast path if available, otherwise call user defined method */
1378         if (s->parse_int != (PyObject *)&PyInt_Type) {
1379             rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1380         }
1381         else {
1382             rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1383         }
1384     }
1385     Py_DECREF(numstr);
1386     *next_idx_ptr = idx;
1387     return rval;
1388 }
1389 
1390 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1391 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1392     /* Read a JSON number from PyUnicode pystr.
1393     idx is the index of the first character of the number
1394     *next_idx_ptr is a return-by-reference index to the first character after
1395         the number.
1396 
1397     Returns a new PyObject representation of that number:
1398         PyInt, PyLong, or PyFloat.
1399         May return other types if parse_int or parse_float are set
1400     */
1401     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1402     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1403     Py_ssize_t idx = start;
1404     int is_float = 0;
1405     PyObject *rval;
1406     PyObject *numstr;
1407 
1408     /* read a sign if it's there, make sure it's not the end of the string */
1409     if (str[idx] == '-') {
1410         idx++;
1411         if (idx > end_idx) {
1412             PyErr_SetNone(PyExc_StopIteration);
1413             return NULL;
1414         }
1415     }
1416 
1417     /* read as many integer digits as we find as long as it doesn't start with 0 */
1418     if (str[idx] >= '1' && str[idx] <= '9') {
1419         idx++;
1420         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1421     }
1422     /* if it starts with 0 we only expect one integer digit */
1423     else if (str[idx] == '0') {
1424         idx++;
1425     }
1426     /* no integer digits, error */
1427     else {
1428         PyErr_SetNone(PyExc_StopIteration);
1429         return NULL;
1430     }
1431 
1432     /* if the next char is '.' followed by a digit then read all float digits */
1433     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1434         is_float = 1;
1435         idx += 2;
1436         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1437     }
1438 
1439     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1440     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1441         Py_ssize_t e_start = idx;
1442         idx++;
1443 
1444         /* read an exponent sign if present */
1445         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1446 
1447         /* read all digits */
1448         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1449 
1450         /* if we got a digit, then parse as float. if not, backtrack */
1451         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1452             is_float = 1;
1453         }
1454         else {
1455             idx = e_start;
1456         }
1457     }
1458 
1459     /* copy the section we determined to be a number */
1460     numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1461     if (numstr == NULL)
1462         return NULL;
1463     if (is_float) {
1464         /* parse as a float using a fast path if available, otherwise call user defined method */
1465         if (s->parse_float != (PyObject *)&PyFloat_Type) {
1466             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1467         }
1468         else {
1469             rval = PyFloat_FromString(numstr, NULL);
1470         }
1471     }
1472     else {
1473         /* no fast path for unicode -> int, just call */
1474         rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1475     }
1476     Py_DECREF(numstr);
1477     *next_idx_ptr = idx;
1478     return rval;
1479 }
1480 
1481 static PyObject *
scan_once_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1482 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1483 {
1484     /* Read one JSON term (of any kind) from PyString pystr.
1485     idx is the index of the first character of the term
1486     *next_idx_ptr is a return-by-reference index to the first character after
1487         the number.
1488 
1489     Returns a new PyObject representation of the term.
1490     */
1491     PyObject *res;
1492     char *str = PyString_AS_STRING(pystr);
1493     Py_ssize_t length = PyString_GET_SIZE(pystr);
1494     if (idx >= length) {
1495         PyErr_SetNone(PyExc_StopIteration);
1496         return NULL;
1497     }
1498     switch (str[idx]) {
1499         case '"':
1500             /* string */
1501             return scanstring_str(pystr, idx + 1,
1502                 PyString_AS_STRING(s->encoding),
1503                 PyObject_IsTrue(s->strict),
1504                 next_idx_ptr);
1505         case '{':
1506             /* object */
1507             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1508                                       "from a byte string"))
1509                 return NULL;
1510             res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1511             Py_LeaveRecursiveCall();
1512             return res;
1513         case '[':
1514             /* array */
1515             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1516                                       "from a byte string"))
1517                 return NULL;
1518             res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1519             Py_LeaveRecursiveCall();
1520             return res;
1521         case 'n':
1522             /* null */
1523             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1524                 Py_INCREF(Py_None);
1525                 *next_idx_ptr = idx + 4;
1526                 return Py_None;
1527             }
1528             break;
1529         case 't':
1530             /* true */
1531             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1532                 Py_INCREF(Py_True);
1533                 *next_idx_ptr = idx + 4;
1534                 return Py_True;
1535             }
1536             break;
1537         case 'f':
1538             /* false */
1539             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1540                 Py_INCREF(Py_False);
1541                 *next_idx_ptr = idx + 5;
1542                 return Py_False;
1543             }
1544             break;
1545         case 'N':
1546             /* NaN */
1547             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1548                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1549             }
1550             break;
1551         case 'I':
1552             /* Infinity */
1553             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1554                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1555             }
1556             break;
1557         case '-':
1558             /* -Infinity */
1559             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1560                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1561             }
1562             break;
1563     }
1564     /* Didn't find a string, object, array, or named constant. Look for a number. */
1565     return _match_number_str(s, pystr, idx, next_idx_ptr);
1566 }
1567 
1568 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1569 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1570 {
1571     /* Read one JSON term (of any kind) from PyUnicode pystr.
1572     idx is the index of the first character of the term
1573     *next_idx_ptr is a return-by-reference index to the first character after
1574         the number.
1575 
1576     Returns a new PyObject representation of the term.
1577     */
1578     PyObject *res;
1579     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1580     Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1581     if (idx >= length) {
1582         PyErr_SetNone(PyExc_StopIteration);
1583         return NULL;
1584     }
1585     switch (str[idx]) {
1586         case '"':
1587             /* string */
1588             return scanstring_unicode(pystr, idx + 1,
1589                 PyObject_IsTrue(s->strict),
1590                 next_idx_ptr);
1591         case '{':
1592             /* object */
1593             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1594                                       "from a unicode string"))
1595                 return NULL;
1596             res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1597             Py_LeaveRecursiveCall();
1598             return res;
1599         case '[':
1600             /* array */
1601             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1602                                       "from a unicode string"))
1603                 return NULL;
1604             res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1605             Py_LeaveRecursiveCall();
1606             return res;
1607         case 'n':
1608             /* null */
1609             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1610                 Py_INCREF(Py_None);
1611                 *next_idx_ptr = idx + 4;
1612                 return Py_None;
1613             }
1614             break;
1615         case 't':
1616             /* true */
1617             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1618                 Py_INCREF(Py_True);
1619                 *next_idx_ptr = idx + 4;
1620                 return Py_True;
1621             }
1622             break;
1623         case 'f':
1624             /* false */
1625             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1626                 Py_INCREF(Py_False);
1627                 *next_idx_ptr = idx + 5;
1628                 return Py_False;
1629             }
1630             break;
1631         case 'N':
1632             /* NaN */
1633             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1634                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1635             }
1636             break;
1637         case 'I':
1638             /* Infinity */
1639             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1640                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1641             }
1642             break;
1643         case '-':
1644             /* -Infinity */
1645             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1646                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1647             }
1648             break;
1649     }
1650     /* Didn't find a string, object, array, or named constant. Look for a number. */
1651     return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1652 }
1653 
1654 static PyObject *
scanner_call(PyObject * self,PyObject * args,PyObject * kwds)1655 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1656 {
1657     /* Python callable interface to scan_once_{str,unicode} */
1658     PyObject *pystr;
1659     PyObject *rval;
1660     Py_ssize_t idx;
1661     Py_ssize_t next_idx = -1;
1662     static char *kwlist[] = {"string", "idx", NULL};
1663     PyScannerObject *s;
1664     assert(PyScanner_Check(self));
1665     s = (PyScannerObject *)self;
1666     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1667         return NULL;
1668 
1669     if (PyString_Check(pystr)) {
1670         rval = scan_once_str(s, pystr, idx, &next_idx);
1671     }
1672     else if (PyUnicode_Check(pystr)) {
1673         rval = scan_once_unicode(s, pystr, idx, &next_idx);
1674     }
1675     else {
1676         PyErr_Format(PyExc_TypeError,
1677                  "first argument must be a string, not %.80s",
1678                  Py_TYPE(pystr)->tp_name);
1679         return NULL;
1680     }
1681     return _build_rval_index_tuple(rval, next_idx);
1682 }
1683 
1684 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1685 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1686 {
1687     PyScannerObject *s;
1688     s = (PyScannerObject *)type->tp_alloc(type, 0);
1689     if (s != NULL) {
1690         s->encoding = NULL;
1691         s->strict = NULL;
1692         s->object_hook = NULL;
1693         s->pairs_hook = NULL;
1694         s->parse_float = NULL;
1695         s->parse_int = NULL;
1696         s->parse_constant = NULL;
1697     }
1698     return (PyObject *)s;
1699 }
1700 
1701 static int
scanner_init(PyObject * self,PyObject * args,PyObject * kwds)1702 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1703 {
1704     /* Initialize Scanner object */
1705     PyObject *ctx;
1706     static char *kwlist[] = {"context", NULL};
1707     PyScannerObject *s;
1708 
1709     assert(PyScanner_Check(self));
1710     s = (PyScannerObject *)self;
1711 
1712     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1713         return -1;
1714 
1715     /* PyString_AS_STRING is used on encoding */
1716     s->encoding = PyObject_GetAttrString(ctx, "encoding");
1717     if (s->encoding == NULL)
1718         goto bail;
1719     if (s->encoding == Py_None) {
1720         Py_DECREF(Py_None);
1721         s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1722     }
1723     else if (PyUnicode_Check(s->encoding)) {
1724         PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1725         Py_DECREF(s->encoding);
1726         s->encoding = tmp;
1727     }
1728     if (s->encoding == NULL || !PyString_Check(s->encoding))
1729         goto bail;
1730 
1731     /* All of these will fail "gracefully" so we don't need to verify them */
1732     s->strict = PyObject_GetAttrString(ctx, "strict");
1733     if (s->strict == NULL)
1734         goto bail;
1735     s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1736     if (s->object_hook == NULL)
1737         goto bail;
1738     s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1739     if (s->pairs_hook == NULL)
1740         goto bail;
1741     s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1742     if (s->parse_float == NULL)
1743         goto bail;
1744     s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1745     if (s->parse_int == NULL)
1746         goto bail;
1747     s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1748     if (s->parse_constant == NULL)
1749         goto bail;
1750 
1751     return 0;
1752 
1753 bail:
1754     Py_CLEAR(s->encoding);
1755     Py_CLEAR(s->strict);
1756     Py_CLEAR(s->object_hook);
1757     Py_CLEAR(s->pairs_hook);
1758     Py_CLEAR(s->parse_float);
1759     Py_CLEAR(s->parse_int);
1760     Py_CLEAR(s->parse_constant);
1761     return -1;
1762 }
1763 
1764 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1765 
1766 static
1767 PyTypeObject PyScannerType = {
1768     PyObject_HEAD_INIT(NULL)
1769     0,                    /* tp_internal */
1770     "_json.Scanner",       /* tp_name */
1771     sizeof(PyScannerObject), /* tp_basicsize */
1772     0,                    /* tp_itemsize */
1773     scanner_dealloc, /* tp_dealloc */
1774     0,                    /* tp_print */
1775     0,                    /* tp_getattr */
1776     0,                    /* tp_setattr */
1777     0,                    /* tp_compare */
1778     0,                    /* tp_repr */
1779     0,                    /* tp_as_number */
1780     0,                    /* tp_as_sequence */
1781     0,                    /* tp_as_mapping */
1782     0,                    /* tp_hash */
1783     scanner_call,         /* tp_call */
1784     0,                    /* tp_str */
1785     0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
1786     0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
1787     0,                    /* tp_as_buffer */
1788     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1789     scanner_doc,          /* tp_doc */
1790     scanner_traverse,                    /* tp_traverse */
1791     scanner_clear,                    /* tp_clear */
1792     0,                    /* tp_richcompare */
1793     0,                    /* tp_weaklistoffset */
1794     0,                    /* tp_iter */
1795     0,                    /* tp_iternext */
1796     0,                    /* tp_methods */
1797     scanner_members,                    /* tp_members */
1798     0,                    /* tp_getset */
1799     0,                    /* tp_base */
1800     0,                    /* tp_dict */
1801     0,                    /* tp_descr_get */
1802     0,                    /* tp_descr_set */
1803     0,                    /* tp_dictoffset */
1804     scanner_init,                    /* tp_init */
1805     0,/* PyType_GenericAlloc, */        /* tp_alloc */
1806     scanner_new,          /* tp_new */
1807     0,/* PyObject_GC_Del, */              /* tp_free */
1808 };
1809 
1810 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1811 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1812 {
1813     PyEncoderObject *s;
1814     s = (PyEncoderObject *)type->tp_alloc(type, 0);
1815     if (s != NULL) {
1816         s->markers = NULL;
1817         s->defaultfn = NULL;
1818         s->encoder = NULL;
1819         s->indent = NULL;
1820         s->key_separator = NULL;
1821         s->item_separator = NULL;
1822         s->sort_keys = NULL;
1823         s->skipkeys = NULL;
1824     }
1825     return (PyObject *)s;
1826 }
1827 
1828 static int
encoder_init(PyObject * self,PyObject * args,PyObject * kwds)1829 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1830 {
1831     /* initialize Encoder object */
1832     static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1833 
1834     PyEncoderObject *s;
1835     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1836     PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1837 
1838     assert(PyEncoder_Check(self));
1839     s = (PyEncoderObject *)self;
1840 
1841     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1842         &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1843         &sort_keys, &skipkeys, &allow_nan))
1844         return -1;
1845 
1846     s->markers = markers;
1847     s->defaultfn = defaultfn;
1848     s->encoder = encoder;
1849     s->indent = indent;
1850     s->key_separator = key_separator;
1851     s->item_separator = item_separator;
1852     s->sort_keys = sort_keys;
1853     s->skipkeys = skipkeys;
1854     s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1855     s->allow_nan = PyObject_IsTrue(allow_nan);
1856 
1857     Py_INCREF(s->markers);
1858     Py_INCREF(s->defaultfn);
1859     Py_INCREF(s->encoder);
1860     Py_INCREF(s->indent);
1861     Py_INCREF(s->key_separator);
1862     Py_INCREF(s->item_separator);
1863     Py_INCREF(s->sort_keys);
1864     Py_INCREF(s->skipkeys);
1865     return 0;
1866 }
1867 
1868 static PyObject *
encoder_call(PyObject * self,PyObject * args,PyObject * kwds)1869 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1870 {
1871     /* Python callable interface to encode_listencode_obj */
1872     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1873     PyObject *obj;
1874     PyObject *rval;
1875     Py_ssize_t indent_level;
1876     PyEncoderObject *s;
1877     assert(PyEncoder_Check(self));
1878     s = (PyEncoderObject *)self;
1879     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1880         &obj, _convertPyInt_AsSsize_t, &indent_level))
1881         return NULL;
1882     rval = PyList_New(0);
1883     if (rval == NULL)
1884         return NULL;
1885     if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1886         Py_DECREF(rval);
1887         return NULL;
1888     }
1889     return rval;
1890 }
1891 
1892 static PyObject *
_encoded_const(PyObject * obj)1893 _encoded_const(PyObject *obj)
1894 {
1895     /* Return the JSON string representation of None, True, False */
1896     if (obj == Py_None) {
1897         static PyObject *s_null = NULL;
1898         if (s_null == NULL) {
1899             s_null = PyString_InternFromString("null");
1900         }
1901         Py_INCREF(s_null);
1902         return s_null;
1903     }
1904     else if (obj == Py_True) {
1905         static PyObject *s_true = NULL;
1906         if (s_true == NULL) {
1907             s_true = PyString_InternFromString("true");
1908         }
1909         Py_INCREF(s_true);
1910         return s_true;
1911     }
1912     else if (obj == Py_False) {
1913         static PyObject *s_false = NULL;
1914         if (s_false == NULL) {
1915             s_false = PyString_InternFromString("false");
1916         }
1917         Py_INCREF(s_false);
1918         return s_false;
1919     }
1920     else {
1921         PyErr_SetString(PyExc_ValueError, "not a const");
1922         return NULL;
1923     }
1924 }
1925 
1926 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1927 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1928 {
1929     /* Return the JSON representation of a PyFloat */
1930     double i = PyFloat_AS_DOUBLE(obj);
1931     if (!Py_IS_FINITE(i)) {
1932         if (!s->allow_nan) {
1933             PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1934             return NULL;
1935         }
1936         if (i > 0) {
1937             return PyString_FromString("Infinity");
1938         }
1939         else if (i < 0) {
1940             return PyString_FromString("-Infinity");
1941         }
1942         else {
1943             return PyString_FromString("NaN");
1944         }
1945     }
1946     /* Use a better float format here? */
1947     return PyObject_Repr(obj);
1948 }
1949 
1950 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1951 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1952 {
1953     /* Return the JSON representation of a string */
1954     if (s->fast_encode)
1955         return py_encode_basestring_ascii(NULL, obj);
1956     else
1957         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1958 }
1959 
1960 static int
_steal_list_append(PyObject * lst,PyObject * stolen)1961 _steal_list_append(PyObject *lst, PyObject *stolen)
1962 {
1963     /* Append stolen and then decrement its reference count */
1964     int rval = PyList_Append(lst, stolen);
1965     Py_DECREF(stolen);
1966     return rval;
1967 }
1968 
1969 static int
encoder_listencode_obj(PyEncoderObject * s,PyObject * rval,PyObject * obj,Py_ssize_t indent_level)1970 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1971 {
1972     /* Encode Python object obj to a JSON term, rval is a PyList */
1973     PyObject *newobj;
1974     int rv;
1975 
1976     if (obj == Py_None || obj == Py_True || obj == Py_False) {
1977         PyObject *cstr = _encoded_const(obj);
1978         if (cstr == NULL)
1979             return -1;
1980         return _steal_list_append(rval, cstr);
1981     }
1982     else if (PyString_Check(obj) || PyUnicode_Check(obj))
1983     {
1984         PyObject *encoded = encoder_encode_string(s, obj);
1985         if (encoded == NULL)
1986             return -1;
1987         return _steal_list_append(rval, encoded);
1988     }
1989     else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1990         PyObject *encoded = PyObject_Str(obj);
1991         if (encoded == NULL)
1992             return -1;
1993         return _steal_list_append(rval, encoded);
1994     }
1995     else if (PyFloat_Check(obj)) {
1996         PyObject *encoded = encoder_encode_float(s, obj);
1997         if (encoded == NULL)
1998             return -1;
1999         return _steal_list_append(rval, encoded);
2000     }
2001     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2002         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2003             return -1;
2004         rv = encoder_listencode_list(s, rval, obj, indent_level);
2005         Py_LeaveRecursiveCall();
2006         return rv;
2007     }
2008     else if (PyDict_Check(obj)) {
2009         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2010             return -1;
2011         rv = encoder_listencode_dict(s, rval, obj, indent_level);
2012         Py_LeaveRecursiveCall();
2013         return rv;
2014     }
2015     else {
2016         PyObject *ident = NULL;
2017         if (s->markers != Py_None) {
2018             int has_key;
2019             ident = PyLong_FromVoidPtr(obj);
2020             if (ident == NULL)
2021                 return -1;
2022             has_key = PyDict_Contains(s->markers, ident);
2023             if (has_key) {
2024                 if (has_key != -1)
2025                     PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2026                 Py_DECREF(ident);
2027                 return -1;
2028             }
2029             if (PyDict_SetItem(s->markers, ident, obj)) {
2030                 Py_DECREF(ident);
2031                 return -1;
2032             }
2033         }
2034         newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2035         if (newobj == NULL) {
2036             Py_XDECREF(ident);
2037             return -1;
2038         }
2039 
2040         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2041             return -1;
2042         rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2043         Py_LeaveRecursiveCall();
2044 
2045         Py_DECREF(newobj);
2046         if (rv) {
2047             Py_XDECREF(ident);
2048             return -1;
2049         }
2050         if (ident != NULL) {
2051             if (PyDict_DelItem(s->markers, ident)) {
2052                 Py_XDECREF(ident);
2053                 return -1;
2054             }
2055             Py_XDECREF(ident);
2056         }
2057         return rv;
2058     }
2059 }
2060 
2061 static int
encoder_listencode_dict(PyEncoderObject * s,PyObject * rval,PyObject * dct,Py_ssize_t indent_level)2062 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2063 {
2064     /* Encode Python dict dct a JSON term, rval is a PyList */
2065     static PyObject *open_dict = NULL;
2066     static PyObject *close_dict = NULL;
2067     static PyObject *empty_dict = NULL;
2068     PyObject *kstr = NULL;
2069     PyObject *ident = NULL;
2070     PyObject *key = NULL;
2071     PyObject *value = NULL;
2072     PyObject *it = NULL;
2073     int skipkeys;
2074     Py_ssize_t idx;
2075 
2076     if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2077         open_dict = PyString_InternFromString("{");
2078         close_dict = PyString_InternFromString("}");
2079         empty_dict = PyString_InternFromString("{}");
2080         if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2081             return -1;
2082     }
2083     if (Py_SIZE(dct) == 0)
2084         return PyList_Append(rval, empty_dict);
2085 
2086     if (s->markers != Py_None) {
2087         int has_key;
2088         ident = PyLong_FromVoidPtr(dct);
2089         if (ident == NULL)
2090             goto bail;
2091         has_key = PyDict_Contains(s->markers, ident);
2092         if (has_key) {
2093             if (has_key != -1)
2094                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2095             goto bail;
2096         }
2097         if (PyDict_SetItem(s->markers, ident, dct)) {
2098             goto bail;
2099         }
2100     }
2101 
2102     if (PyList_Append(rval, open_dict))
2103         goto bail;
2104 
2105     if (s->indent != Py_None) {
2106         /* TODO: DOES NOT RUN */
2107         indent_level += 1;
2108         /*
2109             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2110             separator = _item_separator + newline_indent
2111             buf += newline_indent
2112         */
2113     }
2114 
2115     /* TODO: C speedup not implemented for sort_keys */
2116 
2117     it = PyObject_GetIter(dct);
2118     if (it == NULL)
2119         goto bail;
2120     skipkeys = PyObject_IsTrue(s->skipkeys);
2121     idx = 0;
2122     while ((key = PyIter_Next(it)) != NULL) {
2123         PyObject *encoded;
2124 
2125         if (PyString_Check(key) || PyUnicode_Check(key)) {
2126             Py_INCREF(key);
2127             kstr = key;
2128         }
2129         else if (PyFloat_Check(key)) {
2130             kstr = encoder_encode_float(s, key);
2131             if (kstr == NULL)
2132                 goto bail;
2133         }
2134         else if (PyInt_Check(key) || PyLong_Check(key)) {
2135             kstr = PyObject_Str(key);
2136             if (kstr == NULL)
2137                 goto bail;
2138         }
2139         else if (key == Py_True || key == Py_False || key == Py_None) {
2140             kstr = _encoded_const(key);
2141             if (kstr == NULL)
2142                 goto bail;
2143         }
2144         else if (skipkeys) {
2145             Py_DECREF(key);
2146             continue;
2147         }
2148         else {
2149             /* TODO: include repr of key */
2150             PyErr_SetString(PyExc_TypeError, "keys must be a string");
2151             goto bail;
2152         }
2153 
2154         if (idx) {
2155             if (PyList_Append(rval, s->item_separator))
2156                 goto bail;
2157         }
2158 
2159         value = PyObject_GetItem(dct, key);
2160         if (value == NULL)
2161             goto bail;
2162 
2163         encoded = encoder_encode_string(s, kstr);
2164         Py_CLEAR(kstr);
2165         if (encoded == NULL)
2166             goto bail;
2167         if (PyList_Append(rval, encoded)) {
2168             Py_DECREF(encoded);
2169             goto bail;
2170         }
2171         Py_DECREF(encoded);
2172         if (PyList_Append(rval, s->key_separator))
2173             goto bail;
2174         if (encoder_listencode_obj(s, rval, value, indent_level))
2175             goto bail;
2176         idx += 1;
2177         Py_CLEAR(value);
2178         Py_DECREF(key);
2179     }
2180     if (PyErr_Occurred())
2181         goto bail;
2182     Py_CLEAR(it);
2183 
2184     if (ident != NULL) {
2185         if (PyDict_DelItem(s->markers, ident))
2186             goto bail;
2187         Py_CLEAR(ident);
2188     }
2189     if (s->indent != Py_None) {
2190         /* TODO: DOES NOT RUN */
2191         /*
2192             indent_level -= 1;
2193 
2194             yield '\n' + (' ' * (_indent * _current_indent_level))
2195         */
2196     }
2197     if (PyList_Append(rval, close_dict))
2198         goto bail;
2199     return 0;
2200 
2201 bail:
2202     Py_XDECREF(it);
2203     Py_XDECREF(key);
2204     Py_XDECREF(value);
2205     Py_XDECREF(kstr);
2206     Py_XDECREF(ident);
2207     return -1;
2208 }
2209 
2210 
2211 static int
encoder_listencode_list(PyEncoderObject * s,PyObject * rval,PyObject * seq,Py_ssize_t indent_level)2212 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2213 {
2214     /* Encode Python list seq to a JSON term, rval is a PyList */
2215     static PyObject *open_array = NULL;
2216     static PyObject *close_array = NULL;
2217     static PyObject *empty_array = NULL;
2218     PyObject *ident = NULL;
2219     PyObject *s_fast = NULL;
2220     Py_ssize_t num_items;
2221     PyObject **seq_items;
2222     Py_ssize_t i;
2223 
2224     if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2225         open_array = PyString_InternFromString("[");
2226         close_array = PyString_InternFromString("]");
2227         empty_array = PyString_InternFromString("[]");
2228         if (open_array == NULL || close_array == NULL || empty_array == NULL)
2229             return -1;
2230     }
2231     ident = NULL;
2232     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2233     if (s_fast == NULL)
2234         return -1;
2235     num_items = PySequence_Fast_GET_SIZE(s_fast);
2236     if (num_items == 0) {
2237         Py_DECREF(s_fast);
2238         return PyList_Append(rval, empty_array);
2239     }
2240 
2241     if (s->markers != Py_None) {
2242         int has_key;
2243         ident = PyLong_FromVoidPtr(seq);
2244         if (ident == NULL)
2245             goto bail;
2246         has_key = PyDict_Contains(s->markers, ident);
2247         if (has_key) {
2248             if (has_key != -1)
2249                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2250             goto bail;
2251         }
2252         if (PyDict_SetItem(s->markers, ident, seq)) {
2253             goto bail;
2254         }
2255     }
2256 
2257     seq_items = PySequence_Fast_ITEMS(s_fast);
2258     if (PyList_Append(rval, open_array))
2259         goto bail;
2260     if (s->indent != Py_None) {
2261         /* TODO: DOES NOT RUN */
2262         indent_level += 1;
2263         /*
2264             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2265             separator = _item_separator + newline_indent
2266             buf += newline_indent
2267         */
2268     }
2269     for (i = 0; i < num_items; i++) {
2270         PyObject *obj = seq_items[i];
2271         if (i) {
2272             if (PyList_Append(rval, s->item_separator))
2273                 goto bail;
2274         }
2275         if (encoder_listencode_obj(s, rval, obj, indent_level))
2276             goto bail;
2277     }
2278     if (ident != NULL) {
2279         if (PyDict_DelItem(s->markers, ident))
2280             goto bail;
2281         Py_CLEAR(ident);
2282     }
2283     if (s->indent != Py_None) {
2284         /* TODO: DOES NOT RUN */
2285         /*
2286             indent_level -= 1;
2287 
2288             yield '\n' + (' ' * (_indent * _current_indent_level))
2289         */
2290     }
2291     if (PyList_Append(rval, close_array))
2292         goto bail;
2293     Py_DECREF(s_fast);
2294     return 0;
2295 
2296 bail:
2297     Py_XDECREF(ident);
2298     Py_DECREF(s_fast);
2299     return -1;
2300 }
2301 
2302 static void
encoder_dealloc(PyObject * self)2303 encoder_dealloc(PyObject *self)
2304 {
2305     /* Deallocate Encoder */
2306     encoder_clear(self);
2307     Py_TYPE(self)->tp_free(self);
2308 }
2309 
2310 static int
encoder_traverse(PyObject * self,visitproc visit,void * arg)2311 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2312 {
2313     PyEncoderObject *s;
2314     assert(PyEncoder_Check(self));
2315     s = (PyEncoderObject *)self;
2316     Py_VISIT(s->markers);
2317     Py_VISIT(s->defaultfn);
2318     Py_VISIT(s->encoder);
2319     Py_VISIT(s->indent);
2320     Py_VISIT(s->key_separator);
2321     Py_VISIT(s->item_separator);
2322     Py_VISIT(s->sort_keys);
2323     Py_VISIT(s->skipkeys);
2324     return 0;
2325 }
2326 
2327 static int
encoder_clear(PyObject * self)2328 encoder_clear(PyObject *self)
2329 {
2330     /* Deallocate Encoder */
2331     PyEncoderObject *s;
2332     assert(PyEncoder_Check(self));
2333     s = (PyEncoderObject *)self;
2334     Py_CLEAR(s->markers);
2335     Py_CLEAR(s->defaultfn);
2336     Py_CLEAR(s->encoder);
2337     Py_CLEAR(s->indent);
2338     Py_CLEAR(s->key_separator);
2339     Py_CLEAR(s->item_separator);
2340     Py_CLEAR(s->sort_keys);
2341     Py_CLEAR(s->skipkeys);
2342     return 0;
2343 }
2344 
2345 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2346 
2347 static
2348 PyTypeObject PyEncoderType = {
2349     PyObject_HEAD_INIT(NULL)
2350     0,                    /* tp_internal */
2351     "_json.Encoder",       /* tp_name */
2352     sizeof(PyEncoderObject), /* tp_basicsize */
2353     0,                    /* tp_itemsize */
2354     encoder_dealloc, /* tp_dealloc */
2355     0,                    /* tp_print */
2356     0,                    /* tp_getattr */
2357     0,                    /* tp_setattr */
2358     0,                    /* tp_compare */
2359     0,                    /* tp_repr */
2360     0,                    /* tp_as_number */
2361     0,                    /* tp_as_sequence */
2362     0,                    /* tp_as_mapping */
2363     0,                    /* tp_hash */
2364     encoder_call,         /* tp_call */
2365     0,                    /* tp_str */
2366     0,                    /* tp_getattro */
2367     0,                    /* tp_setattro */
2368     0,                    /* tp_as_buffer */
2369     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
2370     encoder_doc,          /* tp_doc */
2371     encoder_traverse,     /* tp_traverse */
2372     encoder_clear,        /* tp_clear */
2373     0,                    /* tp_richcompare */
2374     0,                    /* tp_weaklistoffset */
2375     0,                    /* tp_iter */
2376     0,                    /* tp_iternext */
2377     0,                    /* tp_methods */
2378     encoder_members,      /* tp_members */
2379     0,                    /* tp_getset */
2380     0,                    /* tp_base */
2381     0,                    /* tp_dict */
2382     0,                    /* tp_descr_get */
2383     0,                    /* tp_descr_set */
2384     0,                    /* tp_dictoffset */
2385     encoder_init,         /* tp_init */
2386     0,                    /* tp_alloc */
2387     encoder_new,          /* tp_new */
2388     0,                    /* tp_free */
2389 };
2390 
2391 static PyMethodDef speedups_methods[] = {
2392     {"encode_basestring_ascii",
2393         (PyCFunction)py_encode_basestring_ascii,
2394         METH_O,
2395         pydoc_encode_basestring_ascii},
2396     {"scanstring",
2397         (PyCFunction)py_scanstring,
2398         METH_VARARGS,
2399         pydoc_scanstring},
2400     {NULL, NULL, 0, NULL}
2401 };
2402 
2403 PyDoc_STRVAR(module_doc,
2404 "json speedups\n");
2405 
2406 void
init_json(void)2407 init_json(void)
2408 {
2409     PyObject *m;
2410     PyScannerType.tp_new = PyType_GenericNew;
2411     if (PyType_Ready(&PyScannerType) < 0)
2412         return;
2413     PyEncoderType.tp_new = PyType_GenericNew;
2414     if (PyType_Ready(&PyEncoderType) < 0)
2415         return;
2416     m = Py_InitModule3("_json", speedups_methods, module_doc);
2417     Py_INCREF((PyObject*)&PyScannerType);
2418     PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2419     Py_INCREF((PyObject*)&PyEncoderType);
2420     PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2421 }
2422