• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONDecoder
2"""
3import re
4
5from json import scanner
6try:
7    from _json import scanstring as c_scanstring
8except ImportError:
9    c_scanstring = None
10
11__all__ = ['JSONDecoder', 'JSONDecodeError']
12
# Regex flags applied to every pattern compiled in this module
# (short aliases of re.VERBOSE, re.MULTILINE and re.DOTALL).
FLAGS = re.X | re.M | re.S

# Non-finite float values used for the NaN / Infinity / -Infinity literals.
PosInf = float('inf')
NegInf = -PosInf
NaN = float('nan')
18
19
class JSONDecodeError(ValueError):
    """ValueError subclass raised when a JSON document cannot be decoded.

    Instances expose these extra attributes:

    msg: the bare error message, without any position information
    doc: the JSON document that was being parsed
    pos: the index into doc at which parsing failed
    lineno: the 1-based line number that contains pos
    colno: the 1-based column of pos within that line

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # One line per newline found before pos, plus the first line.
        line = doc.count('\n', 0, pos) + 1
        # rfind() yields -1 when no newline precedes pos, which makes the
        # column of the very first character come out as 1.
        previous_newline = doc.rfind('\n', 0, pos)
        column = pos - previous_newline
        ValueError.__init__(
            self,
            '%s: line %d column %d (char %d)' % (msg, line, column, pos))
        self.msg, self.doc, self.pos = msg, doc, pos
        self.lineno, self.colno = line, column

    def __reduce__(self):
        # Rebuild from the original constructor arguments; lineno and colno
        # are recomputed by __init__.
        constructor_args = (self.msg, self.doc, self.pos)
        return self.__class__, constructor_args
44
45
# Float value for each of the non-standard constant literals.
_CONSTANTS = {'-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN}


# Exactly four hexadecimal digits.
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
# Group 1: the shortest run of characters before the terminator.
# Group 2: the terminator itself (a double quote, a backslash or a
# control character in the range 0x00-0x1f).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Character that follows a backslash -> the character it stands for.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}
59
60def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
61    esc = _m(s, pos + 1)
62    if esc is not None:
63        try:
64            return int(esc.group(), 16)
65        except ValueError:
66            pass
67    msg = "Invalid \\uXXXX escape"
68    raise JSONDecodeError(msg, s, pos)
69
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string literal whose body starts at s[end].

    end is the index of the first character after the opening quote.
    Every valid JSON escape sequence is unescaped.  An invalid or
    unterminated string raises JSONDecodeError (a ValueError subclass).
    When strict is false, literal control characters are accepted inside
    the string instead of being rejected.

    Returns a (decoded_string, index) tuple, where index is the position
    just past the closing quote."""
    pieces = []
    add_piece = pieces.append
    # Index of the opening quote, reported for unterminated strings.
    opening = end - 1
    while True:
        found = _m(s, end)
        if found is None:
            raise JSONDecodeError("Unterminated string starting at", s, opening)
        end = found.end()
        literal, stopper = found.groups()
        # literal holds zero or more characters that need no unescaping
        if literal:
            add_piece(literal)
        # stopper is the closing quote, a literal control character, or a
        # backslash announcing an escape sequence
        if stopper == '"':
            return ''.join(pieces), end
        if stopper != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(stopper)
                raise JSONDecodeError(msg, s, end)
            add_piece(stopper)
            continue
        try:
            escape = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, opening) from None
        if escape == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate directly followed by another \u escape may
            # form a surrogate pair; merge them only if the second value
            # is a low surrogate.
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            add_piece(chr(code))
            continue
        # Any other escape must be present in the lookup table
        try:
            unescaped = _b[escape]
        except KeyError:
            msg = "Invalid \\escape: {0!r}".format(escape)
            raise JSONDecodeError(msg, s, end)
        end += 1
        add_piece(unescaped)
128
129
# The characters treated as whitespace between tokens, as a plain string
# for membership tests and as a compiled pattern for skipping runs of them.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)

# Prefer the C accelerated scanner; fall back to the pure-Python one when
# the accelerator could not be imported.
scanstring = c_scanstring or py_scanstring
135
136
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return (result, index_after_closing_brace).

    s_and_end is a (document, index) pair; index points just past the
    opening brace.  strict is forwarded to scanstring for the keys, and
    scan_once(document, index) parses each value, signalling "no value
    here" by raising StopIteration carrying the failing index.

    The result is object_pairs_hook(list_of_pairs) when that hook is given;
    otherwise a dict, passed through object_hook if it is given.  memo, when
    supplied, is used to share equal key strings.  Malformed input raises
    JSONDecodeError.
    """
    s, end = s_and_end
    items = []
    add_item = items.append
    # Backwards compatibility: callers may omit memo.
    if memo is None:
        memo = {}
    shared_key = memo.setdefault

    # Slicing never raises IndexError; an empty slice simply fails the
    # checks below and produces a more specific error.
    ch = s[end:end + 1]
    # The common case is that the first key starts immediately.
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        # Trivial empty object
        if ch == '}':
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = shared_key(key, key)
        # Fast path: the separator is ":" directly after the key; only fall
        # back to the whitespace regex when it is not.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace after the colon, avoiding the regex call when
        # there is at most one whitespace character.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_item((key, value))

        # Look at the delimiter after the value; an empty slice stands for
        # "end of document".
        ch = s[end:end + 1]
        if ch and ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]

        if ch == '}':
            end += 1
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end)
        comma_idx = end
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
        if ch != '"':
            if ch == '}':
                raise JSONDecodeError("Illegal trailing comma before end of object", s, comma_idx)
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
        # Step past the opening quote of the next key.
        end += 1
    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end
220
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return (list, index_after_closing_bracket).

    s_and_end is a (document, index) pair; index points just past the
    opening bracket.  scan_once(document, index) parses each element and
    signals "no value here" by raising StopIteration carrying the failing
    index.  Malformed input raises JSONDecodeError.
    """
    s, end = s_and_end
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, end + 1
    add_item = items.append
    while True:
        try:
            item, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_item(item)
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        if ch == ']':
            return items, end + 1
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end)
        comma_idx = end
        end += 1
        # Skip whitespace after the comma, avoiding the regex call when
        # there is at most one whitespace character.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # The document ended after the comma; the next scan_once call
            # reports the missing value.
            continue
        if s[end:end + 1] == ']':
            raise JSONDecodeError("Illegal trailing comma before end of array", s, comma_idx)
260
261
class JSONDecoder:
    """Simple JSON <https://json.org> decoder

    Unless customised, JSON values are decoded to these Python types:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    As an extension to the JSON spec, ``NaN``, ``Infinity`` and
    ``-Infinity`` are also accepted and decoded to the corresponding
    ``float`` values.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.  All arguments are keyword-only.

        ``object_hook``, if given, is called with the ``dict`` built for
        every decoded JSON object, and its return value is used in place
        of that ``dict``.  This allows custom deserializations (e.g.
        JSON-RPC class hinting).

        ``object_pairs_hook``, if given, is called with every decoded JSON
        object as an ordered list of ``(key, value)`` pairs, and its
        return value is used instead of the ``dict``.  When both hooks are
        given, ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to
        ``float(num_str)``; another type or parser (e.g.
        ``decimal.Decimal``) may be substituted.

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded.  The default is equivalent to ``int(num_str)``;
        another type or parser (e.g. ``float``) may be substituted.

        ``parse_constant``, if given, is called with one of the strings
        ``-Infinity``, ``Infinity`` or ``NaN``.  It can be used to raise an
        exception when such invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), control characters
        are allowed inside strings.  Control characters here are those
        with character codes in the 0-31 range, including ``'\\t'`` (tab),
        ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        # Caller-supplied hooks and options.
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.strict = strict
        # Value parsers; a missing (or falsy) argument selects the default.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        # Module-level parsers for the compound and string types.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Created last: make_scanner is handed this decoder, so every
        # attribute above is already in place when it runs.
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace around the document is ignored; anything else after
        the document raises ``JSONDecodeError("Extra data", ...)``.

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop != len(s):
            raise JSONDecodeError("Extra data", s, stop)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str``) and return a 2-tuple of the Python representation and
        the index in ``s`` where the document ended.

        Unlike ``decode``, extraneous data after the document is not an
        error, so this can be used on a string with trailing content.

        """
        try:
            obj, stop = self.scan_once(s, idx)
        except StopIteration as err:
            # The scanner reports "no value at this index" this way.
            raise JSONDecodeError("Expecting value", s, err.value) from None
        else:
            return obj, stop
365