• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONDecoder
2"""
3import re
4
5from json import scanner
6try:
7    from _json import scanstring as c_scanstring
8except ImportError:
9    c_scanstring = None
10
11__all__ = ['JSONDecoder', 'JSONDecodeError']
12
# Flags applied to every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float values backing the non-standard JSON literals NaN, Infinity and
# -Infinity.
PosInf = float('inf')
NegInf = -PosInf
NaN = float('nan')
18
19
class JSONDecodeError(ValueError):
    """ValueError subclass describing where decoding of a document failed.

    In addition to the formatted message, instances expose:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json

    def __init__(self, msg, doc, pos):
        # Line numbers are 1-based: one more than the newlines before pos.
        newlines_before = doc.count('\n', 0, pos)
        lineno = newlines_before + 1
        # rfind() yields -1 when pos is on the first line, which makes the
        # column 1-based without a special case.
        last_newline = doc.rfind('\n', 0, pos)
        colno = pos - last_newline
        location = (msg, lineno, colno, pos)
        errmsg = '%s: line %d column %d (char %d)' % location
        ValueError.__init__(self, errmsg)
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self):
        # Rebuild from the constructor arguments; lineno and colno are
        # recomputed by __init__.
        ctor_args = (self.msg, self.doc, self.pos)
        return self.__class__, ctor_args
44
45
# Lookup table from the non-standard JSON literals to their float values.
_CONSTANTS = dict((
    ('-Infinity', NegInf),
    ('Infinity', PosInf),
    ('NaN', NaN),
))
51
52
# Matches a (possibly empty) run of ordinary characters followed by the
# character that ends the run: a double quote, a backslash, or a control
# character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes (the character after the backslash) and the
# text each one decodes to.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}
58
59def _decode_uXXXX(s, pos):
60    esc = s[pos + 1:pos + 5]
61    if len(esc) == 4 and esc[1] not in 'xX':
62        try:
63            return int(esc, 16)
64        except ValueError:
65            pass
66    msg = "Invalid \\uXXXX escape"
67    raise JSONDecodeError(msg, s, pos)
68
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    (as JSONDecodeError) on attempt to decode an invalid string. If strict
    is False then literal control characters are allowed in the string.

    The _b and _m parameters only bind module-level helpers as defaults and
    are not meant to be passed by callers.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                # end already points past the terminator, so the control
                # character itself sits at end - 1.
                raise JSONDecodeError(msg, s, end - 1)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, begin) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                # The KeyError is an implementation detail of the lookup;
                # do not chain it onto the decoding error.
                raise JSONDecodeError(msg, s, end) from None
            end += 1
        else:
            uni = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate immediately followed by another \u escape
            # may form a surrogate pair encoding one non-BMP character.
            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = chr(uni)
        _append(char)
    return ''.join(chunks), end
127
128
# Prefer the C implementation when it could be imported; otherwise fall
# back to the pure-Python scanner.
scanstring = c_scanstring if c_scanstring else py_scanstring

# The whitespace characters skipped between tokens, as a plain string for
# membership tests and as a compiled pattern for skipping runs of them.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
134
135
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening brace has already been consumed.

    s_and_end is a (document, index) pair; the index is that of the first
    character after the opening brace.  Keys are read with scanstring
    (honouring strict) and values with scan_once.  Equal keys share one
    string object through memo.

    Returns a 2-tuple of the result and the index just past the closing
    brace.  The result is object_pairs_hook(list of (key, value) pairs) when
    that hook is given; otherwise a dict, passed through object_hook when
    that hook is given.

    Raises JSONDecodeError on a missing key, ':' or ',' delimiter, or value.
    """
    s, end = s_and_end
    # Backwards compatibility: callers may omit the memo.
    if memo is None:
        memo = {}
    share_key = memo.setdefault
    items = []

    def finish(stop):
        # Turn the collected pairs into the caller-visible result.
        if object_pairs_hook is not None:
            return object_pairs_hook(items), stop
        obj = dict(items)
        if object_hook is not None:
            obj = object_hook(obj)
        return obj, stop

    # Slicing never raises IndexError; an empty slice falls through to the
    # more specific error below.
    ch = s[end:end + 1]
    # The common case is that the first key starts right away.
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        # Trivial empty object
        if ch == '}':
            return finish(end + 1)
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    # Step over the opening quote of the first key.
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = share_key(key, key)
        # Fast path: the ':' usually follows the key immediately, so only
        # run the whitespace regex when it does not.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Fast path for zero or one whitespace character after the ':'.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as stop:
            raise JSONDecodeError("Expecting value", s, stop.value) from None
        items.append((key, value))

        # Find the delimiter after the value; running off the end of the
        # document is reported as a missing ','.
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''

        if ch == '}':
            return finish(end + 1)
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end)

        # Skip past the comma and any whitespace; the next key must follow.
        end = _w(s, end + 1).end()
        if s[end:end + 1] != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
        end += 1
216
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening bracket has already been consumed.

    s_and_end is a (document, index) pair; the index is that of the first
    character after the opening bracket.  Each element is read with
    scan_once.

    Returns a 2-tuple of the list of decoded elements and the index just
    past the closing bracket.

    Raises JSONDecodeError when an element or a ',' delimiter is missing.
    """
    s, end = s_and_end
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, end + 1
    while True:
        try:
            item, end = scan_once(s, end)
        except StopIteration as stop:
            raise JSONDecodeError("Expecting value", s, stop.value) from None
        items.append(item)

        # Find the delimiter that follows the element.
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        if ch == ']':
            return items, end + 1
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end)
        end += 1

        # Fast path for zero or one whitespace character after the ','.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
252
253
class JSONDecoder:
    """Simple JSON <http://json.org> decoder

    By default, decoding maps JSON values to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity``, and ``-Infinity`` are also understood and decoded
    to their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder; every argument is keyword-only.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified, will be called with the result
        of every JSON object decoded as an ordered list of pairs, and its
        return value will be used instead of the ``dict``.  This feature can
        be used to implement custom decoders.  If ``object_hook`` is also
        defined, ``object_pairs_hook`` takes priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded.  By default this is equivalent to
        float(num_str).  This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        self.object_hook = object_hook
        # A missing (or otherwise falsy) parser falls back to the default.
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)
        self.strict = strict
        self.object_pairs_hook = object_pairs_hook
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Build the scanner last: it is handed this instance, after all of
        # the settings above are in place.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace around the document is ignored; anything else after it
        raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        stop = _w(s, end).end()
        if stop == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, stop)

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document), starting at index ``idx``, and return a 2-tuple
        of the Python representation and the index in ``s`` where the
        document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Raises JSONDecodeError("Expecting value") when no value is found.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as stop:
            raise JSONDecodeError("Expecting value", s, stop.value) from None
        else:
            return obj, end
357