• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONDecoder
2"""
3import re
4import sys
5import struct
6
7from json import scanner
8try:
9    from _json import scanstring as c_scanstring
10except ImportError:
11    c_scanstring = None
12
# Public API of this module: only the decoder class is exported by
# ``from <module> import *``.
__all__ = ['JSONDecoder']

# Flags shared by every regular expression compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16
17def _floatconstants():
18    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19    if sys.byteorder != 'big':
20        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21    nan, inf = struct.unpack('dd', _BYTES)
22    return nan, inf, -inf
23
# Special float values, computed once when the module is imported; they are
# the values stored in the _CONSTANTS lookup table below.
NaN, PosInf, NegInf = _floatconstants()
25
26
def linecol(doc, pos):
    """Convert the character offset ``pos`` in ``doc`` to ``(lineno, colno)``.

    The line number is one more than the number of newlines found before
    ``pos``.  The column is the distance from the closest preceding newline,
    or ``pos + 1`` when ``pos`` lies on the first line.
    """
    newlines_before = doc.count('\n', 0, pos)
    # rfind() returns -1 when no newline precedes ``pos``; subtracting it
    # then yields ``pos + 1``, which is exactly the first-line column.
    last_newline = doc.rfind('\n', 0, pos)
    return newlines_before + 1, pos - last_newline
34
35
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of an error in ``doc``.

    When only ``pos`` is given the message names a single line, column and
    character offset.  When ``end`` is given as well, the message describes
    the span from ``pos`` to ``end``.
    """
    # NOTE: the _json module also calls this function.
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        span = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        return span.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    point = '{0}: line {1} column {2} (char {3})'
    return point.format(msg, start_line, start_col, pos)
49
50
# Maps the non-standard JSON literals onto their float values; its
# __getitem__ is the default ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches a (possibly empty, non-greedy) run of characters followed by one
# terminator: a double quote, a backslash, or a control character in the
# range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the character it stands for.  ``u`` escapes are handled separately by
# _decode_uXXXX.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
64
65def _decode_uXXXX(s, pos):
66    esc = s[pos + 1:pos + 5]
67    if len(esc) == 4 and esc[1] not in 'xX':
68        try:
69            return int(esc, 16)
70        except ValueError:
71            pass
72    msg = "Invalid \\uXXXX escape"
73    raise ValueError(errmsg(msg, s, pos))
74
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to convert non-unicode chunks of ``s`` to unicode
    and defaults to DEFAULT_ENCODING.  ``_b`` and ``_m`` are bound by default
    to the BACKSLASH escape table and to ``STRINGCHUNK.match``.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote; only used in "Unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            # No terminator (quote, backslash or control character) was found
            # in the rest of s.
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # end now points just past the terminator character.
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                # Non-strict mode keeps the literal control character.
                _append(terminator)
                continue
        # From here on the terminator was a backslash; s[end] is the
        # character that selects the escape.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence
            uni = _decode_uXXXX(s, end)
            # Skip the 'u' and its four hex digits.
            end += 5
            # Check for surrogate pair on UCS-4 systems
            if sys.maxunicode > 65535 and \
               0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    # Combine the high and low surrogate into one code point
                    # and skip the second six-character escape.
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
142
143
# Use the C implementation imported from _json when it is available (it is
# None if that import failed), otherwise the pure-Python scanner above.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, used for ``in`` membership
# tests before falling back to the regular expression.
WHITESPACE_STR = ' \t\n\r'
149
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``s_and_end`` is a ``(s, end)`` pair; at index ``end`` of ``s`` the code
    expects, after optional whitespace, either the opening quote of the first
    key or the closing ``}`` (presumably the caller has already consumed the
    opening ``{`` -- verify against the scanner).

    Keys are read with ``scanstring(s, end, encoding, strict)`` and values
    with ``scan_once(s, end)``.

    Returns a ``(result, end)`` tuple where ``end`` is the index just past
    the closing ``}``.  ``result`` is ``object_pairs_hook(pairs)`` if that
    hook is given (``pairs`` being the list of ``(key, value)`` tuples in
    document order); otherwise a dict built from the pairs, passed through
    ``object_hook`` when that hook is given.

    Raises ValueError (with a message built by errmsg) when a key, the ``:``
    or ``,`` delimiter, or a value is missing.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    # Step over the opening quote of the first key.
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace after the colon; the regular expression is only
        # used when more than one whitespace character follows.  Running off
        # the end of s is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        # Find the character after the value; an empty string stands for
        # "end of input" and falls through to the delimiter error below.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past nextchar, hence end - 1.
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # Skip whitespace after the comma and fetch what should be the
        # opening quote of the next key.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
237
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``s_and_end`` is a ``(s, end)`` pair; at index ``end`` of ``s`` the code
    expects, after optional whitespace, either the first element or the
    closing ``]`` (presumably the caller has already consumed the opening
    ``[`` -- verify against the scanner).  Each element is read with
    ``scan_once(s, end)``.

    Returns a ``(values, end)`` tuple where ``values`` is the list of decoded
    elements and ``end`` is the index just past the closing ``]``.

    Raises ValueError (with a message built by errmsg) when an element or
    the ``,`` delimiter is missing.
    """
    s, end = s_and_end
    values = []
    # Slicing (rather than indexing) yields '' instead of raising IndexError
    # when end is past the end of s.
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end has already been advanced past nextchar, so the offending
            # character sits at end - 1 (same convention as JSONObject).
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # Skip whitespace after the comma; the regular expression is only
        # used when more than one whitespace character follows.  Running off
        # the end of s is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
273
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Unless customised through the constructor arguments, JSON values are
    decoded to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    The literals ``NaN``, ``Infinity`` and ``-Infinity`` are accepted too and
    decoded to the matching ``float`` values, although they are not part of
    the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when left as None).  It is irrelevant for ``unicode`` input.
        Only encodings that are a superset of ASCII currently work; text in
        any other encoding should be handed over as ``unicode``.

        ``object_hook``, when given, is called with the ``dict`` produced
        for every decoded JSON object, and whatever it returns replaces that
        ``dict``.  This allows custom deserializations (e.g. JSON-RPC class
        hinting).

        ``object_pairs_hook``, when given, is called for every decoded JSON
        object with the ordered list of its ``(key, value)`` pairs, and its
        return value is used instead of the ``dict``.  This makes it possible
        to write decoders that depend on the order in which pairs appear
        (collections.OrderedDict, for instance, remembers insertion order).
        It takes priority over ``object_hook`` when both are supplied.

        ``parse_float``, when given, is called with the string of every JSON
        float; the default behaves like float(num_str).  Use it to plug in
        another type or parser (e.g. decimal.Decimal).

        ``parse_int``, when given, is called with the string of every JSON
        int; the default behaves like int(num_str).  Use it to plug in
        another type or parser (e.g. float).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception when
        such invalid JSON numbers show up.

        ``strict`` (true by default) controls whether control characters are
        rejected inside strings; pass a false value to allow them.  Control
        characters here are those with codes 0-31, which includes ``'\\t'``
        (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Any falsy parser argument selects the built-in default.
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)
        self.strict = strict
        # Parsing callables exposed as attributes on the instance.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last: make_scanner receives this fully configured instance.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` instance holding a JSON
        document) and return its Python representation.

        Whitespace before and after the document is ignored; anything else
        following the document raises ValueError.

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        # Only trailing whitespace may follow the decoded document.
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str`` or ``unicode``) and return a 2-tuple of its Python
        representation and the index in ``s`` at which the document ended.

        Unlike decode(), this tolerates extraneous data after the document.

        """
        try:
            obj, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, stop
386