• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONDecoder
2"""
3import re
4import sys
5import struct
6
7from json import scanner
8try:
9    from _json import scanstring as c_scanstring
10except ImportError:
11    c_scanstring = None
12
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Regex flags shared by the patterns compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16
17def _floatconstants():
18    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19    if sys.byteorder != 'big':
20        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21    nan, inf = struct.unpack('dd', _BYTES)
22    return nan, inf, -inf
23
# Float values for the non-standard JSON literals; referenced by _CONSTANTS.
NaN, PosInf, NegInf = _floatconstants()
25
26
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    ``doc`` is the document text and ``pos`` an index into it.  Both the
    line number and the column number are 1-based.  Previously the column
    was 0-based on the first line but 1-based on every other line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes ``pos``, which makes the
    # first line come out as ``pos + 1`` -- the same 1-based convention the
    # subtraction produces on all later lines.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
34
35
def errmsg(msg, doc, pos, end=None):
    """Build a decoding error message that includes position information.

    ``msg`` is the leading description, ``doc`` the text being decoded and
    ``pos`` the offset the error refers to.  When ``end`` is given, the
    message describes the span from ``pos`` to ``end``; otherwise it
    describes the single position ``pos``.  Line and column numbers are
    obtained from ``linecol``.
    """
    # Note that this function is called from _json
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = ('{0}: line {1} column {2} - line {3} column {4} '
                    '(char {5} - {6})')
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
49
50
# Maps the non-standard JSON literals to their float values.  JSONDecoder
# uses this mapping's __getitem__ as the default ``parse_constant``.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}
56
# Matches a (possibly empty, non-greedy) run of ordinary characters as group 1
# followed by the first character that stops the run as group 2: a double
# quote, a backslash, or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to the
# unicode character it stands for.  ``\uXXXX`` is handled separately.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
64
def _decode_uXXXX(s, pos):
    """Return the integer value of the ``\\uXXXX`` escape whose ``u`` is at
    index ``pos`` in ``s``.

    Raises ValueError unless exactly four hexadecimal digits follow the
    ``u``.  The digits are validated explicitly because ``int(x, 16)`` on
    its own also accepts forms such as ``0x41``, ``+123`` or padded digits.
    """
    esc = s[pos + 1:pos + 5]
    if len(esc) == 4:
        for digit in esc:
            if digit not in '0123456789abcdefABCDEF':
                break
        else:
            return int(esc, 16)
    msg = "Invalid \\uXXXX escape"
    raise ValueError(errmsg(msg, s, pos))


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode non-unicode chunks of ``s`` and defaults
    to DEFAULT_ENCODING.  ``_b`` and ``_m`` are the escape table and chunk
    matcher, bound as default arguments.

    On builds where ``sys.maxunicode > 65535`` a high surrogate escape that
    is immediately followed by a low surrogate escape is combined into a
    single code point.  A high surrogate that is not followed by a valid
    low surrogate is kept as-is instead of being rejected.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: ``end`` points at the 'u'.
            uni = _decode_uXXXX(s, end)
            end += 5
            # Check for surrogate pair on UCS-4 systems.  Only combine when
            # the second escape really is a low surrogate; otherwise the
            # first code unit is emitted alone and the next escape is
            # handled by the following loop iteration.
            if (sys.maxunicode > 65535 and
                    0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u'):
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
141
142
# Use speedup if available: c_scanstring is None when the _json import failed,
# in which case the pure-Python implementation is used.
scanstring = c_scanstring or py_scanstring

# WHITESPACE matches a (possibly empty) run of space, tab, newline and
# carriage return; WHITESPACE_STR holds the same characters for cheap
# ``in`` membership tests.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
148
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the body of a JSON object.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at which
    parsing of the object body starts (the code expects whitespace, a
    property-name quote or the closing brace there).  ``encoding`` and
    ``strict`` are forwarded to ``scanstring`` for the keys; ``scan_once``
    is called as ``scan_once(s, idx)`` to parse each value.

    Returns ``(result, end)`` where ``end`` is the index just past the
    closing ``}``.  ``result`` is ``object_pairs_hook(pairs)`` when that hook
    is given (``pairs`` being the ordered list of ``(key, value)`` tuples);
    otherwise it is a dict, passed through ``object_hook`` when that is given.

    Raises ValueError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                # Step past the closing brace, exactly as the dict branch
                # below does; returning ``end`` here left the '}' unconsumed.
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace before the value; running off the end of ``s`` is
        # left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        # Fetch the delimiter after the value; '' stands for end of input.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # After a comma the next non-whitespace character must open a key.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
236
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the body of a JSON array.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at which
    parsing of the array body starts.  ``scan_once`` is called as
    ``scan_once(s, idx)`` to parse each element and must return
    ``(value, next_index)`` or raise StopIteration.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index just past the closing ``]``.  Raises ValueError on malformed input.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # report ``end - 1`` (the same convention JSONObject uses).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace before the next element; running off the end of
        # ``s`` is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
273
class JSONDecoder(object):
    """Decoder turning JSON <http://json.org> text into Python objects.

    Unless hooks are supplied, JSON values are translated as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    As an extension to the JSON spec, ``NaN``, ``Infinity`` and
    ``-Infinity`` are also accepted and decoded to the corresponding
    ``float`` values.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when left as None); it has no effect on ``unicode`` input.
        Only encodings that are a superset of ASCII currently work; text in
        any other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with the ``dict`` produced
        for every decoded JSON object, and its return value replaces that
        ``dict``.  This allows custom deserialization (e.g. JSON-RPC class
        hinting).

        ``object_pairs_hook``, when given, is called with an ordered list of
        ``(key, value)`` pairs for every decoded JSON object, and its return
        value is used instead of a ``dict``.  This supports decoders that
        depend on key order (for example collections.OrderedDict).  It takes
        priority over ``object_hook`` when both are given.

        ``parse_float``, when given, is called with the string of every JSON
        float to be decoded; the default is equivalent to float(num_str).
        Useful for substituting another type or parser (e.g.
        decimal.Decimal).

        ``parse_int``, when given, is called with the string of every JSON
        int to be decoded; the default is equivalent to int(num_str).
        Useful for substituting another type or parser (e.g. float).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception when
        such invalid JSON numbers are encountered.

        ``strict`` (true by default) controls whether control characters are
        rejected inside strings.  When false they are allowed.  Control
        characters here are those with codes in the 0-31 range, including
        ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # Plain configuration values.
        self.encoding = encoding
        self.strict = strict
        # Object construction hooks.
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Scalar parsers, falling back to the built-in behaviour.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        # Container and string parsers defined in this module.
        self.parse_string = scanstring
        self.parse_array = JSONArray
        self.parse_object = JSONObject
        # Built last: make_scanner is handed this instance after all of the
        # attributes above have been set.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` instance holding one JSON
        document) and return the resulting Python object.

        Leading and trailing whitespace is ignored; anything else after the
        document raises ValueError.

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that begins at index ``idx`` of ``s`` (a
        ``str`` or ``unicode``) and return a 2-tuple of the Python object and
        the index in ``s`` at which the document ended.

        Unlike ``decode``, trailing data after the document is not an error,
        so this can be used on strings with extraneous data at the end.
        Raises ValueError when no document can be decoded.

        """
        try:
            result, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return result, stop
386