1"""Implementation of JSONDecoder 2""" 3import re 4import sys 5import struct 6 7from json import scanner 8try: 9 from _json import scanstring as c_scanstring 10except ImportError: 11 c_scanstring = None 12 13__all__ = ['JSONDecoder'] 14 15FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL 16 17def _floatconstants(): 18 nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00') 19 inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00') 20 return nan, inf, -inf 21 22NaN, PosInf, NegInf = _floatconstants() 23 24 25def linecol(doc, pos): 26 lineno = doc.count('\n', 0, pos) + 1 27 if lineno == 1: 28 colno = pos + 1 29 else: 30 colno = pos - doc.rindex('\n', 0, pos) 31 return lineno, colno 32 33 34def errmsg(msg, doc, pos, end=None): 35 # Note that this function is called from _json 36 lineno, colno = linecol(doc, pos) 37 if end is None: 38 fmt = '{0}: line {1} column {2} (char {3})' 39 return fmt.format(msg, lineno, colno, pos) 40 #fmt = '%s: line %d column %d (char %d)' 41 #return fmt % (msg, lineno, colno, pos) 42 endlineno, endcolno = linecol(doc, end) 43 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' 44 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) 45 #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' 46 #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) 47 48 49_CONSTANTS = { 50 '-Infinity': NegInf, 51 'Infinity': PosInf, 52 'NaN': NaN, 53} 54 55STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) 56BACKSLASH = { 57 '"': u'"', '\\': u'\\', '/': u'/', 58 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', 59} 60 61DEFAULT_ENCODING = "utf-8" 62 63def _decode_uXXXX(s, pos): 64 esc = s[pos + 1:pos + 5] 65 if len(esc) == 4 and esc[1] not in 'xX': 66 try: 67 return int(esc, 16) 68 except ValueError: 69 pass 70 msg = "Invalid \\uXXXX escape" 71 raise ValueError(errmsg(msg, s, pos)) 72 73def py_scanstring(s, end, encoding=None, strict=True, 74 _b=BACKSLASH, _m=STRINGCHUNK.match): 75 """Scan the string s for a JSON string. End is the index of the 76 character in s after the quote that started the JSON string. 77 Unescapes all valid JSON string escape sequences and raises ValueError 78 on attempt to decode an invalid string. If strict is False then literal 79 control characters are allowed in the string. 80 81 Returns a tuple of the decoded string and the index of the character in s 82 after the end quote.""" 83 if encoding is None: 84 encoding = DEFAULT_ENCODING 85 chunks = [] 86 _append = chunks.append 87 begin = end - 1 88 while 1: 89 chunk = _m(s, end) 90 if chunk is None: 91 raise ValueError( 92 errmsg("Unterminated string starting at", s, begin)) 93 end = chunk.end() 94 content, terminator = chunk.groups() 95 # Content is contains zero or more unescaped string characters 96 if content: 97 if not isinstance(content, unicode): 98 content = unicode(content, encoding) 99 _append(content) 100 # Terminator is the end of string, a literal control character, 101 # or a backslash denoting that an escape sequence follows 102 if terminator == '"': 103 break 104 elif terminator != '\\': 105 if strict: 106 #msg = "Invalid control character %r at" % (terminator,) 107 msg = "Invalid control character {0!r} at".format(terminator) 108 raise ValueError(errmsg(msg, s, end)) 109 else: 110 _append(terminator) 111 continue 112 try: 113 esc = s[end] 114 except IndexError: 115 raise ValueError( 116 errmsg("Unterminated string starting at", s, begin)) 117 # If not a unicode escape sequence, must be in the lookup table 118 if esc != 'u': 119 try: 120 char = _b[esc] 121 except KeyError: 122 msg = "Invalid \\escape: " + repr(esc) 123 raise ValueError(errmsg(msg, s, end)) 124 end += 1 125 else: 126 # Unicode escape sequence 127 uni = _decode_uXXXX(s, end) 128 end += 5 129 # Check for surrogate pair on UCS-4 systems 130 if sys.maxunicode > 65535 and \ 131 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': 132 uni2 = _decode_uXXXX(s, end + 1) 133 if 0xdc00 <= uni2 <= 0xdfff: 134 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 135 end += 6 136 char = unichr(uni) 137 # Append the unescaped character 138 _append(char) 139 return u''.join(chunks), end 140 141 142# Use speedup if available 143scanstring = c_scanstring or py_scanstring 144 145WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) 146WHITESPACE_STR = ' \t\n\r' 147 148def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, 149 object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 150 s, end = s_and_end 151 pairs = [] 152 pairs_append = pairs.append 153 # Use a slice to prevent IndexError from being raised, the following 154 # check will raise a more specific ValueError if the string is empty 155 nextchar = s[end:end + 1] 156 # Normally we expect nextchar == '"' 157 if nextchar != '"': 158 if nextchar in _ws: 159 end = _w(s, end).end() 160 nextchar = s[end:end + 1] 161 # Trivial empty object 162 if nextchar == '}': 163 if object_pairs_hook is not None: 164 result = object_pairs_hook(pairs) 165 return result, end + 1 166 pairs = {} 167 if object_hook is not None: 168 pairs = object_hook(pairs) 169 return pairs, end + 1 170 elif nextchar != '"': 171 raise ValueError(errmsg( 172 "Expecting property name enclosed in double quotes", s, end)) 173 end += 1 174 while True: 175 key, end = scanstring(s, end, encoding, strict) 176 177 # To skip some function call overhead we optimize the fast paths where 178 # the JSON key separator is ": " or just ":". 179 if s[end:end + 1] != ':': 180 end = _w(s, end).end() 181 if s[end:end + 1] != ':': 182 raise ValueError(errmsg("Expecting ':' delimiter", s, end)) 183 end += 1 184 185 try: 186 if s[end] in _ws: 187 end += 1 188 if s[end] in _ws: 189 end = _w(s, end + 1).end() 190 except IndexError: 191 pass 192 193 try: 194 value, end = scan_once(s, end) 195 except StopIteration: 196 raise ValueError(errmsg("Expecting object", s, end)) 197 pairs_append((key, value)) 198 199 try: 200 nextchar = s[end] 201 if nextchar in _ws: 202 end = _w(s, end + 1).end() 203 nextchar = s[end] 204 except IndexError: 205 nextchar = '' 206 end += 1 207 208 if nextchar == '}': 209 break 210 elif nextchar != ',': 211 raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) 212 213 try: 214 nextchar = s[end] 215 if nextchar in _ws: 216 end += 1 217 nextchar = s[end] 218 if nextchar in _ws: 219 end = _w(s, end + 1).end() 220 nextchar = s[end] 221 except IndexError: 222 nextchar = '' 223 224 end += 1 225 if nextchar != '"': 226 raise ValueError(errmsg( 227 "Expecting property name enclosed in double quotes", s, end - 1)) 228 if object_pairs_hook is not None: 229 result = object_pairs_hook(pairs) 230 return result, end 231 pairs = dict(pairs) 232 if object_hook is not None: 233 pairs = object_hook(pairs) 234 return pairs, end 235 236def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 237 s, end = s_and_end 238 values = [] 239 nextchar = s[end:end + 1] 240 if nextchar in _ws: 241 end = _w(s, end + 1).end() 242 nextchar = s[end:end + 1] 243 # Look-ahead for trivial empty array 244 if nextchar == ']': 245 return values, end + 1 246 _append = values.append 247 while True: 248 try: 249 value, end = scan_once(s, end) 250 except StopIteration: 251 raise ValueError(errmsg("Expecting object", s, end)) 252 _append(value) 253 nextchar = s[end:end + 1] 254 if nextchar in _ws: 255 end = _w(s, end + 1).end() 256 nextchar = s[end:end + 1] 257 end += 1 258 if nextchar == ']': 259 break 260 elif nextchar != ',': 261 raise ValueError(errmsg("Expecting ',' delimiter", s, end)) 262 try: 263 if s[end] in _ws: 264 end += 1 265 if s[end] in _ws: 266 end = _w(s, end + 1).end() 267 except IndexError: 268 pass 269 270 return values, end 271 272class JSONDecoder(object): 273 """Simple JSON <http://json.org> decoder 274 275 Performs the following translations in decoding by default: 276 277 +---------------+-------------------+ 278 | JSON | Python | 279 +===============+===================+ 280 | object | dict | 281 +---------------+-------------------+ 282 | array | list | 283 +---------------+-------------------+ 284 | string | unicode | 285 +---------------+-------------------+ 286 | number (int) | int, long | 287 +---------------+-------------------+ 288 | number (real) | float | 289 +---------------+-------------------+ 290 | true | True | 291 +---------------+-------------------+ 292 | false | False | 293 +---------------+-------------------+ 294 | null | None | 295 +---------------+-------------------+ 296 297 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as 298 their corresponding ``float`` values, which is outside the JSON spec. 299 300 """ 301 302 def __init__(self, encoding=None, object_hook=None, parse_float=None, 303 parse_int=None, parse_constant=None, strict=True, 304 object_pairs_hook=None): 305 """``encoding`` determines the encoding used to interpret any ``str`` 306 objects decoded by this instance (utf-8 by default). It has no 307 effect when decoding ``unicode`` objects. 308 309 Note that currently only encodings that are a superset of ASCII work, 310 strings of other encodings should be passed in as ``unicode``. 311 312 ``object_hook``, if specified, will be called with the result 313 of every JSON object decoded and its return value will be used in 314 place of the given ``dict``. This can be used to provide custom 315 deserializations (e.g. to support JSON-RPC class hinting). 316 317 ``object_pairs_hook``, if specified will be called with the result of 318 every JSON object decoded with an ordered list of pairs. The return 319 value of ``object_pairs_hook`` will be used instead of the ``dict``. 320 This feature can be used to implement custom decoders that rely on the 321 order that the key and value pairs are decoded (for example, 322 collections.OrderedDict will remember the order of insertion). If 323 ``object_hook`` is also defined, the ``object_pairs_hook`` takes 324 priority. 325 326 ``parse_float``, if specified, will be called with the string 327 of every JSON float to be decoded. By default this is equivalent to 328 float(num_str). This can be used to use another datatype or parser 329 for JSON floats (e.g. decimal.Decimal). 330 331 ``parse_int``, if specified, will be called with the string 332 of every JSON int to be decoded. By default this is equivalent to 333 int(num_str). This can be used to use another datatype or parser 334 for JSON integers (e.g. float). 335 336 ``parse_constant``, if specified, will be called with one of the 337 following strings: -Infinity, Infinity, NaN. 338 This can be used to raise an exception if invalid JSON numbers 339 are encountered. 340 341 If ``strict`` is false (true is the default), then control 342 characters will be allowed inside strings. Control characters in 343 this context are those with character codes in the 0-31 range, 344 including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. 345 346 """ 347 self.encoding = encoding 348 self.object_hook = object_hook 349 self.object_pairs_hook = object_pairs_hook 350 self.parse_float = parse_float or float 351 self.parse_int = parse_int or int 352 self.parse_constant = parse_constant or _CONSTANTS.__getitem__ 353 self.strict = strict 354 self.parse_object = JSONObject 355 self.parse_array = JSONArray 356 self.parse_string = scanstring 357 self.scan_once = scanner.make_scanner(self) 358 359 def decode(self, s, _w=WHITESPACE.match): 360 """Return the Python representation of ``s`` (a ``str`` or ``unicode`` 361 instance containing a JSON document) 362 363 """ 364 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 365 end = _w(s, end).end() 366 if end != len(s): 367 raise ValueError(errmsg("Extra data", s, end, len(s))) 368 return obj 369 370 def raw_decode(self, s, idx=0): 371 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` 372 beginning with a JSON document) and return a 2-tuple of the Python 373 representation and the index in ``s`` where the document ended. 374 375 This can be used to decode a JSON document from a string that may 376 have extraneous data at the end. 377 378 """ 379 try: 380 obj, end = self.scan_once(s, idx) 381 except StopIteration: 382 raise ValueError("No JSON object could be decoded") 383 return obj, end 384