"""Implementation of JSONDecoder
"""
import re

from json import scanner
try:
    from _json import scanstring as c_scanstring
except ImportError:
    c_scanstring = None

__all__ = ['JSONDecoder', 'JSONDecodeError']

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')


class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Lines and columns are 1-based: count the newlines before ``pos``
        # and measure the distance from the last one (rfind() yields -1 when
        # there is none, which makes the first character column 1).
        lineno = doc.count('\n', 0, pos) + 1
        colno = pos - doc.rfind('\n', 0, pos)
        errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
        super().__init__(errmsg)
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self):
        """Rebuild from the raw (msg, doc, pos) triple.

        The formatted message stored by ValueError is not a valid argument
        list for ``__init__``, so the original arguments are returned instead.
        """
        return self.__class__, (self.msg, self.doc, self.pos)


# Default mapping used by JSONDecoder for the non-standard numeric constants.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}


# Exactly four hexadecimal digits (the XXXX part of a unicode escape).
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
# Group 1: a (possibly empty) run of ordinary characters, matched lazily.
# Group 2: the character that ends the run - a double quote, a backslash,
# or a character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes and the characters they stand for.
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}


def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
    r"""Return the integer value of the four hex digits that follow ``pos``.

    ``pos`` is the index of the ``u`` in a ``\uXXXX`` escape inside ``s``.
    Raises JSONDecodeError (positioned at ``pos``) when fewer than four
    hexadecimal digits follow.
    """
    esc = _m(s, pos + 1)
    if esc is not None:
        try:
            return int(esc.group(), 16)
        except ValueError:
            # Defensive only: HEXDIGITS admits nothing but hex digits.
            pass
    msg = "Invalid \\uXXXX escape"
    raise JSONDecodeError(msg, s, pos)


def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote.

    The ValueError raised is always a JSONDecodeError. ``_b`` and ``_m`` are
    pre-bound module objects and are not meant to be supplied by callers."""
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting an unterminated string.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                # ``end`` has already moved past the terminator, so the
                # offending character itself sits at ``end - 1``.  Reporting
                # ``end`` would point one character too far (and onto the
                # next line when the control character is a newline).
                raise JSONDecodeError(msg, s, end - 1)
            else:
                _append(terminator)
                continue
        # A backslash was found: ``end`` now indexes the escape character.
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, begin) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            uni = _decode_uXXXX(s, end)
            # Skip the 'u' and its four hex digits.
            end += 5
            # A high surrogate immediately followed by another unicode escape
            # may form a surrogate pair encoding one non-BMP code point.
            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    # Consume the second escape (backslash, 'u', four digits);
                    # otherwise it is left for the next loop iteration.
                    end += 6
            char = chr(uni)
        _append(char)
    return ''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair; ``end`` is the index at which the
    first property name (or the closing ``}``) is expected, optionally
    preceded by whitespace - presumably just past the opening ``{`` consumed
    by the caller.  ``strict`` is forwarded to ``scanstring`` for the keys
    and ``scan_once(s, idx)`` is used to parse each value.

    ``result`` is ``object_pairs_hook(pairs)`` when that hook is given (it
    takes priority), otherwise a dict, passed through ``object_hook`` when
    that is given.  The returned ``end`` is the index just past the closing
    ``}``.  ``memo`` is a dict used to share equal key strings.

    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault(key, key) returns the first equal key seen, so repeated
    # keys end up being the same string object.
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    # Step over the opening quote of the first key.
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace after the colon; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Remember where the comma was so that a trailing comma can be
        # reported at the comma rather than at the closing brace.
        comma_idx = end - 1
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            if nextchar == '}':
                raise JSONDecodeError("Illegal trailing comma before end of object", s, comma_idx)
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair; ``end`` is the index at which the
    first element (or the closing ``]``) is expected, optionally preceded by
    whitespace - presumably just past the opening ``[`` consumed by the
    caller.  ``scan_once(s, idx)`` is used to parse each element.

    ``values`` is a list and the returned ``end`` is the index just past the
    closing ``]``.  Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Remember where the comma was so that a trailing comma can be
        # reported at the comma rather than at the closing bracket.
        comma_idx = end - 1
        # Skip whitespace after the comma.  If the document ends here,
        # nextchar keeps its previous value (',') and the next scan_once
        # call reports the missing value.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        except IndexError:
            pass
        if nextchar == ']':
            raise JSONDecodeError("Illegal trailing comma before end of array", s, comma_idx)

    return values, end


class JSONDecoder:
    """Simple JSON <https://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders.
        If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        self.object_hook = object_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.object_pairs_hook = object_pairs_hook
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: the scanner is constructed from this decoder instance.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Leading and trailing whitespace is ignored; anything else after the
        document raises JSONDecodeError("Extra data").

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            # The scanner signals "no value here" with StopIteration whose
            # value is the offending index.
            raise JSONDecodeError("Expecting value", s, err.value) from None
        return obj, end