"""
Implementation of JSONDecoder
"""
import re

from scanner import Scanner, pattern

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

def _floatconstants():
    """
    Return the tuple ``(nan, inf, -inf)``.

    The two positive values are spelled out as big-endian IEEE 754 double
    bit patterns (``7FF8...`` and ``7FF0...``), byte-swapped per value on
    hosts whose ``sys.byteorder`` is not ``'big'``, and unpacked with the
    native ``'dd'`` struct format.
    """
    import struct
    import sys
    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
    if sys.byteorder != 'big':
        # Reverse each 8-byte double separately, keeping their order.
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()

def linecol(doc, pos):
    """
    Return the 1-based ``(lineno, colno)`` of offset ``pos`` in ``doc``.

    The column is the distance from the closest preceding newline.  On the
    first line ``rfind`` returns -1, so the column is ``pos + 1``; this keeps
    columns 1-based on every line (previously the first line alone was
    reported 0-based).
    """
    lineno = doc.count('\n', 0, pos) + 1
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno

def errmsg(msg, doc, pos, end=None):
    """
    Format ``msg`` with the location of ``pos`` (and optionally the range
    ``pos`` - ``end``) inside ``doc``.

    Line and column numbers come from ``linecol``; the raw character
    offsets are appended in parentheses.
    """
    lineno, colno = linecol(doc, pos)
    if end is None:
        return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
        msg, lineno, colno, endlineno, endcolno, pos, end)

# Literal tokens and the Python values they decode to.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
    'true': True,
    'false': False,
    'null': None,
}

def JSONConstant(match, context, c=_CONSTANTS):
    """
    Scanner handler for the literal constants listed in ``_CONSTANTS``.

    Returns ``(value, None)``.
    NOTE(review): the ``None`` end position is presumably interpreted by the
    scanner as "the end of the match" -- confirm in the ``scanner`` module.
    """
    return c[match.group(0)], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)

def JSONNumber(match, context):
    """
    Scanner handler for numbers.

    The text is re-matched with ``JSONNumber.regex`` to obtain the integer,
    fraction and exponent groups.  A number with a fraction or an exponent
    is decoded with ``float``, anything else with ``int``.  Returns
    ``(value, None)``.
    NOTE(review): ``JSONNumber.regex`` is not set in this file; it is
    presumably attached by ``pattern`` -- confirm in the ``scanner`` module.
    """
    match = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = match.groups()
    if frac or exp:
        res = float(integer + (frac or '') + (exp or ''))
    else:
        res = int(integer)
    return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)

# Lazily matches everything up to the next double quote or backslash.
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
# Single-character escapes (the character after a backslash) -> replacement.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
# Exactly four ASCII hexadecimal digits, as required after ``\u``.
_HEX4 = re.compile(r'[0-9A-Fa-f]{4}\Z')

DEFAULT_ENCODING = "utf-8"

def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Decode the JSON string whose opening quote sits at ``s[end - 1]``.

    ``end`` is the index of the first character after the opening quote.
    ``encoding`` (``DEFAULT_ENCODING`` when ``None``) is used to convert
    non-``unicode`` chunks to ``unicode``.  ``_b`` and ``_m`` are default
    argument bindings of module globals and are not meant to be passed.

    Returns ``(text, end)`` where ``text`` is the decoded ``unicode`` string
    and ``end`` is the index just past the closing quote.

    Raises ``ValueError`` for an unterminated string, an unknown backslash
    escape, or a ``\\u`` escape that is not followed by exactly four
    hexadecimal digits.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated" error messages.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        # The terminator was a backslash: decode the escape that follows.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            # Validate before converting: int(esc, 16) alone also accepts
            # forms such as '0x1f' and non-ASCII digits.
            if _HEX4.match(esc) is None:
                raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
            m = unichr(int(esc, 16))
            end += 5
        _append(m)
    return u''.join(chunks), end

def JSONString(match, context):
    """
    Scanner handler for strings; ``match`` covers the opening quote.

    Delegates to ``scanstring`` using ``context.encoding`` when the context
    has one, and returns its ``(text, end)`` result.
    """
    encoding = getattr(context, 'encoding', None)
    return scanstring(match.string, match.end(), encoding)
pattern(r'"')(JSONString)

WHITESPACE = re.compile(r'\s*', FLAGS)

def JSONObject(match, context, _w=WHITESPACE.match):
    """
    Scanner handler for objects; ``match`` covers the opening ``{``.

    Keys are decoded with ``scanstring`` and values with
    ``JSONScanner.iterscan``.  When ``context`` has a non-``None``
    ``object_hook`` the resulting ``dict`` is passed through it, except for
    an empty object, which is returned directly.

    Returns ``(pairs, end)`` where ``end`` is the index just past the
    closing ``}``.

    Raises ``ValueError`` when a property name, the ``:`` delimiter, a
    value, or the ``,`` delimiter is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    # trivial empty object
    if nextchar == '}':
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    iterscan = JSONScanner.iterscan
    while True:
        key, end = scanstring(s, end, encoding)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` already points past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    object_hook = getattr(context, 'object_hook', None)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)

def JSONArray(match, context, _w=WHITESPACE.match):
    """
    Scanner handler for arrays; ``match`` covers the opening ``[``.

    Elements are decoded with ``JSONScanner.iterscan``.

    Returns ``(values, end)`` where ``values`` is a ``list`` and ``end`` is
    the index just past the closing ``]``.

    Raises ``ValueError`` when an element or the ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` already points past the offending character; report
            # the character itself, as JSONObject does.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)

# Handlers given to the scanner, in this order.
ANYTHING = [
    JSONObject,
    JSONArray,
    JSONString,
    JSONConstant,
    JSONNumber,
]

JSONScanner = Scanner(ANYTHING)

class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    Performs the following translations in decoding:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None):
        """
        ``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).
        """
        self.encoding = encoding
        self.object_hook = object_hook

    def decode(self, s, _w=WHITESPACE.match):
        """
        Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document).

        Leading and trailing whitespace is skipped.  Raises ``ValueError``
        if anything other than whitespace follows the document.
        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise ValueError(errmsg("Extra data", s, end, len(s)))
        return obj

    def raw_decode(self, s, **kw):
        """
        Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
        with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Keyword arguments are forwarded to the scanner's ``iterscan``;
        ``context`` defaults to this decoder.  Raises ``ValueError`` when
        the scanner yields nothing.
        """
        kw.setdefault('context', self)
        try:
            obj, end = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end

__all__ = ['JSONDecoder']