"""Implementation of JSONEncoder
"""
import re

try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

# Characters that must be escaped inside any JSON string literal.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped when the output has to be ASCII-only:
# backslash, double quote, and anything outside the printable range ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the \x80-\xff range; used to decide whether a
# byte string needs decoding before it is escaped.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets a generic \uXXXX escape.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# Ask for infinity by name rather than relying on the overflow of a huge
# literal (the previous float('1e66666') was "probably not guaranteed").
INFINITY = float('inf')
FLOAT_REPR = repr


def encode_basestring(s):
    """Return a JSON representation of a Python string

    Only the characters matched by ``ESCAPE`` are replaced (using
    ``ESCAPE_DCT``); everything else is passed through unchanged and the
    result is wrapped in double quotes.

    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A ``str`` containing characters in the \\x80-\\xff range is decoded as
    UTF-8 first.  Every character matched by ``ESCAPE_ASCII`` is replaced
    either by its short escape from ``ESCAPE_DCT`` or by a ``\\uXXXX``
    escape; code points of 0x10000 and above are written as a UTF-16
    surrogate pair.

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C implementation when the _json extension provides one.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)


class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent unbounded recursion; a ValueError is raised when a cycle
        is found.  Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  With encoding=None no such transformation is
        applied and strings are handed to the string encoder as they are.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is set by ``encode()``; together with
        ``indent is None`` and ``sort_keys`` being false it allows the C
        encoder to be used when it is available.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only re-decode byte strings when a non-default encoding was
        # actually given.  encoding=None must not reach str.decode(None),
        # which raises TypeError; this mirrors the guard used in encode().
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)


def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function.

    ``markers`` is either None (no circular-reference checking) or a dict
    keyed by ``id()`` of the containers currently being encoded.
    ``_default``, ``_encoder`` and ``_floatstr`` are the callables used for
    unsupported objects, strings and floats respectively; the remaining
    arguments carry the formatting options of the encoder.  ``_one_shot``
    is accepted but not used by this implementation.

    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the chunks of a JSON array for a list or tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # buf holds the text that must precede the next element: the
        # opening bracket for the first one, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the chunks of a JSON object for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Dispatch on the type of ``o``; anything unsupported goes through
        # _default() and the result is encoded recursively.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode